Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions plugins/dative/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
__pycache__
.benchmarks
.idea
.mypy_cache
.pytest_cache
.ropeproject
.ruff_cache
.venv
.vscode
.zed

datasets/bird_minidev
.coverage
.DS_Store
34 changes: 34 additions & 0 deletions plugins/dative/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# ---- Build stage: resolve and install dependencies with uv ----
FROM astral/uv:0.8-python3.11-bookworm-slim AS builder

# C toolchain; presumably required by dependencies that compile native code.
RUN apt-get update && apt-get install -y --no-install-recommends gcc libc-dev

ENV UV_COMPILE_BYTECODE=1 UV_NO_INSTALLER_METADATA=1 UV_LINK_MODE=copy UV_PYTHON_DOWNLOADS=0

# Install dependencies (the project itself and the dev group are excluded),
# then pre-install the DuckDB extensions so they can be copied into the
# runtime image from /root/.duckdb.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=uv.lock,target=uv.lock \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
    --mount=type=bind,source=scripts/install_duckdb_extensions.py,target=install_duckdb_extensions.py \
    uv sync --frozen --no-install-project --no-dev \
    && uv run python install_duckdb_extensions.py

# clean the cache
# NOTE(review): this re-runs the same sync as above; presumably it prunes
# anything the `uv run` step added to the environment - confirm.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=uv.lock,target=uv.lock \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
    uv sync --frozen --no-install-project --no-dev

# ---- Runtime stage ----
FROM python:3.11-slim-bookworm AS runtime

# COPY --chown resolves user/group names through this image's /etc/passwd and
# /etc/group, so the account has to exist before the COPY instructions below;
# without it the build fails on the first COPY.
RUN groupadd --system dative \
    && useradd --system --gid dative --no-create-home dative

COPY --from=builder --chown=dative:dative /.venv /.venv
COPY --from=builder --chown=dative:dative /root/.duckdb /root/.duckdb

COPY src/dative /dative

COPY docker/entrypoint.sh /

# Put the virtualenv's executables first on PATH.
ENV PATH="/.venv/bin:$PATH"

# NOTE(review): there is no USER instruction, so the container still runs as
# root; confirm whether it should drop privileges to `dative`.
EXPOSE 3000

ENTRYPOINT ["/entrypoint.sh"]
44 changes: 44 additions & 0 deletions plugins/dative/Justfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Install the project's dependencies
install:
uv sync

# Upgrade all dependencies
U:
uv sync --upgrade

# Upgrade a single dependency given by `package`
up package:
uv sync --upgrade-package {{ package }}

# Format the project's code
format:
uv run ruff format src tests/unit

# Run the project's lint checks (applies ruff's automatic fixes)
check:
uv run ruff check --fix src tests/unit

# Format the metadata SQL files with scripts/sqlfmt.py
sqlfmt: install
uv run scripts/sqlfmt.py src/dative/core/data_source/metadata_sql

# Run type checking
type:
uv run mypy src

# Run the unit tests
test:
uv run pytest tests/unit

# Run the functional tests
test_function:
uv run pytest tests/function

# Run the integration tests
test_integration:
uv run pytest tests/integration

# Run install, format, check, type and test in order
ci: install format check type test

# Start the app with `fastapi dev`
dev: install
uv run fastapi dev src/dative/main.py

# Start the app with `fastapi run`
run: install
uv run fastapi run src/dative/main.py
32 changes: 32 additions & 0 deletions plugins/dative/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
输入自然语言,生成sql查询,给出自然语言回答

## 特性:
- 支持mysql、postgresql、sqlite等数据库
- 结合duckdb强大本地数据库管理能力,支持本地和S3存储结构化数据,如:xlsx、xls、csv、xlsm、xlsb等
- 支持sql基本语法检查和优化
- 支持单个数据库查询,不支持跨数据库查询
- 仅支持sql查询,不支持更新、删除、插入等语句


## 本地开发

1、项目管理工具使用 [uv](https://github.com/astral-sh/uv),使用pip安装:
```bash
pip install uv
```

2、安装依赖包:
```bash
uv sync
```

3、安装duckdb扩展包:
```bash
uv run python scripts/install_duckdb_extensions.py
```

4、启动服务:
```bash
uv run fastapi run src/dative/main.py
```

51 changes: 51 additions & 0 deletions plugins/dative/docker/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/bin/bash

set -e

# Succeed (exit status 0) only when DATIVE_ENVIRONMENT equals "development",
# compared case-insensitively; an unset or empty variable is not development.
is_development() {
    # ${var,,} lower-cases the value without spawning a subshell.
    local normalized="${DATIVE_ENVIRONMENT,,}"
    [[ "$normalized" == "development" ]]
}

# Launch the ARQ worker.
# In development it runs as a background job and its PID is stored in ARQ_PID;
# in every other environment the shell is replaced by the worker via exec.
start_arq() {
    echo "Starting ARQ worker..."
    if is_development; then
        arq_worker &
        ARQ_PID=$!
        echo "ARQ worker started with PID: $ARQ_PID"
    else
        # exec never returns on success: the worker takes over this process.
        exec arq_worker
    fi
}

# Launch the Uvicorn server for dative.main:app on 0.0.0.0.
# In development it runs as a background job and its PID is stored in
# UVICORN_PID; in every other environment the shell is replaced via exec.
start_uvicorn() {
    echo "Starting Uvicorn server..."
    # Listen on PORT when it is set and non-empty, otherwise on 3000.
    local listen_port="${PORT:-3000}"
    if is_development; then
        uvicorn dative.main:app --host '0.0.0.0' --port "$listen_port" &
        UVICORN_PID=$!
        echo "Uvicorn server started with PID: $UVICORN_PID"
    else
        # exec never returns on success: uvicorn takes over this process.
        exec uvicorn dative.main:app --host '0.0.0.0' --port "$listen_port"
    fi
}

# Pick the service from MODE: "worker" starts the ARQ worker, any other value
# (including unset) starts the Uvicorn server.
case "${MODE}" in
    worker) start_arq ;;
    *) start_uvicorn ;;
esac

if ! is_development; then
    # Outside development the start_* function called exec, so this line is
    # only reached if something went wrong.
    echo "Signal handling disabled. Process will replace shell (exec)."
else
    # Development: the service runs in the background, so keep the shell alive.
    sleep infinity
fi
110 changes: 110 additions & 0 deletions plugins/dative/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
[project]
name = "dative"
version = "0.1.0"
description = "Query data from database by natural language"
requires-python = "~=3.11.0"
dependencies = [
"aiobotocore>=2.24.2",
"aiosqlite>=0.21.0",
"asyncpg>=0.30.0",
"cryptography>=45.0.6",
"duckdb>=1.3.2",
"fastapi<1.0",
"fastexcel>=0.15.1",
"greenlet>=3.2.4",
"httpx[socks]>=0.28.1",
"json-repair>=0.50.1",
"langchain-core<1.0",
"langchain-openai<1.0",
"mysql-connector-python>=9.4.0",
"orjson>=3.11.2",
"pyarrow>=21.0.0",
"pydantic-settings>=2.10.1",
"python-dateutil>=2.9.0.post0",
"python-dotenv>=1.1.1",
"pytz>=2025.2",
"sqlglot[rs]==27.12.0",
"uvicorn[standard]<1.0",
]

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.hatch.build.targets.sdist]
only-include = ["src/"]

[tool.hatch.build.targets.wheel]
packages = ["src/"]

[tool.uv]
index-url = "https://pypi.tuna.tsinghua.edu.cn/simple"
extra-index-url = ["https://mirrors.aliyun.com/pypi/simple"]

[dependency-groups]
dev = [
"hatch>=1.14.1",
"ruff>=0.12.10",
"pytest-async>=0.1.1",
"pytest-asyncio>=0.26.0",
"pytest-cov>=6.3.0",
"pytest-mock>=3.14.0",
"pytest>=8.3.3",
"pytest-benchmark>=4.0.0",
"fastapi-cli>=0.0.8",
"mypy>=1.17.1",
"types-python-dateutil>=2.9.0.20250822",
"pyarrow-stubs>=20.0.0.20250825",
"asyncpg-stubs>=0.30.2",
"boto3-stubs>=1.40.25",
"types-aiobotocore[essential]>=2.24.2",
]

[tool.uv.sources]

# checklist
[tool.ruff]
line-length = 120
target-version = "py311"
preview = true

[tool.ruff.lint]
select = [
"E", # pycodestyle
"F", # Pyflakes
"W", # Warning
"N", # PEP8 Naming
"I", # isort
"FAST", # FastAPI
]
# Disabled rules
ignore = []

[tool.sqruff]
output_line_length = 120
max_line_length = 120

# Type checking
[tool.mypy]
mypy_path = "./src"
exclude = ["tests/"]

# Unit tests
[tool.pytest.ini_options]
addopts = ["-rA", "--cov=dative"]
testpaths = ["tests/unit"]
asyncio_mode = "auto"
asyncio_default_fixture_loop_scope = "function"
python_files = ["*.py"]

# Code coverage
[tool.coverage.run]
branch = true
parallel = true
omit = ["**/__init__.py"]

[tool.coverage.report]
# fail_under = 85
show_missing = true
sort = "cover"
exclude_lines = ["no cov", "if __name__ == .__main__.:"]
8 changes: 8 additions & 0 deletions plugins/dative/scripts/install_duckdb_extensions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import duckdb

# DuckDB extensions to install, in this order.
extensions = ["excel", "httpfs"]

for extension_name in extensions:
    duckdb.install_extension(extension_name)
48 changes: 48 additions & 0 deletions plugins/dative/scripts/sqlfmt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# -*- coding: utf-8 -*-
import argparse
import sys
from pathlib import Path

from sqlglot import transpile
from sqlglot.dialects.dialect import DIALECT_MODULE_NAMES
from sqlglot.errors import ParseError


def transpile_sql_file(file: Path, dialect: str) -> None:
    """Reformat one ``.sql`` file in place with sqlglot.

    The file is read as UTF-8, transpiled with ``dialect`` as both the source
    and the target dialect (pretty-printed), and written back with statements
    joined by ``";\\n\\n"`` and a trailing newline.

    Args:
        file: Path of the SQL file to rewrite.
        dialect: sqlglot dialect name; must be in ``DIALECT_MODULE_NAMES``.

    Raises:
        ValueError: If ``file`` is an existing ``.sql`` file but ``dialect``
            is not a supported dialect.
    """
    # Validate the path first: callers may derive the dialect from the file
    # name, so a non-SQL input should get this message rather than a
    # ValueError about a dialect guessed from an unrelated name.
    if not file.is_file() or file.suffix != ".sql":
        print("Please specify a sql file")
        return

    if dialect not in DIALECT_MODULE_NAMES:
        raise ValueError(f"Dialect {dialect} not supported")

    sql_txt = file.read_text(encoding="utf-8")
    try:
        sqls = transpile(sql_txt, read=dialect, write=dialect, pretty=True, indent=4, pad=4)
    except ParseError as e:
        # Report the parse error and leave the file untouched.
        print(str(e))
        return
    file.write_text(";\n\n".join(sqls) + "\n", encoding="utf-8")


if __name__ == "__main__":
parser = argparse.ArgumentParser(description="")
parser.add_argument("files_or_dirs", nargs='+', type=str, default=None, help="files or directories")
parser.add_argument('-d', '--dialect', type=str, help='输出文件')
args = parser.parse_args()

if args.files_or_dirs:
for file_or_dir in args.files_or_dirs:
fd = Path(file_or_dir)
if fd.is_file():
dialect = args.dialect or fd.stem.split(".")[-1]
transpile_sql_file(fd, dialect)
elif fd.is_dir():
for sql_file in fd.glob("*.sql"):
dialect = args.dialect or sql_file.stem.split(".")[-1]
transpile_sql_file(sql_file, dialect)
else:
print(f"{file_or_dir} is not a sql file or a directory")
sys.exit(3)
else:
print("Please specify a file or a directory")
sys.exit(3)
1 change: 1 addition & 0 deletions plugins/dative/src/dative/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# -*- coding: utf-8 -*-
9 changes: 9 additions & 0 deletions plugins/dative/src/dative/api/v1/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# -*- coding: utf-8 -*-

from fastapi import APIRouter

from . import data_source

# Router for API v1; the data_source routes are included under /data_source.
router = APIRouter()
router.include_router(
    data_source.router,
    prefix="/data_source",
    tags=["data_source"],
)
Loading
Loading