Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions .github/workflows/backfill_metadata.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Manually triggered workflow that runs bin/backfill-core-metadata against the
# index at pypi.devinfra.sentry.io. The script skips packages whose
# packages.json entry already has a `core_metadata` value.
name: backfill metadata

on:
# no push/schedule trigger: the job only runs when started by hand
workflow_dispatch:

jobs:
backfill-metadata:
runs-on: ubuntu-latest
permissions:
# allows the job to request an OIDC token, which the google-github-actions/auth
# step below uses with the workload identity provider
id-token: write
contents: read
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: '3.11'
# third-party action pinned to a full commit SHA (tag noted in the trailing comment)
- uses: google-github-actions/auth@ba79af03959ebeac9769e648f473a284504d9193 # v2.1.10
with:
workload_identity_provider: projects/868781662168/locations/global/workloadIdentityPools/prod-github/providers/github-oidc-pool
service_account: gha-pypi@sac-prod-sa.iam.gserviceaccount.com
# -u: unbuffered stdout/stderr so progress shows up in the job log as it happens
# -S: do not import the `site` module (the script only uses the standard library)
- run: python3 -uS bin/backfill-core-metadata --pypi-url https://pypi.devinfra.sentry.io
94 changes: 94 additions & 0 deletions bin/backfill-core-metadata
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#!/usr/bin/env python3
from __future__ import annotations

import argparse
import hashlib
import json
import os.path
import shutil
import subprocess
import tempfile
import urllib.parse
import urllib.request
import zipfile
from collections.abc import Sequence


def _get_metadata_bytes(filename: str) -> bytes:
    """Return the raw bytes of the wheel's top-level ``*.dist-info/METADATA``.

    Only a ``METADATA`` member exactly one directory deep is considered, so
    ``.dist-info`` directories vendored further down inside the wheel are
    ignored.

    :param filename: path to a wheel (zip archive) on disk
    :returns: the unmodified contents of the ``METADATA`` member
    :raises ValueError: if the archive does not contain exactly one
        top-level ``*.dist-info/METADATA`` member
    """
    with zipfile.ZipFile(filename) as zipf:
        candidates = [
            name
            for name in zipf.namelist()
            if name.endswith(".dist-info/METADATA") and name.count("/") == 1
        ]
        # name the offending wheel: this runs in a batch over the whole index
        # and a bare "not enough values to unpack" is not actionable
        if len(candidates) != 1:
            raise ValueError(
                f"expected exactly one top-level .dist-info/METADATA in "
                f"{filename}, found {len(candidates)}: {candidates}"
            )
        with zipf.open(candidates[0]) as f:
            return f.read()


def main(argv: Sequence[str] | None = None) -> int:
    """Backfill core metadata for wheels that are already on the index.

    Reads the line-delimited ``packages.json`` from ``--pypi-url``. For every
    entry without a ``core_metadata`` value it downloads the wheel, extracts
    its ``METADATA`` file to ``<wheel>.metadata`` and records the file's
    sha256 on the entry. The extracted files and the rewritten
    ``packages.json`` are then uploaded with ``gcloud storage cp``.

    :param argv: command-line arguments; ``None`` means ``sys.argv[1:]``
    :returns: process exit status (``0`` on success)
    :raises subprocess.CalledProcessError: if a ``gcloud`` upload fails
    :raises urllib.error.URLError: if the index or a wheel cannot be fetched
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--pypi-url", required=True)
    args = parser.parse_args(argv)

    url = urllib.parse.urljoin(args.pypi_url, "packages.json")
    # close the HTTP response deterministically and tolerate blank lines
    # (e.g. a trailing newline) in the line-delimited json
    with urllib.request.urlopen(url) as resp:
        packages = [json.loads(line) for line in resp if line.strip()]

    with tempfile.TemporaryDirectory() as tmpdir:
        metadata_dir = os.path.join(tmpdir, "metadata")
        os.makedirs(metadata_dir)

        backfilled = 0
        for package in packages:
            basename = os.path.basename(package["filename"])

            if package.get("core_metadata"):
                print(f"skipping: core metadata already present for {basename}")
                continue

            url = f"{args.pypi_url}/wheels/{basename}"
            fn = os.path.join(tmpdir, basename)

            with urllib.request.urlopen(url) as resp, open(fn, "wb") as f:
                shutil.copyfileobj(resp, f)

            try:
                metadata_bytes = _get_metadata_bytes(fn)
            finally:
                # only the METADATA bytes are needed from here on; drop the
                # wheel now so disk usage stays bounded by the largest wheel
                os.remove(fn)
            metadata_sha256 = hashlib.sha256(metadata_bytes).hexdigest()

            with open(os.path.join(metadata_dir, f"{basename}.metadata"), "wb") as f:
                f.write(metadata_bytes)

            package["core_metadata"] = f"sha256={metadata_sha256}"
            backfilled += 1
            print(f"core metadata fetched for {basename}")

        if not backfilled:
            # the `metadata/*` wildcard below would match no files, which
            # makes `gcloud storage cp` fail; packages.json is also unchanged
            print("nothing to backfill: no package is missing core metadata")
            return 0

        packages_json = os.path.join(tmpdir, "packages.json")
        with open(packages_json, "w") as f:
            f.writelines(f"{json.dumps(package)}\n" for package in packages)

        # upload the metadata files first so that every hash referenced by the
        # new packages.json already exists in the bucket when it is published
        subprocess.check_call(
            (
                "gcloud",
                "storage",
                "cp",
                "-n",  # no-clobber
                "--cache-control",
                "public, max-age=3600",
                f"{metadata_dir}/*",
                "gs://pypi.devinfra.sentry.io/wheels/",
            )
        )
        subprocess.check_call(
            (
                "gcloud",
                "storage",
                "cp",
                # the packages.json file must be consistently read so no caching
                "--cache-control",
                "no-store",
                packages_json,
                "gs://pypi.devinfra.sentry.io",
            )
        )

    return 0


# Script entry point: the process exit status is whatever main() returns.
if __name__ == "__main__":
raise SystemExit(main())
1 change: 1 addition & 0 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
FROM python:3.11.4-slim-bullseye

RUN : \
&& apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install \
Expand Down
2 changes: 1 addition & 1 deletion docker/requirements.in
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
auditwheel>=5.1.2
delocate>=0.10.4
dumb-pypi>=1.13.0
dumb-pypi>=1.15.0
packaging>=21.3
patchelf>=0.14.5;sys_platform=="linux"
pip>=22.1.2
Expand Down
2 changes: 1 addition & 1 deletion docker/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
auditwheel==5.1.2
delocate==0.10.4
distlib==0.3.8
dumb-pypi==1.13.0
dumb-pypi==1.15.0
filelock==3.13.1
jinja2==3.1.6
markupsafe==2.1.1
Expand Down
25 changes: 18 additions & 7 deletions make_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,29 +25,37 @@ def _commit_info() -> tuple[str, int]:
return h, int(t)


def _make_info(filename: str) -> dict[str, Any]:
h, t = _commit_info()

with open(filename, "rb") as f:
sha256 = hashlib.sha256(f.read()).hexdigest()

def _get_metadata_bytes(filename: str) -> bytes:
    """Return the raw bytes of the wheel's top-level ``*.dist-info/METADATA``.

    The bytes are returned unparsed so callers can both hash them and write
    them out verbatim; parsing into an email message is left to the caller.

    :param filename: path to a wheel (zip archive) on disk
    :returns: the unmodified contents of the ``METADATA`` member
    :raises ValueError: if the archive does not contain exactly one
        ``*.dist-info/METADATA`` member one directory deep
    """
    with zipfile.ZipFile(filename) as zipf:
        matches = [
            name
            for name in zipf.namelist()
            if name.endswith(".dist-info/METADATA") and name.count("/") == 1
        ]
        if len(matches) != 1:
            raise ValueError(
                f"expected exactly one top-level .dist-info/METADATA in "
                f"{filename}, found {len(matches)}: {matches}"
            )
        # read the member exactly once: parsing the stream here first would
        # leave it exhausted and make the read below return b""
        with zipf.open(matches[0]) as f:
            return f.read()


def _make_info(filename: str) -> dict[str, Any]:
h, t = _commit_info()

with open(filename, "rb") as f:
sha256 = hashlib.sha256(f.read()).hexdigest()

metadata_bytes = _get_metadata_bytes(filename)
metadata_sha256 = hashlib.sha256(metadata_bytes).hexdigest()
info = email.message_from_bytes(metadata_bytes)

dist_info = {
"requires_dist": info.get_all("requires-dist"),
"requires_python": info.get("requires-python"),
}

# this is intended to be exactly the structure dumb-pypi generates
return {
"filename": os.path.basename(filename),
"hash": f"sha256={sha256}",
"core_metadata": f"sha256={metadata_sha256}",
"upload_timestamp": t,
"uploaded_by": f"git@{h}",
**{k: v for k, v in dist_info.items() if v},
Expand Down Expand Up @@ -92,6 +100,9 @@ def main(argv: Sequence[str] | None = None) -> int:
new_packages.append(_make_info(filename))
shutil.copy(filename, wheels_dir)

with open(f"{wheels_dir}/{basename}.metadata", "wb") as f:
f.write(_get_metadata_bytes(filename))

with tempfile.TemporaryDirectory() as tmpdir:
prev_json = os.path.join(tmpdir, "previous.json")
with open(prev_json, "w") as f:
Expand Down
1 change: 1 addition & 0 deletions packages.ini
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,7 @@ validate_incorrect_missing_deps = psycopg2-binary
[drf-spectacular==0.27.2]

[dumb-pypi==1.13.0]
[dumb-pypi==1.15.0]

[ecdsa==0.18.0]

Expand Down
34 changes: 33 additions & 1 deletion tests/make_index_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import io
import json
import os.path
import re
import urllib.request
import zipfile
from unittest import mock
Expand Down Expand Up @@ -30,6 +31,7 @@ def test_make_info_empty_wheel_metadata(tmp_path):
assert ret == {
"filename": "a-1-py3-none-any.whl",
"hash": "sha256=64f7f4664408d711c17ad28c1d3ba7dd155501e67c8632fafc8a525ba3ebc527",
"core_metadata": "sha256=d4528dc2d072c0e6d65addae8b5700fd29253b9eb9a9214aba539447d6f29fae",
"upload_timestamp": mock.ANY,
"uploaded_by": re_assert.Matches(r"^git@[a-f0-9]{7}"),
}
Expand All @@ -56,6 +58,7 @@ def test_make_info_full_wheel_metadata(tmp_path):
"jsonschema",
"packaging (==21.3) ; extra = 'p'",
],
"core_metadata": "sha256=a015186125a83e6667547b156f8c6813e72fbab48c4ae635ac3c3a5f1d86aa9f",
"requires_python": ">= 3.7, != 3.7.0",
"upload_timestamp": mock.ANY,
"uploaded_by": re_assert.Matches(r"^git@[a-f0-9]{7}"),
Expand All @@ -81,7 +84,36 @@ def test_main_new_package(tmp_path):
# just some smoke tests about the output
assert dest.joinpath("packages.json").exists()
assert dest.joinpath("wheels/a-1-py3-none-any.whl").exists()
assert dest.joinpath("simple/a/index.html").exists()


def test_main_core_metadata(tmp_path):
    """main() writes a `.metadata` file next to the wheel and links its hash in the index."""
    dist = tmp_path / "dist"
    dist.mkdir()
    make_wheel(dist / "a-1-py3-none-any.whl", ())
    dest = tmp_path / "dest"

    wheel_sha = "64f7f4664408d711c17ad28c1d3ba7dd155501e67c8632fafc8a525ba3ebc527"
    metadata_sha = "d4528dc2d072c0e6d65addae8b5700fd29253b9eb9a9214aba539447d6f29fae"
    expected_link = f'<a href="http://example.com/wheels/a-1-py3-none-any.whl#sha256={wheel_sha}" data-core-metadata="sha256={metadata_sha}" >a-1-py3-none-any.whl</a>'

    # the mocked urlopen yields no lines at all
    cli_args = (
        f"--dist={dist}",
        f"--dest={dest}",
        "--pypi-url=http://example.com",
    )
    with mock.patch.object(urllib.request, "urlopen", return_value=io.BytesIO(b"")):
        ret = make_index.main(cli_args)
    assert not ret

    # collapse whitespace so the assertion does not depend on template layout
    raw_html = dest.joinpath("simple/a/index.html").read_text()
    index_html = re.sub(r"\s+", " ", raw_html)
    assert expected_link in index_html

    sidecar = dest.joinpath("wheels/a-1-py3-none-any.whl.metadata")
    assert sidecar.read_text() == "Name: a\nVersion: 1\n"


def test_main_multiple_provide_same_package_first_wins(tmp_path):
Expand Down
2 changes: 1 addition & 1 deletion tox.ini
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[tox]
envlist = py38
envlist = py

[testenv]
skip_install = true
Expand Down
Loading