Skip to content

Commit cb17c83

Browse files
committed
More improvements to crate version analysis script
* Only cache crate versions in a file * Cache versions by default * Exclude pre-release versions by default * Don't show version metadata in report output Change-Id: I29e3e36421000c0bda17ef761f9543766a6a6964
1 parent 094d520 commit cb17c83

File tree

1 file changed

+152
-63
lines changed

1 file changed

+152
-63
lines changed

scripts/analyze_crate_versions.py

Lines changed: 152 additions & 63 deletions
Original file line number | Diff line number | Diff line change
@@ -8,20 +8,17 @@
88
crates.io:
99
python3 scripts/analyze_crate_versions.py
1010
11-
# Run analysis and save the results to a file for later use:
12-
python3 scripts/analyze_crate_versions.py --save crate_report.json
13-
14-
# Load previously saved data to explore the report without re-fetching from
15-
crates.io:
16-
python3 scripts/analyze_crate_versions.py --load crate_report.json
17-
1811
# Analyze a specific lock file:
1912
python3 scripts/analyze_crate_versions.py path/to/Cargo.lock
13+
14+
# Force update the versions cache:
15+
python3 scripts/analyze_crate_versions.py --update
2016
"""
2117

2218
import argparse
2319
import json
2420
import os
21+
import pathlib
2522
import re
2623
import ssl
2724
import sys
@@ -138,9 +135,7 @@ def get_crate_versions(crate_name):
138135
req = urllib.request.Request(
139136
url, headers={"User-Agent": "Oak-Crate-Analyzer/0.1"}
140137
)
141-
with urllib.request.urlopen(
142-
req, timeout=5, context=ssl_context
143-
) as response:
138+
with urllib.request.urlopen(req, timeout=5, context=ssl_context) as response:
144139
data = json.loads(response.read().decode())
145140
# Return list of (version_string, is_yanked)
146141
return [(v["num"], v["yanked"]) for v in data.get("versions", [])]
@@ -180,7 +175,9 @@ def find_latest_updates(actual_v, all_versions):
180175
return {k: v for k, v in updates.items() if v}
181176

182177

183-
def collect_crate_data(bzl_file, lock_file, filter_str=None):
178+
def collect_crate_data(
179+
bzl_file, lock_file, filter_str=None, cached_versions=None
180+
):
184181
"""Collect crate version data from Bazel config, Cargo.lock, and crates.io."""
185182
requested_crates = parse_oak_crates(bzl_file)
186183
actual_crates = parse_cargo_lock(lock_file)
@@ -191,35 +188,56 @@ def collect_crate_data(bzl_file, lock_file, filter_str=None):
191188

192189
total = len(crate_names)
193190
data = []
191+
cached_versions = cached_versions or {}
192+
193+
fetched_any = False
194194
for i, name in enumerate(crate_names, 1):
195-
print(
196-
f"[{i}/{total}] Fetching versions for {name}...",
197-
end="\r",
198-
file=sys.stderr,
199-
)
200195
requested = requested_crates[name]
201196
actual_list = actual_crates.get(name, [])
202197

203-
versions = get_crate_versions(name)
204-
# Small delay to be nice to crates.io
205-
time.sleep(0.05)
198+
if name in cached_versions:
199+
versions = cached_versions[name]
200+
else:
201+
fetched_any = True
202+
print(
203+
f"[{i}/{total}] Fetching versions for {name}...",
204+
end="\r",
205+
file=sys.stderr,
206+
)
207+
versions = get_crate_versions(name)
208+
# Small delay to be nice to crates.io
209+
time.sleep(0.05)
206210

207211
data.append({
208212
"name": name,
209213
"requested": requested,
210214
"actual": actual_list,
211215
"versions": versions,
212216
})
213-
print(f"\nFinished fetching data for {total} crates.", file=sys.stderr)
217+
if fetched_any:
218+
print(f"\nFinished fetching data for {total} crates.", file=sys.stderr)
214219
return data
215220

216221

217-
def print_report(data, total_count=None, exclude_pre_release=False):
222+
def strip_metadata(v):
223+
"""Remove build metadata (anything after +) from a version string."""
224+
if not isinstance(v, str) or v == "*" or v == "git":
225+
return v
226+
return v.split("+", 1)[0]
227+
228+
229+
def print_report(
230+
data,
231+
total_count=None,
232+
include_pre_release=False,
233+
versions_path=None,
234+
cache_age=None,
235+
):
218236
"""Print a report of crate versions and available updates."""
219237
# Define a shared format string for the table to ensure perfect alignment
220238
row_fmt = "{:<30} | {:<12} | {:<12} | {:<12} | {:<12} | {:<12}"
221239

222-
if exclude_pre_release:
240+
if not include_pre_release:
223241
data = [d for d in data if not is_pre_release(d["requested"])]
224242

225243
print(
@@ -238,14 +256,14 @@ def print_report(data, total_count=None, exclude_pre_release=False):
238256

239257
for entry in data:
240258
name = entry["name"]
241-
requested = entry["requested"]
259+
requested = strip_metadata(entry["requested"])
242260
actual_list = entry["actual"]
243261
versions_info = entry.get("versions", [])
244262

245263
# Filter out yanked versions
246264
all_versions = [v for v, yanked in versions_info if not yanked]
247265

248-
if exclude_pre_release:
266+
if not include_pre_release:
249267
all_versions = [v for v in all_versions if not is_pre_release(v)]
250268
actual_list = [v for v in actual_list if not is_pre_release(v)]
251269

@@ -255,19 +273,19 @@ def print_report(data, total_count=None, exclude_pre_release=False):
255273

256274
if not actual_list:
257275
max_v = max(all_versions, key=version_key) if all_versions else "N/A"
258-
l_major = max_v
276+
l_major = strip_metadata(max_v)
259277
else:
260278
highest_actual = max(actual_list, key=version_key)
261279
updates = find_latest_updates(highest_actual, all_versions)
262280

263281
if updates.get("Patch"):
264-
l_patch = updates["Patch"]
282+
l_patch = strip_metadata(updates["Patch"])
265283
summary["Patch"] += 1
266284
if updates.get("Minor"):
267-
l_minor = updates["Minor"]
285+
l_minor = strip_metadata(updates["Minor"])
268286
summary["Minor"] += 1
269287
if updates.get("Major"):
270-
l_major = updates["Major"]
288+
l_major = strip_metadata(updates["Major"])
271289
summary["Major"] += 1
272290

273291
if not actual_list:
@@ -276,10 +294,15 @@ def print_report(data, total_count=None, exclude_pre_release=False):
276294
)
277295
else:
278296
for i, v in enumerate(actual_list):
297+
display_v = strip_metadata(v)
279298
if i == 0:
280-
print(row_fmt.format(name, requested, v, l_patch, l_minor, l_major))
299+
print(
300+
row_fmt.format(
301+
name, requested, display_v, l_patch, l_minor, l_major
302+
)
303+
)
281304
else:
282-
print(row_fmt.format("", "", v, "", "", ""))
305+
print(row_fmt.format("", "", display_v, "", "", ""))
283306

284307
print("-" * 110)
285308
print("\nUpdate Summary:")
@@ -292,12 +315,32 @@ def print_report(data, total_count=None, exclude_pre_release=False):
292315
else:
293316
print(f" Total Crates Analyzed: {len(data)}")
294317

318+
if versions_path:
319+
age_str = (
320+
f" (updated {format_age(cache_age)} ago)"
321+
if cache_age is not None
322+
else ""
323+
)
324+
print(f"\nVersions Cache: {versions_path}{age_str}")
325+
print("To update the cache, run with the --update or -u flag.")
326+
327+
328+
def format_age(seconds):
329+
"""Format a time duration in seconds into a human-readable string."""
330+
if seconds < 60:
331+
return f"{int(seconds)}s"
332+
if seconds < 3600:
333+
return f"{int(seconds // 60)}m"
334+
if seconds < 86400:
335+
return f"{int(seconds // 3600)}h"
336+
return f"{int(seconds // 86400)}d"
337+
295338

296339
def main():
297340
parser = argparse.ArgumentParser(
298341
description=(
299-
"Analyze crate versions and generate reports. This tool can create"
300-
" JSON data structures for offline analysis."
342+
"Analyze crate versions and generate reports. Uses a local cache to"
343+
" store internet data."
301344
),
302345
epilog="""
303346
Report columns:
@@ -315,48 +358,92 @@ def main():
315358
default="Cargo.bazel.lock",
316359
help="Path to Cargo.lock or Cargo.bazel.lock",
317360
)
318-
parser.add_argument("--save", help="Save crate data to a JSON file")
319-
parser.add_argument("--load", help="Load crate data from a JSON file")
361+
parser.add_argument(
362+
"--versions_file", help="Path to a custom versions cache file"
363+
)
364+
parser.add_argument(
365+
"--update",
366+
"-u",
367+
action="store_true",
368+
help="Force update the versions cache from crates.io",
369+
)
320370
parser.add_argument(
321371
"--filter", "-f", help="Filter report by crate name (substring match)"
322372
)
323373
parser.add_argument(
324-
"--exclude-pre-release",
374+
"--include-pre-release",
325375
action="store_true",
326-
help="Exclude pre-release versions from the report",
376+
help="Include pre-release versions in the report",
327377
)
328378
args = parser.parse_args()
329379

330380
bzl_file = "bazel/crates/oak_crates.bzl"
331381

332-
if args.load:
333-
if not os.path.exists(args.load):
334-
print(f"Error: {args.load} not found")
335-
sys.exit(1)
336-
with open(args.load, "r") as f:
337-
data = json.load(f)
382+
default_cache_path = (
383+
pathlib.Path.home() / ".cache" / "oak" / "crate_cache.json"
384+
)
385+
versions_path = (
386+
pathlib.Path(args.versions_file)
387+
if args.versions_file
388+
else default_cache_path
389+
)
390+
391+
cached_versions = {}
392+
cache_age = None
393+
if versions_path.exists() and not args.update:
394+
cache_age = time.time() - versions_path.stat().st_mtime
395+
with open(versions_path, "r") as f:
396+
loaded_data = json.load(f)
397+
if isinstance(loaded_data, list):
398+
# Support old format (list of dicts)
399+
for entry in loaded_data:
400+
if "name" in entry and "versions" in entry:
401+
cached_versions[entry["name"]] = entry["versions"]
402+
elif isinstance(loaded_data, dict):
403+
# New format (dict of name -> versions)
404+
cached_versions = loaded_data
405+
elif args.update:
406+
print(
407+
f"Forcing update of versions from crates.io to {versions_path}...",
408+
file=sys.stderr,
409+
)
338410
else:
339-
if not os.path.exists(bzl_file):
340-
print(f"Error: {bzl_file} not found")
341-
sys.exit(1)
342-
if not os.path.exists(args.lock_file):
343-
print(f"Error: {args.lock_file} not found")
344-
sys.exit(1)
345-
346-
# If saving, we collect everything.
347-
# If not, we can speed up by filtering during collection.
348-
collect_filter = None if args.save else args.filter
349-
try:
350-
data = collect_crate_data(
351-
bzl_file, args.lock_file, filter_str=collect_filter
352-
)
353-
except Exception as e: # pylint: disable=broad-exception-caught
354-
print(f"Error during data collection: {e}")
355-
sys.exit(1)
411+
print(
412+
f"Versions cache {versions_path} not found. Fetching from crates.io...",
413+
file=sys.stderr,
414+
)
356415

357-
if args.save:
358-
with open(args.save, "w") as f:
359-
json.dump(data, f, indent=2)
416+
if not os.path.exists(bzl_file):
417+
print(f"Error: {bzl_file} not found")
418+
sys.exit(1)
419+
if not os.path.exists(args.lock_file):
420+
print(f"Error: {args.lock_file} not found")
421+
sys.exit(1)
422+
423+
# If we need to populate or update the cache, we collect everything.
424+
# If we have a cache and are not updating, we can filter during collection.
425+
need_full_fetch = not versions_path.exists() or args.update
426+
collect_filter = None if need_full_fetch else args.filter
427+
428+
try:
429+
data = collect_crate_data(
430+
bzl_file,
431+
args.lock_file,
432+
filter_str=collect_filter,
433+
cached_versions=cached_versions,
434+
)
435+
except Exception as e: # pylint: disable=broad-exception-caught
436+
print(f"Error during data collection: {e}")
437+
sys.exit(1)
438+
439+
# Save/Update the cache if we did a full fetch or it was missing
440+
if need_full_fetch:
441+
# Ensure directory exists for the cache
442+
versions_path.parent.mkdir(parents=True, exist_ok=True)
443+
save_data = {entry["name"]: entry["versions"] for entry in data}
444+
with open(versions_path, "w") as f:
445+
json.dump(save_data, f, indent=2)
446+
cache_age = 0
360447

361448
total_count = len(data)
362449
report_data = data
@@ -366,7 +453,9 @@ def main():
366453
print_report(
367454
report_data,
368455
total_count=total_count,
369-
exclude_pre_release=args.exclude_pre_release,
456+
include_pre_release=args.include_pre_release,
457+
versions_path=versions_path,
458+
cache_age=cache_age,
370459
)
371460

372461

0 commit comments

Comments (0)