88 crates.io:
99 python3 scripts/analyze_crate_versions.py
1010
11- # Run analysis and save the results to a file for later use:
12- python3 scripts/analyze_crate_versions.py --save crate_report.json
13-
14- # Load previously saved data to explore the report without re-fetching from
15- crates.io:
16- python3 scripts/analyze_crate_versions.py --load crate_report.json
17-
1811 # Analyze a specific lock file:
1912 python3 scripts/analyze_crate_versions.py path/to/Cargo.lock
13+
14+ # Force update the versions cache:
15+ python3 scripts/analyze_crate_versions.py --update
2016"""
2117
2218import argparse
2319import json
2420import os
21+ import pathlib
2522import re
2623import ssl
2724import sys
@@ -138,9 +135,7 @@ def get_crate_versions(crate_name):
138135 req = urllib .request .Request (
139136 url , headers = {"User-Agent" : "Oak-Crate-Analyzer/0.1" }
140137 )
141- with urllib .request .urlopen (
142- req , timeout = 5 , context = ssl_context
143- ) as response :
138+ with urllib .request .urlopen (req , timeout = 5 , context = ssl_context ) as response :
144139 data = json .loads (response .read ().decode ())
145140 # Return list of (version_string, is_yanked)
146141 return [(v ["num" ], v ["yanked" ]) for v in data .get ("versions" , [])]
@@ -180,7 +175,9 @@ def find_latest_updates(actual_v, all_versions):
180175 return {k : v for k , v in updates .items () if v }
181176
182177
183- def collect_crate_data (bzl_file , lock_file , filter_str = None ):
178+ def collect_crate_data (
179+ bzl_file , lock_file , filter_str = None , cached_versions = None
180+ ):
184181 """Collect crate version data from Bazel config, Cargo.lock, and crates.io."""
185182 requested_crates = parse_oak_crates (bzl_file )
186183 actual_crates = parse_cargo_lock (lock_file )
@@ -191,35 +188,56 @@ def collect_crate_data(bzl_file, lock_file, filter_str=None):
191188
192189 total = len (crate_names )
193190 data = []
191+ cached_versions = cached_versions or {}
192+
193+ fetched_any = False
194194 for i , name in enumerate (crate_names , 1 ):
195- print (
196- f"[{ i } /{ total } ] Fetching versions for { name } ..." ,
197- end = "\r " ,
198- file = sys .stderr ,
199- )
200195 requested = requested_crates [name ]
201196 actual_list = actual_crates .get (name , [])
202197
203- versions = get_crate_versions (name )
204- # Small delay to be nice to crates.io
205- time .sleep (0.05 )
198+ if name in cached_versions :
199+ versions = cached_versions [name ]
200+ else :
201+ fetched_any = True
202+ print (
203+ f"[{ i } /{ total } ] Fetching versions for { name } ..." ,
204+ end = "\r " ,
205+ file = sys .stderr ,
206+ )
207+ versions = get_crate_versions (name )
208+ # Small delay to be nice to crates.io
209+ time .sleep (0.05 )
206210
207211 data .append ({
208212 "name" : name ,
209213 "requested" : requested ,
210214 "actual" : actual_list ,
211215 "versions" : versions ,
212216 })
213- print (f"\n Finished fetching data for { total } crates." , file = sys .stderr )
217+ if fetched_any :
218+ print (f"\n Finished fetching data for { total } crates." , file = sys .stderr )
214219 return data
215220
216221
217- def print_report (data , total_count = None , exclude_pre_release = False ):
def strip_metadata(v):
    """Drop semver build metadata (the ``+`` suffix) from a version string.

    Sentinel values (``"*"``, ``"git"``) and non-string inputs are passed
    through unchanged so callers can feed raw entries straight from the
    parsed config without pre-filtering.
    """
    if isinstance(v, str) and v not in ("*", "git"):
        # partition never raises; with no "+" present it yields the whole string.
        return v.partition("+")[0]
    return v
227+
228+
229+ def print_report (
230+ data ,
231+ total_count = None ,
232+ include_pre_release = False ,
233+ versions_path = None ,
234+ cache_age = None ,
235+ ):
218236 """Print a report of crate versions and available updates."""
219237 # Define a shared format string for the table to ensure perfect alignment
220238 row_fmt = "{:<30} | {:<12} | {:<12} | {:<12} | {:<12} | {:<12}"
221239
222- if exclude_pre_release :
240+ if not include_pre_release :
223241 data = [d for d in data if not is_pre_release (d ["requested" ])]
224242
225243 print (
@@ -238,14 +256,14 @@ def print_report(data, total_count=None, exclude_pre_release=False):
238256
239257 for entry in data :
240258 name = entry ["name" ]
241- requested = entry ["requested" ]
259+ requested = strip_metadata ( entry ["requested" ])
242260 actual_list = entry ["actual" ]
243261 versions_info = entry .get ("versions" , [])
244262
245263 # Filter out yanked versions
246264 all_versions = [v for v , yanked in versions_info if not yanked ]
247265
248- if exclude_pre_release :
266+ if not include_pre_release :
249267 all_versions = [v for v in all_versions if not is_pre_release (v )]
250268 actual_list = [v for v in actual_list if not is_pre_release (v )]
251269
@@ -255,19 +273,19 @@ def print_report(data, total_count=None, exclude_pre_release=False):
255273
256274 if not actual_list :
257275 max_v = max (all_versions , key = version_key ) if all_versions else "N/A"
258- l_major = max_v
276+ l_major = strip_metadata ( max_v )
259277 else :
260278 highest_actual = max (actual_list , key = version_key )
261279 updates = find_latest_updates (highest_actual , all_versions )
262280
263281 if updates .get ("Patch" ):
264- l_patch = updates ["Patch" ]
282+ l_patch = strip_metadata ( updates ["Patch" ])
265283 summary ["Patch" ] += 1
266284 if updates .get ("Minor" ):
267- l_minor = updates ["Minor" ]
285+ l_minor = strip_metadata ( updates ["Minor" ])
268286 summary ["Minor" ] += 1
269287 if updates .get ("Major" ):
270- l_major = updates ["Major" ]
288+ l_major = strip_metadata ( updates ["Major" ])
271289 summary ["Major" ] += 1
272290
273291 if not actual_list :
@@ -276,10 +294,15 @@ def print_report(data, total_count=None, exclude_pre_release=False):
276294 )
277295 else :
278296 for i , v in enumerate (actual_list ):
297+ display_v = strip_metadata (v )
279298 if i == 0 :
280- print (row_fmt .format (name , requested , v , l_patch , l_minor , l_major ))
299+ print (
300+ row_fmt .format (
301+ name , requested , display_v , l_patch , l_minor , l_major
302+ )
303+ )
281304 else :
282- print (row_fmt .format ("" , "" , v , "" , "" , "" ))
305+ print (row_fmt .format ("" , "" , display_v , "" , "" , "" ))
283306
284307 print ("-" * 110 )
285308 print ("\n Update Summary:" )
@@ -292,12 +315,32 @@ def print_report(data, total_count=None, exclude_pre_release=False):
292315 else :
293316 print (f" Total Crates Analyzed: { len (data )} " )
294317
318+ if versions_path :
319+ age_str = (
320+ f" (updated { format_age (cache_age )} ago)"
321+ if cache_age is not None
322+ else ""
323+ )
324+ print (f"\n Versions Cache: { versions_path } { age_str } " )
325+ print ("To update the cache, run with the --update or -u flag." )
326+
327+
def format_age(seconds):
    """Format a time duration in seconds into a human-readable string.

    Uses the largest unit that keeps the value above zero in that unit:
    seconds, minutes, hours, then days.
    """
    # (upper bound, divisor, unit suffix) — checked smallest unit first.
    thresholds = (
        (60, 1, "s"),
        (3600, 60, "m"),
        (86400, 3600, "h"),
    )
    for limit, divisor, suffix in thresholds:
        if seconds < limit:
            return f"{int(seconds // divisor)}{suffix}"
    return f"{int(seconds // 86400)}d"
337+
295338
296339def main ():
297340 parser = argparse .ArgumentParser (
298341 description = (
299- "Analyze crate versions and generate reports. This tool can create "
300- " JSON data structures for offline analysis ."
342+ "Analyze crate versions and generate reports. Uses a local cache to "
343+ " store internet data ."
301344 ),
302345 epilog = """
303346Report columns:
@@ -315,48 +358,92 @@ def main():
315358 default = "Cargo.bazel.lock" ,
316359 help = "Path to Cargo.lock or Cargo.bazel.lock" ,
317360 )
318- parser .add_argument ("--save" , help = "Save crate data to a JSON file" )
319- parser .add_argument ("--load" , help = "Load crate data from a JSON file" )
361+ parser .add_argument (
362+ "--versions_file" , help = "Path to a custom versions cache file"
363+ )
364+ parser .add_argument (
365+ "--update" ,
366+ "-u" ,
367+ action = "store_true" ,
368+ help = "Force update the versions cache from crates.io" ,
369+ )
320370 parser .add_argument (
321371 "--filter" , "-f" , help = "Filter report by crate name (substring match)"
322372 )
323373 parser .add_argument (
324- "--exclude -pre-release" ,
374+ "--include -pre-release" ,
325375 action = "store_true" ,
326- help = "Exclude pre-release versions from the report" ,
376+ help = "Include pre-release versions in the report" ,
327377 )
328378 args = parser .parse_args ()
329379
330380 bzl_file = "bazel/crates/oak_crates.bzl"
331381
332- if args .load :
333- if not os .path .exists (args .load ):
334- print (f"Error: { args .load } not found" )
335- sys .exit (1 )
336- with open (args .load , "r" ) as f :
337- data = json .load (f )
382+ default_cache_path = (
383+ pathlib .Path .home () / ".cache" / "oak" / "crate_cache.json"
384+ )
385+ versions_path = (
386+ pathlib .Path (args .versions_file )
387+ if args .versions_file
388+ else default_cache_path
389+ )
390+
391+ cached_versions = {}
392+ cache_age = None
393+ if versions_path .exists () and not args .update :
394+ cache_age = time .time () - versions_path .stat ().st_mtime
395+ with open (versions_path , "r" ) as f :
396+ loaded_data = json .load (f )
397+ if isinstance (loaded_data , list ):
398+ # Support old format (list of dicts)
399+ for entry in loaded_data :
400+ if "name" in entry and "versions" in entry :
401+ cached_versions [entry ["name" ]] = entry ["versions" ]
402+ elif isinstance (loaded_data , dict ):
403+ # New format (dict of name -> versions)
404+ cached_versions = loaded_data
405+ elif args .update :
406+ print (
407+ f"Forcing update of versions from crates.io to { versions_path } ..." ,
408+ file = sys .stderr ,
409+ )
338410 else :
339- if not os .path .exists (bzl_file ):
340- print (f"Error: { bzl_file } not found" )
341- sys .exit (1 )
342- if not os .path .exists (args .lock_file ):
343- print (f"Error: { args .lock_file } not found" )
344- sys .exit (1 )
345-
346- # If saving, we collect everything.
347- # If not, we can speed up by filtering during collection.
348- collect_filter = None if args .save else args .filter
349- try :
350- data = collect_crate_data (
351- bzl_file , args .lock_file , filter_str = collect_filter
352- )
353- except Exception as e : # pylint: disable=broad-exception-caught
354- print (f"Error during data collection: { e } " )
355- sys .exit (1 )
411+ print (
412+ f"Versions cache { versions_path } not found. Fetching from crates.io..." ,
413+ file = sys .stderr ,
414+ )
356415
357- if args .save :
358- with open (args .save , "w" ) as f :
359- json .dump (data , f , indent = 2 )
416+ if not os .path .exists (bzl_file ):
417+ print (f"Error: { bzl_file } not found" )
418+ sys .exit (1 )
419+ if not os .path .exists (args .lock_file ):
420+ print (f"Error: { args .lock_file } not found" )
421+ sys .exit (1 )
422+
423+ # If we need to populate or update the cache, we collect everything.
424+ # If we have a cache and are not updating, we can filter during collection.
425+ need_full_fetch = not versions_path .exists () or args .update
426+ collect_filter = None if need_full_fetch else args .filter
427+
428+ try :
429+ data = collect_crate_data (
430+ bzl_file ,
431+ args .lock_file ,
432+ filter_str = collect_filter ,
433+ cached_versions = cached_versions ,
434+ )
435+ except Exception as e : # pylint: disable=broad-exception-caught
436+ print (f"Error during data collection: { e } " )
437+ sys .exit (1 )
438+
439+ # Save/Update the cache if we did a full fetch or it was missing
440+ if need_full_fetch :
441+ # Ensure directory exists for the cache
442+ versions_path .parent .mkdir (parents = True , exist_ok = True )
443+ save_data = {entry ["name" ]: entry ["versions" ] for entry in data }
444+ with open (versions_path , "w" ) as f :
445+ json .dump (save_data , f , indent = 2 )
446+ cache_age = 0
360447
361448 total_count = len (data )
362449 report_data = data
@@ -366,7 +453,9 @@ def main():
366453 print_report (
367454 report_data ,
368455 total_count = total_count ,
369- exclude_pre_release = args .exclude_pre_release ,
456+ include_pre_release = args .include_pre_release ,
457+ versions_path = versions_path ,
458+ cache_age = cache_age ,
370459 )
371460
372461
0 commit comments