Skip to content

Commit 66869c5

Browse files
committed
[WIP] Parser + --allow-sorry
gherrit-pr-id: G101266ed5cfc0fe74d16aaae0785d9622e8da5be
1 parent 638d2d4 commit 66869c5

18 files changed

+419
-234
lines changed

tools/Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tools/hermes/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,5 @@ clap = { version = "4.5.56", features = ["derive"] }
99
regex = "1.12.2"
1010
walkdir = "2.5.0"
1111
cargo_metadata = "0.18"
12+
syn = { version = "2.0.114", features = ["full", "visit", "extra-traits"] }
13+
quote = "1.0.43"

tools/hermes/src/main.rs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
use std::path::PathBuf;
1010

1111
use anyhow::Result;
12-
use cargo_hermes::pipeline::run_pipeline;
12+
use cargo_hermes::pipeline::{Sorry, run_pipeline};
1313
use clap::{Parser, Subcommand};
1414

1515
#[derive(Parser, Debug)]
@@ -43,14 +43,19 @@ pub enum Commands {
4343
/// Path to Cargo.toml or Cargo script
4444
#[arg(long)]
4545
manifest_path: Option<PathBuf>,
46+
/// Allow missing proofs (substitutes 'sorry') and instruct Lean to accept 'sorry'
47+
#[arg(long)]
48+
allow_sorry: bool,
4649
},
4750
}
4851

4952
fn main() -> Result<()> {
5053
let CargoCli::Hermes(args) = CargoCli::parse();
51-
let Commands::Verify { crate_name, dest, aeneas_path, manifest_path } = args.command;
54+
let Commands::Verify { crate_name, dest, aeneas_path, manifest_path, allow_sorry } =
55+
args.command;
5256

5357
let crate_root = std::env::current_dir()?;
5458
let dest = dest.canonicalize()?;
55-
run_pipeline(crate_name, &crate_root, &dest, aeneas_path, manifest_path)
59+
let sorry_mode = if allow_sorry { Sorry::AllowSorry } else { Sorry::RejectSorry };
60+
run_pipeline(crate_name, &crate_root, &dest, aeneas_path, manifest_path, sorry_mode)
5661
}

tools/hermes/src/parser.rs

Lines changed: 172 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -6,70 +6,188 @@
66
// This file may not be copied, modified, or distributed except according to
77
// those terms.
88

9-
use std::sync::LazyLock;
9+
use anyhow::{Result, bail};
10+
use syn::{
11+
Attribute, ItemFn, parse_file,
12+
visit::{self, Visit},
13+
};
1014

11-
use anyhow::Result;
12-
use regex::Regex;
13-
14-
/// Represents a specification block extracted from the source code.
15-
///
16-
/// Contains the function name it applies to and the raw body of the specification.
15+
/// Represents a function parsed from the source code, including its signature and attached specs.
1716
#[derive(Debug, Clone)]
18-
pub struct SpecBlock {
19-
pub function_name: String,
20-
pub body: String,
17+
pub struct ParsedFunction {
18+
pub fn_name: String,
19+
pub generics: syn::Generics,
20+
pub spec: Option<String>,
21+
pub proof: Option<String>,
2122
}
2223

23-
/// Represents a proof block extracted from the source code.
24-
///
25-
/// Contains the raw body of the proof.
26-
#[derive(Debug, Clone)]
27-
pub struct ProofBlock {
28-
pub body: String,
24+
pub struct ExtractedBlocks {
25+
pub functions: Vec<ParsedFunction>,
2926
}
3027

31-
#[derive(Debug, Clone)]
32-
pub struct ExtractedBlocks {
33-
pub specs: Vec<SpecBlock>,
34-
pub proofs: Vec<ProofBlock>,
28+
struct SpecVisitor {
29+
functions: Vec<ParsedFunction>,
30+
errors: Vec<anyhow::Error>,
3531
}
3632

37-
/// Extracts both specification and proof blocks from the provided source content.
38-
///
39-
/// This function uses regular expressions to find blocks formatted as:
40-
/// - `/*@ lean spec function_name ... @*/`
41-
/// - `/*@ proof ... @*/`
42-
pub fn extract_blocks(content: &str) -> Result<ExtractedBlocks> {
43-
// Regex matches:
44-
// - Start with `/*@`
45-
// - `lean spec` followed by function name
46-
// - Capture function name in `fn_name`
47-
// - Capture content in `body` (non-greedy)
48-
// - End with `@*/`
49-
static SPEC_RE: LazyLock<Regex> = LazyLock::new(|| {
50-
Regex::new(r"(?ms)/\*\@\s*lean\s+spec\s+(?P<fn_name>\w+)\s+(?P<body>.*?)\s*@\*/").unwrap()
51-
});
52-
53-
// Regex matches:
54-
// - Start with `/*@`
55-
// - `proof`
56-
// - Capture content in `body` (non-greedy)
57-
// - End with `@*/`
58-
static PROOF_RE: LazyLock<Regex> =
59-
LazyLock::new(|| Regex::new(r"(?ms)/\*\@\s*proof\s+(?P<body>.*?)\s*@\*/").unwrap());
60-
61-
let mut specs = Vec::new();
62-
for cap in SPEC_RE.captures_iter(content) {
63-
specs.push(SpecBlock {
64-
function_name: cap["fn_name"].to_string(),
65-
body: cap["body"].trim().to_string(),
66-
});
33+
impl SpecVisitor {
34+
fn new() -> Self {
35+
Self { functions: Vec::new(), errors: Vec::new() }
36+
}
37+
38+
fn check_attrs_for_misplaced_spec(&mut self, attrs: &[Attribute], item_kind: &str) {
39+
for attr in attrs {
40+
if let Some(doc_str) = parse_doc_attr(attr) {
41+
if doc_str.trim_start().starts_with("@") {
42+
self.errors.push(anyhow::anyhow!(
43+
"Found `///@` spec usage on a {}, but it is only allowed on functions.",
44+
item_kind
45+
));
46+
}
47+
}
48+
}
6749
}
50+
}
51+
52+
impl<'ast> Visit<'ast> for SpecVisitor {
53+
fn visit_item_fn(&mut self, node: &'ast ItemFn) {
54+
let fn_name = node.sig.ident.to_string();
55+
let mut spec_lines = Vec::new();
56+
let mut proof_lines = Vec::new();
57+
let mut current_mode = None; // None, Some("spec"), Some("proof")
58+
59+
for attr in &node.attrs {
60+
if let Some(doc_str) = parse_doc_attr(attr) {
61+
let trimmed = doc_str.trim();
62+
// Check for ///@ marker (doc comment starting with @)
63+
if trimmed.starts_with('@') {
64+
// Check if it's a new block start
65+
if let Some(content) = trimmed.strip_prefix("@ lean spec") {
66+
current_mode = Some("spec");
67+
spec_lines.push(content.to_string());
68+
} else if let Some(content) = trimmed.strip_prefix("@ lean model") {
69+
current_mode = Some("spec"); // Treat model as spec
70+
spec_lines.push(content.to_string());
71+
} else if let Some(content) = trimmed.strip_prefix("@ proof") {
72+
current_mode = Some("proof");
73+
proof_lines.push(content.to_string());
74+
} else {
75+
// Continuation line
76+
match current_mode {
77+
Some("spec") => {
78+
// strip leading @ and space?
79+
// User types `///@ ...` -> extracted `@ ...`
80+
// If we just extract `@`, we get ` ...`
81+
// The user might put `///@ ...`.
82+
// If I strip `@`, I get ` ...`.
83+
// I should probably strip the leading `@` and one optional space?
84+
// `trimmed` starts with `@`.
85+
let content = &trimmed[1..];
86+
spec_lines.push(content.to_string());
87+
}
88+
Some("proof") => {
89+
let content = &trimmed[1..];
90+
proof_lines.push(content.to_string());
91+
}
92+
None => {
93+
// Orphaned @ line? or maybe not meant for us?
94+
self.errors.push(anyhow::anyhow!("Found `///@` line without preceding `lean spec` or `proof` on function '{}'", fn_name));
95+
}
96+
_ => {} // Should not be possible with current_mode logic
97+
}
98+
}
99+
}
100+
}
101+
}
102+
103+
let spec = if !spec_lines.is_empty() {
104+
let full_spec = spec_lines.join("\n");
105+
let trimmed_spec = full_spec.trim();
106+
// Strip function name from the beginning of the spec
107+
if let Some(rest) = trimmed_spec.strip_prefix(fn_name.as_str()) {
108+
Some(rest.trim().to_string())
109+
} else {
110+
Some(trimmed_spec.to_string())
111+
}
112+
} else {
113+
None
114+
};
115+
116+
let proof = if !proof_lines.is_empty() { Some(proof_lines.join("\n")) } else { None };
117+
118+
if spec.is_some() || proof.is_some() {
119+
self.functions.push(ParsedFunction {
120+
fn_name,
121+
generics: node.sig.generics.clone(),
122+
spec,
123+
proof,
124+
});
125+
}
126+
127+
// Continue visiting children
128+
visit::visit_item_fn(self, node);
129+
}
130+
131+
fn visit_item_struct(&mut self, node: &'ast syn::ItemStruct) {
132+
self.check_attrs_for_misplaced_spec(&node.attrs, "struct");
133+
visit::visit_item_struct(self, node);
134+
}
135+
136+
fn visit_item_enum(&mut self, node: &'ast syn::ItemEnum) {
137+
self.check_attrs_for_misplaced_spec(&node.attrs, "enum");
138+
visit::visit_item_enum(self, node);
139+
}
140+
141+
fn visit_item_mod(&mut self, node: &'ast syn::ItemMod) {
142+
self.check_attrs_for_misplaced_spec(&node.attrs, "module");
143+
visit::visit_item_mod(self, node);
144+
}
145+
146+
fn visit_item_const(&mut self, node: &'ast syn::ItemConst) {
147+
self.check_attrs_for_misplaced_spec(&node.attrs, "const");
148+
visit::visit_item_const(self, node);
149+
}
150+
151+
// Catch-all for other items with attributes?
152+
// Ideally we'd cover all items, but these are the most common places users might mistakenly put docs.
153+
// Let's also cover TypeAlias and Trait
154+
155+
fn visit_item_type(&mut self, node: &'ast syn::ItemType) {
156+
self.check_attrs_for_misplaced_spec(&node.attrs, "type alias");
157+
visit::visit_item_type(self, node);
158+
}
159+
160+
fn visit_item_trait(&mut self, node: &'ast syn::ItemTrait) {
161+
self.check_attrs_for_misplaced_spec(&node.attrs, "trait");
162+
visit::visit_item_trait(self, node);
163+
}
164+
}
165+
166+
fn parse_doc_attr(attr: &Attribute) -> Option<String> {
167+
if !attr.path().is_ident("doc") {
168+
return None;
169+
}
170+
171+
// syn 2.0: doc = "..." is a NameValue meta
172+
match &attr.meta {
173+
syn::Meta::NameValue(nv) => match &nv.value {
174+
syn::Expr::Lit(syn::ExprLit { lit: syn::Lit::Str(s), .. }) => Some(s.value()),
175+
_ => None,
176+
},
177+
_ => None,
178+
}
179+
}
180+
181+
pub fn extract_blocks(content: &str) -> Result<ExtractedBlocks> {
182+
let ast = parse_file(content)?;
183+
let mut visitor = SpecVisitor::new();
184+
visitor.visit_file(&ast);
68185

69-
let mut proofs = Vec::new();
70-
for cap in PROOF_RE.captures_iter(content) {
71-
proofs.push(ProofBlock { body: cap["body"].trim().to_string() });
186+
if !visitor.errors.is_empty() {
187+
// Return the first error for now, or bundle them
188+
let msg = visitor.errors.iter().map(|e| e.to_string()).collect::<Vec<_>>().join("\n");
189+
bail!("Spec extraction failed:\n{}", msg);
72190
}
73191

74-
Ok(ExtractedBlocks { specs, proofs })
192+
Ok(ExtractedBlocks { functions: visitor.functions })
75193
}

0 commit comments

Comments
 (0)