|
6 | 6 | // This file may not be copied, modified, or distributed except according to |
7 | 7 | // those terms. |
8 | 8 |
|
9 | | -use std::sync::LazyLock; |
| 9 | +use anyhow::{Result, bail}; |
| 10 | +use syn::{ |
| 11 | + Attribute, ItemFn, parse_file, |
| 12 | + visit::{self, Visit}, |
| 13 | +}; |
10 | 14 |
|
11 | | -use anyhow::Result; |
12 | | -use regex::Regex; |
13 | | - |
14 | | -/// Represents a specification block extracted from the source code. |
15 | | -/// |
16 | | -/// Contains the function name it applies to and the raw body of the specification. |
| 15 | +/// Represents a function parsed from the source code, including its signature and attached specs. |
17 | 16 | #[derive(Debug, Clone)] |
18 | | -pub struct SpecBlock { |
19 | | - pub function_name: String, |
20 | | - pub body: String, |
| 17 | +pub struct ParsedFunction { |
| 18 | + pub fn_name: String, |
| 19 | + pub generics: syn::Generics, |
| 20 | + pub spec: Option<String>, |
| 21 | + pub proof: Option<String>, |
21 | 22 | } |
22 | 23 |
|
23 | | -/// Represents a proof block extracted from the source code. |
24 | | -/// |
25 | | -/// Contains the raw body of the proof. |
26 | | -#[derive(Debug, Clone)] |
27 | | -pub struct ProofBlock { |
28 | | - pub body: String, |
| 24 | +pub struct ExtractedBlocks { |
| 25 | + pub functions: Vec<ParsedFunction>, |
29 | 26 | } |
30 | 27 |
|
31 | | -#[derive(Debug, Clone)] |
32 | | -pub struct ExtractedBlocks { |
33 | | - pub specs: Vec<SpecBlock>, |
34 | | - pub proofs: Vec<ProofBlock>, |
| 28 | +struct SpecVisitor { |
| 29 | + functions: Vec<ParsedFunction>, |
| 30 | + errors: Vec<anyhow::Error>, |
35 | 31 | } |
36 | 32 |
|
37 | | -/// Extracts both specification and proof blocks from the provided source content. |
38 | | -/// |
39 | | -/// This function uses regular expressions to find blocks formatted as: |
40 | | -/// - `/*@ lean spec function_name ... @*/` |
41 | | -/// - `/*@ proof ... @*/` |
42 | | -pub fn extract_blocks(content: &str) -> Result<ExtractedBlocks> { |
43 | | - // Regex matches: |
44 | | - // - Start with `/*@` |
45 | | - // - `lean spec` followed by function name |
46 | | - // - Capture function name in `fn_name` |
47 | | - // - Capture content in `body` (non-greedy) |
48 | | - // - End with `@*/` |
49 | | - static SPEC_RE: LazyLock<Regex> = LazyLock::new(|| { |
50 | | - Regex::new(r"(?ms)/\*\@\s*lean\s+spec\s+(?P<fn_name>\w+)\s+(?P<body>.*?)\s*@\*/").unwrap() |
51 | | - }); |
52 | | - |
53 | | - // Regex matches: |
54 | | - // - Start with `/*@` |
55 | | - // - `proof` |
56 | | - // - Capture content in `body` (non-greedy) |
57 | | - // - End with `@*/` |
58 | | - static PROOF_RE: LazyLock<Regex> = |
59 | | - LazyLock::new(|| Regex::new(r"(?ms)/\*\@\s*proof\s+(?P<body>.*?)\s*@\*/").unwrap()); |
60 | | - |
61 | | - let mut specs = Vec::new(); |
62 | | - for cap in SPEC_RE.captures_iter(content) { |
63 | | - specs.push(SpecBlock { |
64 | | - function_name: cap["fn_name"].to_string(), |
65 | | - body: cap["body"].trim().to_string(), |
66 | | - }); |
| 33 | +impl SpecVisitor { |
| 34 | + fn new() -> Self { |
| 35 | + Self { functions: Vec::new(), errors: Vec::new() } |
| 36 | + } |
| 37 | + |
| 38 | + fn check_attrs_for_misplaced_spec(&mut self, attrs: &[Attribute], item_kind: &str) { |
| 39 | + for attr in attrs { |
| 40 | + if let Some(doc_str) = parse_doc_attr(attr) { |
| 41 | + if doc_str.trim_start().starts_with("@") { |
| 42 | + self.errors.push(anyhow::anyhow!( |
| 43 | + "Found `///@` spec usage on a {}, but it is only allowed on functions.", |
| 44 | + item_kind |
| 45 | + )); |
| 46 | + } |
| 47 | + } |
| 48 | + } |
67 | 49 | } |
| 50 | +} |
| 51 | + |
| 52 | +impl<'ast> Visit<'ast> for SpecVisitor { |
| 53 | + fn visit_item_fn(&mut self, node: &'ast ItemFn) { |
| 54 | + let fn_name = node.sig.ident.to_string(); |
| 55 | + let mut spec_lines = Vec::new(); |
| 56 | + let mut proof_lines = Vec::new(); |
| 57 | + let mut current_mode = None; // None, Some("spec"), Some("proof") |
| 58 | + |
| 59 | + for attr in &node.attrs { |
| 60 | + if let Some(doc_str) = parse_doc_attr(attr) { |
| 61 | + let trimmed = doc_str.trim(); |
| 62 | + // Check for ///@ marker (doc comment starting with @) |
| 63 | + if trimmed.starts_with('@') { |
| 64 | + // Check if it's a new block start |
| 65 | + if let Some(content) = trimmed.strip_prefix("@ lean spec") { |
| 66 | + current_mode = Some("spec"); |
| 67 | + spec_lines.push(content.to_string()); |
| 68 | + } else if let Some(content) = trimmed.strip_prefix("@ lean model") { |
| 69 | + current_mode = Some("spec"); // Treat model as spec |
| 70 | + spec_lines.push(content.to_string()); |
| 71 | + } else if let Some(content) = trimmed.strip_prefix("@ proof") { |
| 72 | + current_mode = Some("proof"); |
| 73 | + proof_lines.push(content.to_string()); |
| 74 | + } else { |
| 75 | + // Continuation line |
| 76 | + match current_mode { |
| 77 | + Some("spec") => { |
| 78 | + // strip leading @ and space? |
| 79 | + // User types `///@ ...` -> extracted `@ ...` |
| 80 | + // If we just extract `@`, we get ` ...` |
| 81 | + // The user might put `///@ ...`. |
| 82 | + // If I strip `@`, I get ` ...`. |
| 83 | + // I should probably strip the leading `@` and one optional space? |
| 84 | + // `trimmed` starts with `@`. |
| 85 | + let content = &trimmed[1..]; |
| 86 | + spec_lines.push(content.to_string()); |
| 87 | + } |
| 88 | + Some("proof") => { |
| 89 | + let content = &trimmed[1..]; |
| 90 | + proof_lines.push(content.to_string()); |
| 91 | + } |
| 92 | + None => { |
| 93 | + // Orphaned @ line? or maybe not meant for us? |
| 94 | + self.errors.push(anyhow::anyhow!("Found `///@` line without preceding `lean spec` or `proof` on function '{}'", fn_name)); |
| 95 | + } |
| 96 | + _ => {} // Should not be possible with current_mode logic |
| 97 | + } |
| 98 | + } |
| 99 | + } |
| 100 | + } |
| 101 | + } |
| 102 | + |
| 103 | + let spec = if !spec_lines.is_empty() { |
| 104 | + let full_spec = spec_lines.join("\n"); |
| 105 | + let trimmed_spec = full_spec.trim(); |
| 106 | + // Strip function name from the beginning of the spec |
| 107 | + if let Some(rest) = trimmed_spec.strip_prefix(fn_name.as_str()) { |
| 108 | + Some(rest.trim().to_string()) |
| 109 | + } else { |
| 110 | + Some(trimmed_spec.to_string()) |
| 111 | + } |
| 112 | + } else { |
| 113 | + None |
| 114 | + }; |
| 115 | + |
| 116 | + let proof = if !proof_lines.is_empty() { Some(proof_lines.join("\n")) } else { None }; |
| 117 | + |
| 118 | + if spec.is_some() || proof.is_some() { |
| 119 | + self.functions.push(ParsedFunction { |
| 120 | + fn_name, |
| 121 | + generics: node.sig.generics.clone(), |
| 122 | + spec, |
| 123 | + proof, |
| 124 | + }); |
| 125 | + } |
| 126 | + |
| 127 | + // Continue visiting children |
| 128 | + visit::visit_item_fn(self, node); |
| 129 | + } |
| 130 | + |
| 131 | + fn visit_item_struct(&mut self, node: &'ast syn::ItemStruct) { |
| 132 | + self.check_attrs_for_misplaced_spec(&node.attrs, "struct"); |
| 133 | + visit::visit_item_struct(self, node); |
| 134 | + } |
| 135 | + |
| 136 | + fn visit_item_enum(&mut self, node: &'ast syn::ItemEnum) { |
| 137 | + self.check_attrs_for_misplaced_spec(&node.attrs, "enum"); |
| 138 | + visit::visit_item_enum(self, node); |
| 139 | + } |
| 140 | + |
| 141 | + fn visit_item_mod(&mut self, node: &'ast syn::ItemMod) { |
| 142 | + self.check_attrs_for_misplaced_spec(&node.attrs, "module"); |
| 143 | + visit::visit_item_mod(self, node); |
| 144 | + } |
| 145 | + |
| 146 | + fn visit_item_const(&mut self, node: &'ast syn::ItemConst) { |
| 147 | + self.check_attrs_for_misplaced_spec(&node.attrs, "const"); |
| 148 | + visit::visit_item_const(self, node); |
| 149 | + } |
| 150 | + |
| 151 | + // Catch-all for other items with attributes? |
| 152 | + // Ideally we'd cover all items, but these are the most common places users might mistakenly put docs. |
| 153 | + // Let's also cover TypeAlias and Trait |
| 154 | + |
| 155 | + fn visit_item_type(&mut self, node: &'ast syn::ItemType) { |
| 156 | + self.check_attrs_for_misplaced_spec(&node.attrs, "type alias"); |
| 157 | + visit::visit_item_type(self, node); |
| 158 | + } |
| 159 | + |
| 160 | + fn visit_item_trait(&mut self, node: &'ast syn::ItemTrait) { |
| 161 | + self.check_attrs_for_misplaced_spec(&node.attrs, "trait"); |
| 162 | + visit::visit_item_trait(self, node); |
| 163 | + } |
| 164 | +} |
| 165 | + |
| 166 | +fn parse_doc_attr(attr: &Attribute) -> Option<String> { |
| 167 | + if !attr.path().is_ident("doc") { |
| 168 | + return None; |
| 169 | + } |
| 170 | + |
| 171 | + // syn 2.0: doc = "..." is a NameValue meta |
| 172 | + match &attr.meta { |
| 173 | + syn::Meta::NameValue(nv) => match &nv.value { |
| 174 | + syn::Expr::Lit(syn::ExprLit { lit: syn::Lit::Str(s), .. }) => Some(s.value()), |
| 175 | + _ => None, |
| 176 | + }, |
| 177 | + _ => None, |
| 178 | + } |
| 179 | +} |
| 180 | + |
| 181 | +pub fn extract_blocks(content: &str) -> Result<ExtractedBlocks> { |
| 182 | + let ast = parse_file(content)?; |
| 183 | + let mut visitor = SpecVisitor::new(); |
| 184 | + visitor.visit_file(&ast); |
68 | 185 |
|
69 | | - let mut proofs = Vec::new(); |
70 | | - for cap in PROOF_RE.captures_iter(content) { |
71 | | - proofs.push(ProofBlock { body: cap["body"].trim().to_string() }); |
| 186 | + if !visitor.errors.is_empty() { |
| 187 | + // Return the first error for now, or bundle them |
| 188 | + let msg = visitor.errors.iter().map(|e| e.to_string()).collect::<Vec<_>>().join("\n"); |
| 189 | + bail!("Spec extraction failed:\n{}", msg); |
72 | 190 | } |
73 | 191 |
|
74 | | - Ok(ExtractedBlocks { specs, proofs }) |
| 192 | + Ok(ExtractedBlocks { functions: visitor.functions }) |
75 | 193 | } |
0 commit comments