Skip to content

Commit 9f20265

Browse files
authored
feat: implement query static analysis (#4)
1 parent 3f65433 commit 9f20265

22 files changed

+1588
-260
lines changed

src/analysis.rs

Lines changed: 974 additions & 0 deletions
Large diffs are not rendered by default.

src/ast.rs

Lines changed: 180 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,13 @@
1111
//! - [`Value`] - The various kinds of expression values (literals, operators, etc.)
1212
//! - [`Source`] - Data sources in FROM clauses
1313
//!
14-
use crate::token::{Operator, Token};
14+
use std::{collections::BTreeMap, mem};
15+
16+
use crate::{
17+
analysis::{AnalysisOptions, Typed, static_analysis},
18+
error::{AnalysisError, Error},
19+
token::{Operator, Token},
20+
};
1521
use serde::Serialize;
1622

1723
/// Position information for source code locations.
@@ -47,11 +53,12 @@ impl From<Token<'_>> for Pos {
4753

4854
/// Type information for expressions.
4955
///
50-
/// This enum represents the type of an expression in the EventQL type system.
51-
/// Types can be inferred during semantic analysis or left as `Unspecified`.
52-
#[derive(Copy, Clone, PartialEq, Eq, Debug, Serialize)]
56+
/// This enum represents the type of an expression in the EventQL type system.
57+
58+
#[derive(Clone, PartialEq, Eq, Debug, Default, Serialize)]
5359
pub enum Type {
5460
/// Type has not been determined yet
61+
#[default]
5562
Unspecified,
5663
/// Numeric type (f64)
5764
Number,
@@ -60,11 +67,122 @@ pub enum Type {
6067
/// Boolean type
6168
Bool,
6269
/// Array type
63-
Array,
70+
Array(Vec<Type>),
6471
/// Record (object) type
65-
Record,
72+
Record(BTreeMap<String, Type>),
6673
/// Subject pattern type
6774
Subject,
75+
/// Function type
76+
App { args: Vec<Type>, result: Box<Type> },
77+
}
78+
79+
impl Type {
80+
pub fn as_record_or_panic_mut(&mut self) -> &mut BTreeMap<String, Type> {
81+
if let Self::Record(r) = self {
82+
return r;
83+
}
84+
85+
panic!("expected record type, got {:?}", self);
86+
}
87+
88+
/// Checks if two types are the same.
89+
///
90+
/// * If `self` is `Type::Unspecified` then the more specific `Type` is returned.
91+
/// * If `self` is `Type::Subject` and is checked against a `Type::String` (or vice versa) then the result is demoted to `Type::String`.
92+
pub fn check(self, attrs: &Attrs, other: Type) -> Result<Type, AnalysisError> {
93+
match (self, other) {
94+
(Self::Unspecified, other) => Ok(other),
95+
(this, Self::Unspecified) => Ok(this),
96+
(Self::Subject, Self::Subject) => Ok(Self::Subject),
97+
98+
// Subjects are strings so there is no reason to reject a type
99+
// when compared to a string. However, when it happens, we demote
100+
// a subject to a string.
101+
(Self::Subject, Self::String) => Ok(Self::String),
102+
(Self::String, Self::Subject) => Ok(Self::String),
103+
104+
(Self::Number, Self::Number) => Ok(Self::Number),
105+
(Self::String, Self::String) => Ok(Self::String),
106+
(Self::Bool, Self::Bool) => Ok(Self::Bool),
107+
108+
(Self::Array(mut a), Self::Array(b)) if a.len() == b.len() => {
109+
if a.is_empty() {
110+
return Ok(Self::Array(a));
111+
}
112+
113+
for (a, b) in a.iter_mut().zip(b.into_iter()) {
114+
let tmp = mem::take(a);
115+
*a = tmp.check(attrs, b)?;
116+
}
117+
118+
Ok(Self::Array(a))
119+
}
120+
121+
(Self::Record(mut a), Self::Record(b)) if a.len() == b.len() => {
122+
if a.is_empty() {
123+
return Ok(Self::Record(a));
124+
}
125+
126+
for (ak, bk) in a.keys().zip(b.keys()) {
127+
if ak != bk {
128+
return Err(AnalysisError::TypeMismatch(
129+
attrs.pos.line,
130+
attrs.pos.col,
131+
Self::Record(a),
132+
Self::Record(b),
133+
));
134+
}
135+
}
136+
137+
for (av, bv) in a.values_mut().zip(b.into_values()) {
138+
let a = mem::take(av);
139+
*av = a.check(attrs, bv)?;
140+
}
141+
142+
Ok(Self::Record(a))
143+
}
144+
145+
(
146+
Self::App {
147+
args: mut a_args,
148+
result: mut a_res,
149+
},
150+
Self::App {
151+
args: b_args,
152+
result: b_res,
153+
},
154+
) if a_args.len() == b_args.len() => {
155+
if a_args.is_empty() {
156+
let tmp = mem::take(a_res.as_mut());
157+
*a_res = tmp.check(attrs, *b_res)?;
158+
return Ok(Self::App {
159+
args: a_args,
160+
result: a_res,
161+
});
162+
}
163+
164+
for (a, b) in a_args.iter_mut().zip(b_args.into_iter()) {
165+
let tmp = mem::take(a);
166+
*a = tmp.check(attrs, b)?;
167+
}
168+
169+
let tmp = mem::take(a_res.as_mut());
170+
*a_res = tmp.check(attrs, *b_res)?;
171+
172+
Ok(Self::App {
173+
args: a_args,
174+
result: a_res,
175+
})
176+
}
177+
178+
(this, other) => Err(AnalysisError::TypeMismatch(
179+
attrs.pos.line,
180+
attrs.pos.col,
181+
this,
182+
other,
183+
)),
184+
}
185+
}
68186
}
69187

70188
/// Attributes attached to each expression node.
@@ -75,20 +193,12 @@ pub enum Type {
75193
pub struct Attrs {
76194
/// Source position of this expression
77195
pub pos: Pos,
78-
/// Scope level (0 for top-level, incremented for subqueries)
79-
pub scope: u64,
80-
/// Type of this expression
81-
pub tpe: Type,
82196
}
83197

84198
impl Attrs {
85199
/// Create new attributes for the given source position.
86-
pub fn new(pos: Pos, scope: u64) -> Self {
87-
Self {
88-
pos,
89-
scope,
90-
tpe: Type::Unspecified,
91-
}
200+
pub fn new(pos: Pos) -> Self {
201+
Self { pos }
92202
}
93203
}
94204

@@ -212,6 +322,18 @@ pub enum Value {
212322
Group(Box<Expr>),
213323
}
214324

325+
/// A source binding. A name attached to a source of events.
326+
///
327+
/// # Examples
328+
/// In `FROM e IN events`, `e` is the binding.
329+
#[derive(Debug, Clone, Serialize)]
330+
pub struct Binding {
331+
/// Name attached to a source of events
332+
pub name: String,
333+
/// Position in the source code where that binding was introduced
334+
pub pos: Pos,
335+
}
336+
215337
/// A data source in a FROM clause.
216338
///
217339
/// Sources specify where data comes from in a query. Each source has a binding
@@ -223,11 +345,11 @@ pub enum Value {
223345
/// - `binding`: `"e"`
224346
/// - `kind`: `SourceKind::Name("events")`
225347
#[derive(Debug, Clone, Serialize)]
226-
pub struct Source {
348+
pub struct Source<A> {
227349
/// Variable name bound to this source
228-
pub binding: String,
350+
pub binding: Binding,
229351
/// What this source represents
230-
pub kind: SourceKind,
352+
pub kind: SourceKind<A>,
231353
}
232354

233355
/// The kind of data source.
@@ -237,13 +359,13 @@ pub struct Source {
237359
/// - Subject patterns (e.g., `FROM e IN "users/john"`)
238360
/// - Subqueries (e.g., `FROM e IN (SELECT ...)`)
239361
#[derive(Debug, Clone, Serialize)]
240-
pub enum SourceKind {
362+
pub enum SourceKind<A> {
241363
/// Named source (identifier)
242364
Name(String),
243365
/// Subject pattern (string literal used as event subject pattern)
244366
Subject(String),
245367
/// Nested subquery
246-
Subquery(Box<Query>),
368+
Subquery(Box<Query<A>>),
247369
}
248370

249371
/// ORDER BY clause specification.
@@ -309,6 +431,12 @@ pub enum Limit {
309431
Top(u64),
310432
}
311433

434+
/// Represents the state of a query that only has valid syntax. There is no guarantee that all
435+
/// the variables exist or that the query is sound. For example, the user may ask for an event
436+
/// that has a field that should be both a string and a number at the same time.
437+
#[derive(Debug, Clone, Copy, Serialize)]
438+
pub struct Raw;
439+
312440
/// A complete EventQL query.
313441
///
314442
/// This is the root node of the AST, representing a full query with all its clauses.
@@ -345,11 +473,11 @@ pub enum Limit {
345473
/// assert!(query.limit.is_some());
346474
/// ```
347475
#[derive(Debug, Clone, Serialize)]
348-
pub struct Query {
476+
pub struct Query<A> {
349477
/// Metadata about this query
350478
pub attrs: Attrs,
351479
/// FROM clause sources (must have at least one)
352-
pub sources: Vec<Source>,
480+
pub sources: Vec<Source<A>>,
353481
/// Optional WHERE clause filter predicate
354482
pub predicate: Option<Expr>,
355483
/// Optional GROUP BY clause expression
@@ -362,4 +490,33 @@ pub struct Query {
362490
pub projection: Expr,
363491
/// Remove duplicate rows from the query's results
364492
pub distinct: bool,
493+
/// Type-level metadata about the query's analysis state.
494+
///
495+
/// This field uses a generic type parameter to track whether the query
496+
/// is in a raw (parsed but untyped) state or has been statically analyzed:
497+
/// - `Query<Raw>`: Query parsed but not yet type-checked
498+
/// - `Query<Typed>`: Query that has passed static analysis with validated
499+
/// types and variable scopes
500+
///
501+
/// This provides compile-time guarantees about the query's type safety.
502+
pub meta: A,
503+
}
504+
505+
impl Query<Raw> {
506+
/// Performs static analysis on this raw query.
507+
///
508+
/// This is a convenience method that runs type checking and variable scoping
509+
/// analysis on the query, converting it from a raw (untyped) query to a
510+
/// typed query.
511+
///
512+
/// # Arguments
513+
///
514+
/// * `options` - Configuration containing type information and default scope
515+
///
516+
/// # Returns
517+
///
518+
/// Returns a typed query on success, or an error if type checking fails.
519+
pub fn run_static_analysis(self, options: &AnalysisOptions) -> crate::Result<Query<Typed>> {
520+
static_analysis(options, self).map_err(Error::Analysis)
521+
}
365522
}

0 commit comments

Comments
 (0)