diff --git a/crates/flowistry_pdg/src/pdg.rs b/crates/flowistry_pdg/src/pdg.rs index fa7f07596c..9fc4a3d630 100644 --- a/crates/flowistry_pdg/src/pdg.rs +++ b/crates/flowistry_pdg/src/pdg.rs @@ -331,13 +331,40 @@ pub enum Constant { Uint(u64), Char(char), // Placeholder. Floats in the rust compiler are a bit weird so I'll skip them for now. - //Float(f64), + Float(FloatWrapper), Bool(bool), String(Intern), //Unknown(Intern), Zst(Intern), } +/// This is an unsafe wrapper around f64 in that it defines hash and equality +/// based on bit representation. This is not in line with float semantics. +/// +/// But I really need this to be hash and eq, hence the hack. +#[derive(Clone, Copy, Serialize, Deserialize, Debug)] +#[serde(transparent)] +pub struct FloatWrapper(pub f64); + +impl std::cmp::PartialEq for FloatWrapper { + fn eq(&self, other: &Self) -> bool { + unsafe { + std::mem::transmute::<&f64, &u64>(&self.0) + == std::mem::transmute::<&f64, &u64>(&other.0) + } + } +} + +impl std::cmp::Eq for FloatWrapper {} + +impl std::hash::Hash for FloatWrapper { + fn hash(&self, state: &mut H) { + unsafe { + std::mem::transmute::<&f64, &u64>(&self.0).hash(state); + } + } +} + impl Constant { pub fn int(i: impl Into) -> Self { Self::Int(i.into()) @@ -368,6 +395,7 @@ impl std::fmt::Display for Constant { Self::Uint(u) => Display::fmt(u, f), Self::Char(c) => Display::fmt(c, f), Self::String(s) => Debug::fmt(s, f), + Self::Float(fl) => Display::fmt(&fl.0, f), Self::Zst(s) => f.write_str(s), //Self::Unknown(u) => write!(f, "Unsupported constant: {u}"), } diff --git a/crates/flowistry_pdg_construction/src/analysis/async_support.rs b/crates/flowistry_pdg_construction/src/analysis/async_support.rs index 7dade8dfc2..a8abbb831f 100644 --- a/crates/flowistry_pdg_construction/src/analysis/async_support.rs +++ b/crates/flowistry_pdg_construction/src/analysis/async_support.rs @@ -6,10 +6,10 @@ use rustc_abi::{FieldIdx, VariantIdx}; use rustc_hir::def_id::DefId; use rustc_middle::{ mir::{ - AggregateKind, BasicBlock, Body, Location, Operand, Place, Rvalue, Statement, + visit, AggregateKind, BasicBlock, Body, Location, Operand, Place, Rvalue, Statement, StatementKind, Terminator, TerminatorKind, }, - ty::{GenericArgsRef, Instance, TyCtxt}, + ty::{self, GenericArgsRef, Instance, TyCtxt}, }; use rustc_span::{source_map::Spanned, Span}; @@ -26,6 +26,17 @@ use crate::utils; pub enum AsyncType { Fn, Trait, + Tool, +} + +impl AsyncType { + pub fn describe(&self) -> &'static str { + match self { + AsyncType::Fn => "async function", + AsyncType::Trait => "async trait", + AsyncType::Tool => "async tool", + } + } } /// Context for a call to [`Future::poll`](std::future::Future::poll), when @@ -70,37 +81,22 @@ pub fn try_as_async_trait_function<'tcx>( if !has_async_trait_signature(tcx, def_id) { return None; } - let mut matching_statements = - body.basic_blocks - .iter_enumerated() - .flat_map(|(block, bbdat)| { - bbdat.statements.iter().enumerate().filter_map( - move |(statement_index, statement)| { - let (def_id, generics) = match_async_trait_assign(statement)?; - Some(( - def_id, - generics, - Location { - block, - statement_index, - }, - )) - }, - ) - }) - .collect::>(); - assert_eq!(matching_statements.len(), 1); - matching_statements.pop() + let (def_id, generics, loc, _) = find_coroutine_assign(body); + Some((def_id, generics, loc)) } -pub fn match_async_trait_assign<'tcx>( - statement: &Statement<'tcx>, -) -> Option<(DefId, GenericArgsRef<'tcx>)> { +pub fn match_coroutine_assign<'tcx, 'a>( + statement: &'a Statement<'tcx>, +) -> Option<( + DefId, + GenericArgsRef<'tcx>, + &'a rustc_index::IndexVec>, +)> { match &statement.kind { StatementKind::Assign(box ( _, - Rvalue::Aggregate(box AggregateKind::Coroutine(def_id, generic_args), _args), - )) => Some((*def_id, *generic_args)), + Rvalue::Aggregate(box AggregateKind::Coroutine(def_id, generic_args), args), + )) => Some((*def_id, *generic_args, args)), _ => None, } } @@ -121,7 +117,6 @@ fn has_async_trait_signature(tcx: TyCtxt, def_id: DefId) -> bool { } } -use rustc_middle::ty; fn match_pin_box_dyn_ty(lang_items: &rustc_hir::LanguageItems, t: ty::Ty) -> bool { let ty::TyKind::Adt(pin_ty, args) = t.kind() else { return false; @@ -166,6 +161,79 @@ fn get_async_generator<'tcx>(body: &Body<'tcx>) -> (DefId, GenericArgsRef<'tcx>, (*def_id, generic_args, location) } +// matches std::pin::Pin> +fn has_async_tool_signature(tcx: TyCtxt<'_>, def_id: DefId) -> bool { + use rustc_hir::def::DefKind; + if !matches!(tcx.def_kind(def_id), DefKind::Fn | DefKind::AssocFn) { + return false; + } + let sig = tcx.fn_sig(def_id); + let lang_items = tcx.lang_items(); + let ty::TyKind::Adt(adt_def, inner) = sig.skip_binder().output().skip_binder().kind() else { + return false; + }; + if !lang_items.pin_type().is_some_and(|p| p == adt_def.did()) { + return false; + } + let [b_ty] = inner.as_slice() else { + return false; + }; + let Some(f_ty) = b_ty.as_type().and_then(ty::Ty::boxed_ty) else { + return false; + }; + let ty::TyKind::Dynamic(preds, _, ty::DynKind::Dyn) = f_ty.kind() else { + return false; + }; + let Some(ty::ExistentialPredicate::Trait(t)) = preds.first().map(|b| b.skip_binder()) else { + return false; + }; + lang_items.future_trait().is_some_and(|f| f == t.def_id) +} + +pub fn find_coroutine_assign<'tcx, 'a>( + body: &'a Body<'tcx>, +) -> ( + DefId, + GenericArgsRef<'tcx>, + Location, + &'a rustc_index::IndexVec>, +) { + let mut matching_statements = + body.basic_blocks + .iter_enumerated() + .flat_map(|(block, bbdat)| { + bbdat.statements.iter().enumerate().filter_map( + move |(statement_index, statement)| { + let (def_id, generics, args) = match_coroutine_assign(statement)?; + Some(( + def_id, + generics, + Location { + block, + statement_index, + }, + args, + )) + }, + ) + }) + .collect::>(); + assert_eq!(matching_statements.len(), 1); + matching_statements.pop().unwrap() +} + +fn try_as_async_tool<'tcx>( + tcx: TyCtxt<'tcx>, + def_id: DefId, + body: &Body<'tcx>, +) -> Option<(DefId, GenericArgsRef<'tcx>, Location)> { + if !has_async_tool_signature(tcx, def_id) { + return None; + } + let (def_id, gargs, loc, _) = find_coroutine_assign(body); + Some((def_id, gargs, loc)) +} + /// Try to interpret this function as an async function. /// /// If this is an async function it returns the [`Instance`] of the generator, @@ -178,11 +246,10 @@ pub fn determine_async<'tcx>( ) -> Option<(Instance<'tcx>, Location, AsyncType)> { let ((generator_def_id, args, loc), asyncness) = if is_async(tcx, def_id) { (get_async_generator(body), AsyncType::Fn) + } else if let Some(g) = try_as_async_trait_function(tcx, def_id, body) { + (g, AsyncType::Trait) } else { - ( - try_as_async_trait_function(tcx, def_id, body)?, - AsyncType::Trait, - ) + (try_as_async_tool(tcx, def_id, body)?, AsyncType::Tool) }; let typing_env = body.typing_env(tcx).with_post_analysis_normalized(tcx); let generator_fn = utils::try_resolve_function(tcx, generator_def_id, typing_env, args)?; diff --git a/crates/flowistry_pdg_construction/src/analysis/global/call_tree_visit.rs b/crates/flowistry_pdg_construction/src/analysis/global/call_tree_visit.rs index 0e0213beca..74aa62ac0f 100644 --- a/crates/flowistry_pdg_construction/src/analysis/global/call_tree_visit.rs +++ b/crates/flowistry_pdg_construction/src/analysis/global/call_tree_visit.rs @@ -12,8 +12,12 @@ use std::{borrow::Cow, hash::Hash, rc::Rc}; +use flowistry::mir::FlowistryInput; use flowistry_pdg::{CallString, GlobalLocation, RichLocation}; -use rustc_middle::{mir::Location, ty::Instance}; +use rustc_middle::{ + mir::{self, Location}, + ty::Instance, +}; use crate::{analysis::global::partial_graph::NodeKey, DepNodeKind, MemoPdgConstructor}; @@ -80,6 +84,13 @@ impl<'tcx, 'c, K: Clone> VisitDriver<'tcx, 'c, K> { }, ) } + + pub fn current_body(&self) -> &'tcx mir::Body<'tcx> { + self.memo + .body_cache() + .get(self.current_function().def_id()) + .body() + } } pub trait Visitor<'tcx, K: Hash + Eq + Clone> { diff --git a/crates/flowistry_pdg_construction/src/analysis/global/mod.rs b/crates/flowistry_pdg_construction/src/analysis/global/mod.rs index 03603bd6f9..74d2f23669 100644 --- a/crates/flowistry_pdg_construction/src/analysis/global/mod.rs +++ b/crates/flowistry_pdg_construction/src/analysis/global/mod.rs @@ -25,7 +25,7 @@ use crate::{ callback::DefaultCallback, constants::PlaceOrConst, source_access::{self, BodyCache, CachedBody}, - utils::{manufacture_substs_for, try_resolve_function, TwoLevelCache}, + utils::{manufacture_substs_for, try_resolve_function, PlaceConflictContext, TwoLevelCache}, CallChangeCallback, }; @@ -84,6 +84,7 @@ pub struct MemoPdgConstructor<'tcx, K> { pub(crate) body_cache: Rc>, disable_cache: bool, relaxed: bool, + pub(crate) place_conflict_context: PlaceConflictContext<'tcx>, } impl<'tcx, K: Default> MemoPdgConstructor<'tcx, K> { @@ -105,6 +106,7 @@ impl<'tcx, K: Default> MemoPdgConstructor<'tcx, K> { body_cache, disable_cache: false, relaxed: false, + place_conflict_context: PlaceConflictContext::new(tcx), } } } @@ -124,6 +126,7 @@ impl<'tcx, K> MemoPdgConstructor<'tcx, K> { body_cache: Rc::new(BodyCache::new(tcx)), disable_cache: false, relaxed: false, + place_conflict_context: PlaceConflictContext::new(tcx), } } @@ -658,7 +661,7 @@ enum Input<'tcx> { }, } -struct GraphSizeEstimator { +pub struct GraphSizeEstimator { nodes: usize, edges: usize, functions: usize, @@ -667,7 +670,7 @@ struct GraphSizeEstimator { #[allow(dead_code)] impl GraphSizeEstimator { - fn new() -> Self { + pub fn new() -> Self { Self { nodes: 0, edges: 0, @@ -676,7 +679,7 @@ impl GraphSizeEstimator { } } - fn format_size(&self) -> String { + pub fn format_size(&self) -> String { format!( "nodes: {}, edges: {}, functions: {}, call_string_length: {}", HumanInt(self.nodes), diff --git a/crates/flowistry_pdg_construction/src/analysis/local/mod.rs b/crates/flowistry_pdg_construction/src/analysis/local/mod.rs index 0e80d743a7..c238b2ddb7 100644 --- a/crates/flowistry_pdg_construction/src/analysis/local/mod.rs +++ b/crates/flowistry_pdg_construction/src/analysis/local/mod.rs @@ -285,7 +285,11 @@ impl<'tcx, 'a, K> LocalAnalysis<'tcx, 'a, K> { place.local == alias.local } else { trace!("Checking conflict status of {place:?} and {alias:?}"); - utils::places_conflict(self.tcx(), &self.mono_body, *place, alias) + self.memo.place_conflict_context.places_conflict( + &self.mono_body, + *place, + alias, + ) } }); @@ -366,7 +370,7 @@ impl<'tcx, 'a, K> LocalAnalysis<'tcx, 'a, K> { } fn fmt_fn(&self, def_id: DefId) -> String { - self.tcx().def_path_str(def_id) + format!("{} ({}:{})", self.tcx().def_path_str(def_id), def_id.krate.as_u32(), def_id.index.as_u32()) } } @@ -757,7 +761,17 @@ impl<'tcx, 'a, K: Hash + Eq + Clone> LocalAnalysis<'tcx, 'a, K> { } else { continue; }; - let src = final_state.get_place_node(*place, location.into()).unwrap(); + let Some(src) = final_state.get_place_node(*place, location.into()) else { + let span = match location { + RichLocation::Location(l) => self.mono_body.source_info(*l).span, + _ => self.mono_body.span, + }; + // CORNER CUTTING: we should investigate why this happens. + self.tcx() + .dcx() + .span_warn(span, format!("could not find reference to {place:?} here")); + continue; + }; let eloc = RichLocation::End; let key = NodeKey::for_place(*place, eloc.into()); let prior = final_state.get_node(&key); diff --git a/crates/flowistry_pdg_construction/src/constants.rs b/crates/flowistry_pdg_construction/src/constants.rs index 7f1e32bc49..24c15ac1d9 100644 --- a/crates/flowistry_pdg_construction/src/constants.rs +++ b/crates/flowistry_pdg_construction/src/constants.rs @@ -1,6 +1,6 @@ -use std::hash::Hash; +use std::{hash::Hash, num::ParseFloatError}; -use flowistry_pdg::Constant; +use flowistry_pdg::{Constant, FloatWrapper}; use internment::Intern; use rustc_middle::{ @@ -102,6 +102,7 @@ pub enum ConstConversionError<'tcx> { UnsupportedConstType(mir::Const<'tcx>), Integer128NotSupported { signed: bool }, EvalFailed(mir::Const<'tcx>), + CannotParseFloat(ParseFloatError), } impl<'tcx> ConstConversionError<'tcx> { @@ -143,6 +144,9 @@ impl std::fmt::Display for ConstConversionError<'_> { ConstConversionError::EvalFailed(c) => { write!(f, "Evaluation failed for constant: {:?}", c) } + ConstConversionError::CannotParseFloat(e) => { + write!(f, "Cannot parse float: {}", e) + } } } } @@ -168,56 +172,58 @@ fn constant_from_const_value<'tcx>( ty: ty::Ty<'tcx>, ct: &mir::ConstValue<'tcx>, ) -> Result> { + let default_err = Err(ConstConversionError::UnsupportedConstType(mir::Const::Val( + *ct, ty, + ))); // largely from rustc_middle/mir/pretty.rs:1952-1962 match (ct, ty.kind()) { (_, ty::Ref(_, inner_ty, _)) if matches!(inner_ty.kind(), ty::Str) => { if let Some(data) = ct.try_get_slice_bytes_for_diagnostics(tcx) { - return Ok(Constant::String(Intern::from_ref( + Ok(Constant::String(Intern::from_ref( String::from_utf8_lossy(data).as_ref(), - ))); + ))) + } else { + default_err } } (mir::ConstValue::Scalar(mir::interpret::Scalar::Int(int)), tyk) => match tyk { - ty::Bool => return Ok(Constant::Bool(int.try_to_bool().unwrap())), + ty::Bool => Ok(Constant::Bool(int.try_to_bool().unwrap())), // Skipping floats for now. - // ty::Float(fty) => Self::Float(match fty { - // ty::FloatTy::F16 => int.to_f16() as f64, - // ty::FloatTy::F32 => int.to_f32() as f64, - // ty::FloatTy::F64 => int.to_f64() as f64, - // }), - ty::Int(ity) => { - return Ok(Constant::Int(match ity { - ty::IntTy::I8 => int.to_u8() as i64, - ty::IntTy::I16 => int.to_u16() as i64, - ty::IntTy::I32 => int.to_u32() as i64, - ty::IntTy::I64 => int.to_u64() as i64, - ty::IntTy::Isize => int.to_target_isize(tcx), - ty::IntTy::I128 => { - return Err(ConstConversionError::Integer128NotSupported { signed: true }) - } - })) - } - ty::Uint(uty) => { - return Ok(Constant::Uint(match uty { - ty::UintTy::U8 => int.to_u8() as u64, - ty::UintTy::U16 => int.to_u16() as u64, - ty::UintTy::U32 => int.to_u32() as u64, - ty::UintTy::U64 => int.to_u64(), - ty::UintTy::Usize => int.to_target_usize(tcx), - ty::UintTy::U128 => { - return Err(ConstConversionError::Integer128NotSupported { signed: false }) - } - })) - } - ty::Char => { - return Ok(Constant::Char(int.to_u32() as u8 as char)); - } - _ => (), + ty::Float(fty) => Ok(Constant::Float( + match fty { + ty::FloatTy::F16 => int.to_f16().to_string(), + ty::FloatTy::F32 => int.to_f32().to_string(), + ty::FloatTy::F64 => int.to_f64().to_string(), + _ => return default_err, + } + .parse() + .map(FloatWrapper) + .map_err(ConstConversionError::CannotParseFloat)?, + )), + ty::Int(ity) => Ok(Constant::Int(match ity { + ty::IntTy::I8 => int.to_u8() as i64, + ty::IntTy::I16 => int.to_u16() as i64, + ty::IntTy::I32 => int.to_u32() as i64, + ty::IntTy::I64 => int.to_u64() as i64, + ty::IntTy::Isize => int.to_target_isize(tcx), + ty::IntTy::I128 => { + return Err(ConstConversionError::Integer128NotSupported { signed: true }) + } + })), + ty::Uint(uty) => Ok(Constant::Uint(match uty { + ty::UintTy::U8 => int.to_u8() as u64, + ty::UintTy::U16 => int.to_u16() as u64, + ty::UintTy::U32 => int.to_u32() as u64, + ty::UintTy::U64 => int.to_u64(), + ty::UintTy::Usize => int.to_target_usize(tcx), + ty::UintTy::U128 => { + return Err(ConstConversionError::Integer128NotSupported { signed: false }) + } + })), + ty::Char => Ok(Constant::Char(int.to_u32() as u8 as char)), + _ => default_err, }, - (mir::ConstValue::ZeroSized, t) => return Ok(Constant::Zst(Intern::new(format!("{t:?}")))), - _ => (), + (mir::ConstValue::ZeroSized, t) => Ok(Constant::Zst(Intern::new(format!("{t:?}")))), + _ => default_err, } - Err(ConstConversionError::UnsupportedConstType(mir::Const::Val( - *ct, ty, - ))) } diff --git a/crates/flowistry_pdg_construction/src/lib.rs b/crates/flowistry_pdg_construction/src/lib.rs index 900973f019..b6cd90642e 100644 --- a/crates/flowistry_pdg_construction/src/lib.rs +++ b/crates/flowistry_pdg_construction/src/lib.rs @@ -27,11 +27,11 @@ pub mod constants; pub mod source_access; pub mod utils; -pub use analysis::async_support::{determine_async, is_async_trait_fn, match_async_trait_assign}; +pub use analysis::async_support::{self, determine_async, is_async_trait_fn}; pub use analysis::global::call_tree_visit::{VisitDriver, Visitor}; pub use analysis::global::{ - DepEdge, DepEdgeKind, DepNode, DepNodeKind, MemoPdgConstructor, Node, OneHopLocation, - PartialGraph, Use, + DepEdge, DepEdgeKind, DepNode, DepNodeKind, GraphSizeEstimator, MemoPdgConstructor, Node, + OneHopLocation, PartialGraph, Use, }; pub use analysis::CallingConvention; pub use callback::{ diff --git a/crates/flowistry_pdg_construction/src/utils/mod.rs b/crates/flowistry_pdg_construction/src/utils/mod.rs index 15bff17375..3d76a5885f 100644 --- a/crates/flowistry_pdg_construction/src/utils/mod.rs +++ b/crates/flowistry_pdg_construction/src/utils/mod.rs @@ -506,436 +506,493 @@ pub fn assert_resolved<'tcx>(rvalue: &impl TypeVisitable>, msg: imp // slight alterations that do not throw an error for nested references // ################################################################################################# -/// Helper function for checking if places conflict with a mutable borrow and deep access depth. -/// This is used to check for places conflicting outside of the borrow checking code (such as in -/// dataflow). -pub fn places_conflict<'tcx>( +pub struct PlaceConflictContext<'tcx> { tcx: TyCtxt<'tcx>, - body: &Body<'tcx>, - borrow_place: Place<'tcx>, - access_place: Place<'tcx>, -) -> bool { - let borrow_local = borrow_place.local; - let access_local = access_place.local; - let access_place = access_place.as_ref(); + unique_ptr_def_id: DefId, +} - if borrow_local != access_local { - // We have proven the borrow disjoint - further projections will remain disjoint. - return false; - } +#[derive(Clone, Copy)] +pub(crate) enum Overlap { + Arbitrary, + EqualOrDisjoint, + Disjoint, +} - // This Local/Local case is handled by the more general code below, but - // it's so common that it's a speed win to check for it first. - if borrow_place.projection.is_empty() && access_place.projection.is_empty() { - return true; +impl<'tcx> PlaceConflictContext<'tcx> { + pub fn new(tcx: TyCtxt<'tcx>) -> Self { + let core_sym = rustc_span::Symbol::intern("core"); + let core = tcx + .crates(()) + .iter() + .copied() + .find(|c| tcx.crate_name(*c) == core_sym) + .expect("core crate not found"); + let find_child = |m, n| { + let sym = rustc_span::Symbol::intern(n); + tcx.module_children(m) + .iter() + .find(|m| m.ident.name == sym) + .and_then(|m| m.res.opt_def_id()) + .expect(&format!("Could not find module {n}")) + }; + let ptr_mod = find_child(core.as_def_id(), "ptr"); + let unique_ptr_def_id = find_child(ptr_mod, "Unique"); + Self { + tcx, + unique_ptr_def_id, + } } - // loop invariant: borrow_c is always either equal to access_c or disjoint from it. - for ((borrow_place, borrow_c), &access_c) in - std::iter::zip(borrow_place.iter_projections(), access_place.projection) - { - // Borrow and access path both have more components. - // - // Examples: - // - // - borrow of `a.(...)`, access to `a.(...)` - // - borrow of `a.(...)`, access to `b.(...)` - // - // Here we only see the components we have checked so - // far (in our examples, just the first component). We - // check whether the components being borrowed vs - // accessed are disjoint (as in the second example, - // but not the first). - match place_projection_conflict( - tcx, - body, - borrow_place, - borrow_c, - access_c, - PlaceConflictBias::Overlap, - ) { - Overlap::Arbitrary => { - // We have encountered different fields of potentially - // the same union - the borrow now partially overlaps. - // - // There is no *easy* way of comparing the fields - // further on, because they might have different types - // (e.g., borrows of `u.a.0` and `u.b.y` where `.0` and - // `.y` come from different structs). - // - // We could try to do some things here - e.g., count - // dereferences - but that's probably not a good - // idea, at least for now, so just give up and - // report a conflict. This is unsafe code anyway so - // the user could always use raw pointers. - return true; - } - Overlap::EqualOrDisjoint => { - // This is the recursive case - proceed to the next element. - } - Overlap::Disjoint => { - // We have proven the borrow disjoint - further - // projections will remain disjoint. - return false; - } + fn is_unique_ptr(&self, ty: Ty<'tcx>) -> bool { + match ty.kind() { + ty::TyKind::Adt(def, _) => def.did() == self.unique_ptr_def_id, + _ => false, } } - if borrow_place.projection.len() > access_place.projection.len() { - for (base, elem) in borrow_place - .iter_projections() - .skip(access_place.projection.len()) + /// Helper function for checking if places conflict with a mutable borrow and deep access depth. + /// This is used to check for places conflicting outside of the borrow checking code (such as in + /// dataflow). + pub fn places_conflict( + &self, + body: &Body<'tcx>, + borrow_place: Place<'tcx>, + access_place: Place<'tcx>, + ) -> bool { + let tcx = self.tcx; + let borrow_local = borrow_place.local; + let access_local = access_place.local; + let access_place = access_place.as_ref(); + + if borrow_local != access_local { + // We have proven the borrow disjoint - further projections will remain disjoint. + return false; + } + + // This Local/Local case is handled by the more general code below, but + // it's so common that it's a speed win to check for it first. + if borrow_place.projection.is_empty() && access_place.projection.is_empty() { + return true; + } + + // loop invariant: borrow_c is always either equal to access_c or disjoint from it. + for ((borrow_place, borrow_c), &access_c) in + std::iter::zip(borrow_place.iter_projections(), access_place.projection) { - // Borrow path is longer than the access path. Examples: + // Borrow and access path both have more components. // - // - borrow of `a.b.c`, access to `a.b` + // Examples: // - // Here, we know that the borrow can access a part of - // our place. This is a conflict if that is a part our - // access cares about. - - let base_ty = base.ty(body, tcx).ty; - - match (elem, &base_ty.kind()) { - (ProjectionElem::Deref, ty::Ref(_, _, hir::Mutability::Not)) => { - // This occurs only in two cases. Either we have a reference - // as an argument, which causes queries such as - // conflicting("(*_1)", "_2") or if we have a raw pointer in - // the mix. In the reference case the alias analysis will - // already keep track of the conflict. Raw pointers by - // themselves are not soundly supported. However this can - // also occur via a manual "deref" (or somesuch), on which - // case we rely on the lifetimes declared on those functions - // to be correct and then our alias analysis will pick it up - // correctly. - return false; + // - borrow of `a.(...)`, access to `a.(...)` + // - borrow of `a.(...)`, access to `b.(...)` + // + // Here we only see the components we have checked so + // far (in our examples, just the first component). We + // check whether the components being borrowed vs + // accessed are disjoint (as in the second example, + // but not the first). + match self.place_projection_conflict( + body, + borrow_place, + borrow_c, + access_c, + PlaceConflictBias::Overlap, + ) { + Overlap::Arbitrary => { + // We have encountered different fields of potentially + // the same union - the borrow now partially overlaps. + // + // There is no *easy* way of comparing the fields + // further on, because they might have different types + // (e.g., borrows of `u.a.0` and `u.b.y` where `.0` and + // `.y` come from different structs). + // + // We could try to do some things here - e.g., count + // dereferences - but that's probably not a good + // idea, at least for now, so just give up and + // report a conflict. This is unsafe code anyway so + // the user could always use raw pointers. + return true; } - (ProjectionElem::Deref, _) - | (ProjectionElem::Field { .. }, _) - | (ProjectionElem::Index { .. }, _) - | (ProjectionElem::ConstantIndex { .. }, _) - | (ProjectionElem::Subslice { .. }, _) - | (ProjectionElem::OpaqueCast { .. }, _) - | (ProjectionElem::Subtype(_), _) - | (ProjectionElem::Downcast { .. }, _) => { - // Recursive case. This can still be disjoint on a - // further iteration if this a shallow access and - // there's a deref later on, e.g., a borrow - // of `*x.y` while accessing `x`. + Overlap::EqualOrDisjoint => { + // This is the recursive case - proceed to the next element. + } + Overlap::Disjoint => { + // We have proven the borrow disjoint - further + // projections will remain disjoint. + return false; } } } - } - true -} + if borrow_place.projection.len() > access_place.projection.len() { + for (base, elem) in borrow_place + .iter_projections() + .skip(access_place.projection.len()) + { + // Borrow path is longer than the access path. Examples: + // + // - borrow of `a.b.c`, access to `a.b` + // + // Here, we know that the borrow can access a part of + // our place. This is a conflict if that is a part our + // access cares about. + + let base_ty = base.ty(body, tcx).ty; + + match (elem, &base_ty.kind()) { + (ProjectionElem::Deref, ty::Ref(_, _, hir::Mutability::Not)) => { + // This occurs only in two cases. Either we have a reference + // as an argument, which causes queries such as + // conflicting("(*_1)", "_2") or if we have a raw pointer in + // the mix. In the reference case the alias analysis will + // already keep track of the conflict. Raw pointers by + // themselves are not soundly supported. However this can + // also occur via a manual "deref" (or somesuch), on which + // case we rely on the lifetimes declared on those functions + // to be correct and then our alias analysis will pick it up + // correctly. + return false; + } + (ProjectionElem::Deref, _) + | (ProjectionElem::Field { .. }, _) + | (ProjectionElem::Index { .. }, _) + | (ProjectionElem::ConstantIndex { .. }, _) + | (ProjectionElem::Subslice { .. }, _) + | (ProjectionElem::OpaqueCast { .. }, _) + | (ProjectionElem::Subtype(_), _) + | (ProjectionElem::Downcast { .. }, _) => { + // Recursive case. This can still be disjoint on a + // further iteration if this a shallow access and + // there's a deref later on, e.g., a borrow + // of `*x.y` while accessing `x`. + } + } + } + } -#[derive(Clone, Copy)] -pub(crate) enum Overlap { - Arbitrary, - EqualOrDisjoint, - Disjoint, -} + true + } -// Given that the bases of `elem1` and `elem2` are always either equal -// or disjoint (and have the same type!), return the overlap situation -// between `elem1` and `elem2`. -fn place_projection_conflict<'tcx>( - tcx: TyCtxt<'tcx>, - body: &Body<'tcx>, - pi1: PlaceRef<'tcx>, - pi1_elem: PlaceElem<'tcx>, - pi2_elem: PlaceElem<'tcx>, - bias: PlaceConflictBias, -) -> Overlap { - match (pi1_elem, pi2_elem) { - (ProjectionElem::Deref, ProjectionElem::Deref) => { - // derefs (e.g., `*x` vs. `*x`) - recur. - Overlap::EqualOrDisjoint - } - (ProjectionElem::OpaqueCast(_), ProjectionElem::OpaqueCast(_)) => { - // casts to other types may always conflict irrespective of the type being cast to. - Overlap::EqualOrDisjoint - } - (ProjectionElem::Field(f1, _), ProjectionElem::Field(f2, _)) => { - if f1 == f2 { - // same field (e.g., `a.y` vs. `a.y`) - recur. + // Given that the bases of `elem1` and `elem2` are always either equal + // or disjoint (and have the same type!), return the overlap situation + // between `elem1` and `elem2`. + fn place_projection_conflict( + &self, + body: &Body<'tcx>, + pi1: PlaceRef<'tcx>, + pi1_elem: PlaceElem<'tcx>, + pi2_elem: PlaceElem<'tcx>, + bias: PlaceConflictBias, + ) -> Overlap { + let tcx = self.tcx; + match (pi1_elem, pi2_elem) { + (ProjectionElem::Deref, ProjectionElem::Deref) => { + // derefs (e.g., `*x` vs. `*x`) - recur. Overlap::EqualOrDisjoint - } else { - let ty = pi1.ty(body, tcx).ty; - if ty.is_union() { - // Different fields of a union, we are basically stuck. - Overlap::Arbitrary + } + (ProjectionElem::OpaqueCast(_), ProjectionElem::OpaqueCast(_)) => { + // casts to other types may always conflict irrespective of the type being cast to. + Overlap::EqualOrDisjoint + } + (ProjectionElem::Field(f1, _), ProjectionElem::Field(f2, _)) => { + if f1 == f2 { + // same field (e.g., `a.y` vs. `a.y`) - recur. + Overlap::EqualOrDisjoint + } else { + let ty = pi1.ty(body, tcx).ty; + if ty.is_union() { + // Different fields of a union, we are basically stuck. + Overlap::Arbitrary + } else { + // Different fields of a struct (`a.x` vs. `a.y`). Disjoint! + Overlap::Disjoint + } + } + } + (ProjectionElem::Downcast(_, v1), ProjectionElem::Downcast(_, v2)) => { + // different variants are treated as having disjoint fields, + // even if they occupy the same "space", because it's + // impossible for 2 variants of the same enum to exist + // (and therefore, to be borrowed) at the same time. + // + // Note that this is different from unions - we *do* allow + // this code to compile: + // + // ``` + // fn foo(x: &mut Result) { + // let mut v = None; + // if let Ok(ref mut a) = *x { + // v = Some(a); + // } + // // here, you would *think* that the + // // *entirety* of `x` would be borrowed, + // // but in fact only the `Ok` variant is, + // // so the `Err` variant is *entirely free*: + // if let Err(ref mut a) = *x { + // v = Some(a); + // } + // drop(v); + // } + // ``` + if v1 == v2 { + Overlap::EqualOrDisjoint } else { - // Different fields of a struct (`a.x` vs. `a.y`). Disjoint! Overlap::Disjoint } } - } - (ProjectionElem::Downcast(_, v1), ProjectionElem::Downcast(_, v2)) => { - // different variants are treated as having disjoint fields, - // even if they occupy the same "space", because it's - // impossible for 2 variants of the same enum to exist - // (and therefore, to be borrowed) at the same time. - // - // Note that this is different from unions - we *do* allow - // this code to compile: - // - // ``` - // fn foo(x: &mut Result) { - // let mut v = None; - // if let Ok(ref mut a) = *x { - // v = Some(a); - // } - // // here, you would *think* that the - // // *entirety* of `x` would be borrowed, - // // but in fact only the `Ok` variant is, - // // so the `Err` variant is *entirely free*: - // if let Err(ref mut a) = *x { - // v = Some(a); - // } - // drop(v); - // } - // ``` - if v1 == v2 { - Overlap::EqualOrDisjoint - } else { - Overlap::Disjoint + ( + ProjectionElem::Index(..), + ProjectionElem::Index(..) + | ProjectionElem::ConstantIndex { .. } + | ProjectionElem::Subslice { .. }, + ) + | ( + ProjectionElem::ConstantIndex { .. } | ProjectionElem::Subslice { .. }, + ProjectionElem::Index(..), + ) => { + // Array indexes (`a[0]` vs. `a[i]`). These can either be disjoint + // (if the indexes differ) or equal (if they are the same). + match bias { + PlaceConflictBias::Overlap => { + // If we are biased towards overlapping, then this is the recursive + // case that gives "equal *or* disjoint" its meaning. + Overlap::EqualOrDisjoint + } + PlaceConflictBias::NoOverlap => { + // If we are biased towards no overlapping, then this is disjoint. + Overlap::Disjoint + } + } } - } - ( - ProjectionElem::Index(..), - ProjectionElem::Index(..) - | ProjectionElem::ConstantIndex { .. } - | ProjectionElem::Subslice { .. }, - ) - | ( - ProjectionElem::ConstantIndex { .. } | ProjectionElem::Subslice { .. }, - ProjectionElem::Index(..), - ) => { - // Array indexes (`a[0]` vs. `a[i]`). These can either be disjoint - // (if the indexes differ) or equal (if they are the same). - match bias { - PlaceConflictBias::Overlap => { - // If we are biased towards overlapping, then this is the recursive - // case that gives "equal *or* disjoint" its meaning. + ( + ProjectionElem::ConstantIndex { + offset: o1, + min_length: _, + from_end: false, + }, + ProjectionElem::ConstantIndex { + offset: o2, + min_length: _, + from_end: false, + }, + ) + | ( + ProjectionElem::ConstantIndex { + offset: o1, + min_length: _, + from_end: true, + }, + ProjectionElem::ConstantIndex { + offset: o2, + min_length: _, + from_end: true, + }, + ) => { + if o1 == o2 { Overlap::EqualOrDisjoint + } else { + Overlap::Disjoint } - PlaceConflictBias::NoOverlap => { - // If we are biased towards no overlapping, then this is disjoint. + } + ( + ProjectionElem::ConstantIndex { + offset: offset_from_begin, + min_length: min_length1, + from_end: false, + }, + ProjectionElem::ConstantIndex { + offset: offset_from_end, + min_length: min_length2, + from_end: true, + }, + ) + | ( + ProjectionElem::ConstantIndex { + offset: offset_from_end, + min_length: min_length1, + from_end: true, + }, + ProjectionElem::ConstantIndex { + offset: offset_from_begin, + min_length: min_length2, + from_end: false, + }, + ) => { + // both patterns matched so it must be at least the greater of the two + let min_length = std::cmp::max(min_length1, min_length2); + // `offset_from_end` can be in range `[1..min_length]`, 1 indicates the last + // element (like -1 in Python) and `min_length` the first. + // Therefore, `min_length - offset_from_end` gives the minimal possible + // offset from the beginning + if offset_from_begin >= min_length - offset_from_end { + Overlap::EqualOrDisjoint + } else { Overlap::Disjoint } } - } - ( - ProjectionElem::ConstantIndex { - offset: o1, - min_length: _, - from_end: false, - }, - ProjectionElem::ConstantIndex { - offset: o2, - min_length: _, - from_end: false, - }, - ) - | ( - ProjectionElem::ConstantIndex { - offset: o1, - min_length: _, - from_end: true, - }, - ProjectionElem::ConstantIndex { - offset: o2, - min_length: _, - from_end: true, - }, - ) => { - if o1 == o2 { - Overlap::EqualOrDisjoint - } else { - Overlap::Disjoint + ( + ProjectionElem::ConstantIndex { + offset, + min_length: _, + from_end: false, + }, + ProjectionElem::Subslice { + from, + to, + from_end: false, + }, + ) + | ( + ProjectionElem::Subslice { + from, + to, + from_end: false, + }, + ProjectionElem::ConstantIndex { + offset, + min_length: _, + from_end: false, + }, + ) => { + if (from..to).contains(&offset) { + Overlap::EqualOrDisjoint + } else { + Overlap::Disjoint + } } - } - ( - ProjectionElem::ConstantIndex { - offset: offset_from_begin, - min_length: min_length1, - from_end: false, - }, - ProjectionElem::ConstantIndex { - offset: offset_from_end, - min_length: min_length2, - from_end: true, - }, - ) - | ( - ProjectionElem::ConstantIndex { - offset: offset_from_end, - min_length: min_length1, - from_end: true, - }, - ProjectionElem::ConstantIndex { - offset: offset_from_begin, - min_length: min_length2, - from_end: false, - }, - ) => { - // both patterns matched so it must be at least the greater of the two - let min_length = std::cmp::max(min_length1, min_length2); - // `offset_from_end` can be in range `[1..min_length]`, 1 indicates the last - // element (like -1 in Python) and `min_length` the first. - // Therefore, `min_length - offset_from_end` gives the minimal possible - // offset from the beginning - if offset_from_begin >= min_length - offset_from_end { - Overlap::EqualOrDisjoint - } else { - Overlap::Disjoint + ( + ProjectionElem::ConstantIndex { + offset, + min_length: _, + from_end: false, + }, + ProjectionElem::Subslice { from, .. }, + ) + | ( + ProjectionElem::Subslice { from, .. }, + ProjectionElem::ConstantIndex { + offset, + min_length: _, + from_end: false, + }, + ) => { + if offset >= from { + Overlap::EqualOrDisjoint + } else { + Overlap::Disjoint + } } - } - ( - ProjectionElem::ConstantIndex { - offset, - min_length: _, - from_end: false, - }, - ProjectionElem::Subslice { - from, - to, - from_end: false, - }, - ) - | ( - ProjectionElem::Subslice { - from, - to, - from_end: false, - }, - ProjectionElem::ConstantIndex { - offset, - min_length: _, - from_end: false, - }, - ) => { - if (from..to).contains(&offset) { - Overlap::EqualOrDisjoint - } else { - Overlap::Disjoint + ( + ProjectionElem::ConstantIndex { + offset, + min_length: _, + from_end: true, + }, + ProjectionElem::Subslice { + to, from_end: true, .. + }, + ) + | ( + ProjectionElem::Subslice { + to, from_end: true, .. + }, + ProjectionElem::ConstantIndex { + offset, + min_length: _, + from_end: true, + }, + ) => { + if offset > to { + Overlap::EqualOrDisjoint + } else { + Overlap::Disjoint + } } - } - ( - ProjectionElem::ConstantIndex { - offset, - min_length: _, - from_end: false, - }, - ProjectionElem::Subslice { from, .. }, - ) - | ( - ProjectionElem::Subslice { from, .. }, - ProjectionElem::ConstantIndex { - offset, - min_length: _, - from_end: false, - }, - ) => { - if offset >= from { - Overlap::EqualOrDisjoint - } else { - Overlap::Disjoint + ( + ProjectionElem::Subslice { + from: f1, + to: t1, + from_end: false, + }, + ProjectionElem::Subslice { + from: f2, + to: t2, + from_end: false, + }, + ) => { + if f2 >= t1 || f1 >= t2 { + Overlap::Disjoint + } else { + Overlap::EqualOrDisjoint + } } - } - ( - ProjectionElem::ConstantIndex { - offset, - min_length: _, - from_end: true, - }, - ProjectionElem::Subslice { - to, from_end: true, .. - }, - ) - | ( - ProjectionElem::Subslice { - to, from_end: true, .. - }, - ProjectionElem::ConstantIndex { - offset, - min_length: _, - from_end: true, - }, - ) => { - if offset > to { + (ProjectionElem::Subslice { .. }, ProjectionElem::Subslice { .. }) => { Overlap::EqualOrDisjoint - } else { - Overlap::Disjoint } - } - ( - ProjectionElem::Subslice { - from: f1, - to: t1, - from_end: false, - }, - ProjectionElem::Subslice { - from: f2, - to: t2, - from_end: false, - }, - ) => { - if f2 >= t1 || f1 >= t2 { - Overlap::Disjoint - } else { + _ if self.matches_box_or_unique_deref(body, pi1, pi1_elem, pi2_elem) => { + // See `matches_box_or_unique_deref` for what this case is about Overlap::EqualOrDisjoint } + ( + ProjectionElem::Deref + | ProjectionElem::Field(..) + | ProjectionElem::Index(..) + | ProjectionElem::ConstantIndex { .. } + | ProjectionElem::OpaqueCast { .. } + | ProjectionElem::Subslice { .. } + | ProjectionElem::Subtype(_) + | ProjectionElem::Downcast(..), + _, + ) => panic!( + "mismatched projections in place_element_conflict: {:?} and {:?}", + pi1_elem, pi2_elem + ), } - (ProjectionElem::Subslice { .. }, ProjectionElem::Subslice { .. }) => { - Overlap::EqualOrDisjoint - } - (ProjectionElem::Deref, ProjectionElem::Field(idx, _)) - if matches!(idx.as_u32(), 0 | 1) && pi1.ty(body, tcx).ty.is_box() => - { - // Special case for deref of box. - // - // Basically rustc is sloppy when it comes to boxes. It is perfectly permissible to have the following assignments: - // - // ``` - // fn Box::new(_1: T) { - // ... - // _5 = ShallowInitBox(move _4, std::sys::pal::unix::sync::mutex::Mutex); - // (*_5) = move _1; - // _0 = move _5; - // } - // ``` - // - // This is technically invalid, because `Box` is not a reference (or - // pointer) type, but defined as `struct Box(Unique, A)`. So technically it should be - // `*(_5.0.0)` to get to the contained value. - // - // Further more we must allow both the `.0` and `.1` field, since it - // also tries to match the allocator for conflicts. - // - // XXX(Justus): This does not try and adjust the the projection - // completely as explained above, instead it just matches the deref - // to the first projection on right of the assignment. This does not - // appear to be a problem at this point, but could cause more - // incorrect projections downstream. - Overlap::EqualOrDisjoint + } + + /// Special case for deref of `Box` and `Unique` + /// + /// Basically rustc is sloppy when it comes to boxes. It is perfectly permissible to have the following assignments: + /// + /// ``` + /// fn Box::new(_1: T) { + /// ... + /// _5 = ShallowInitBox(move _4, std::sys::pal::unix::sync::mutex::Mutex); + /// (*_5) = move _1; + /// _0 = move _5; + /// } + /// ``` + /// + /// This is technically invalid, because `Box` is not a reference (or + /// pointer) type, but defined as `struct Box(Unique, A)`. So technically it should be + /// `*(_5.0.0)` to get to the contained value. + /// + /// Further more we must allow both the `.0` and `.1` field, since it + /// also tries to match the allocator for conflicts. + /// + /// XXX(Justus): This does not try and adjust the the projection + /// completely as explained above, instead it just matches the deref + /// to the first projection on right of the assignment. This does not + /// appear to be a problem at this point, but could cause more + /// incorrect projections downstream. + fn matches_box_or_unique_deref( + &self, + body: &Body<'tcx>, + pi1: PlaceRef<'tcx>, + pi1_elem: PlaceElem<'tcx>, + pi2_elem: PlaceElem<'tcx>, + ) -> bool { + match (pi1_elem, pi2_elem) { + (ProjectionElem::Deref, ProjectionElem::Field(idx, _)) + | (ProjectionElem::Field(idx, _), ProjectionElem::Deref) + if matches!(idx.as_u32(), 0 | 1) => + { + let t = pi1.ty(body, self.tcx).ty; + t.is_box() || self.is_unique_ptr(t) + } + _ => false, } - ( - ProjectionElem::Deref - | ProjectionElem::Field(..) - | ProjectionElem::Index(..) - | ProjectionElem::ConstantIndex { .. } - | ProjectionElem::OpaqueCast { .. } - | ProjectionElem::Subslice { .. } - | ProjectionElem::Subtype(_) - | ProjectionElem::Downcast(..), - _, - ) => panic!( - "mismatched projections in place_element_conflict: {:?} and {:?}", - pi1_elem, pi2_elem - ), } } diff --git a/crates/paralegal-flow/src/ana/graph_converter.rs b/crates/paralegal-flow/src/ana/graph_converter.rs index 93d4b40268..e6e1e47c80 100644 --- a/crates/paralegal-flow/src/ana/graph_converter.rs +++ b/crates/paralegal-flow/src/ana/graph_converter.rs @@ -1,3 +1,6 @@ +use std::hash::Hash; +use std::time::Instant; + use super::{path_for_item, src_loc_for_span}; use crate::{ ann::{db::AutoMarkers, side_effect_detection}, @@ -18,7 +21,7 @@ use rustc_hash::FxHashSet; use rustc_hir::def_id::DefId; use rustc_middle::{ mir, - ty::{Instance, TyCtxt, TypingEnv}, + ty::{self, Instance, TyCtxt, TypingEnv}, }; use either::Either; @@ -48,6 +51,8 @@ struct GraphAssembler<'tcx, 'a> { /// conversion. types: HashMap>, known_def_ids: &'a mut FxHashSet, + counter: u64, + started: Instant, } /// Create a global PDG (spanning the entire call tree) starting from the given @@ -71,6 +76,11 @@ pub fn assemble_pdg<'tcx>( .body(); let async_state = determine_async(tcx, base_body_def_id, base_body); + debug!( + "async_state for {}: {}", + tcx.def_path_str(target), + async_state.map_or("not async", |(.., s)| s.describe()) + ); // These will refer to the function we are actually analyzing from, which may // be a generator if the target is async. @@ -81,6 +91,22 @@ pub fn assemble_pdg<'tcx>( let mut driver = VisitDriver::new(pdg_constructor, possible_generator_instance, k); let mut assembler = GraphAssembler::new(pctx.clone(), known_def_ids, base_body_def_id); + let print_estimated_size = |driver: &mut VisitDriver<'tcx, '_, u32>| { + if let Some(depth) = pctx.opts().anactrl().fail_on_deep_call_chain() { + let mut deepchain_printer = StacktracePrinter::new(tcx, depth); + driver.start(&mut deepchain_printer); + if deepchain_printer.locations_printed > 0 { + panic!("Printed a long chain"); + } + } + let mut estimator = flowistry_pdg_construction::GraphSizeEstimator::new(); + driver.start(&mut estimator); + println!( + "Graph for {}: {}", + tcx.def_path_str(target), + estimator.format_size() + ); + }; // If the top level function is async we start analysis from the generator // instead (because the async function itself is basically empty, it just @@ -95,6 +121,7 @@ pub fn assemble_pdg<'tcx>( location: RichLocation::Location(loc), }, |driver| { + print_estimated_size(driver); driver.start(&mut assembler); }, ); @@ -107,6 +134,7 @@ pub fn assemble_pdg<'tcx>( // have to manually sync up the actual arguments to the async function. assembler.fix_async_args(base_instance, loc, &mut driver); } else { + print_estimated_size(&mut driver); driver.start(&mut assembler); } let return_ = assembler.determine_return(); @@ -144,6 +172,8 @@ impl<'tcx, 'a> GraphAssembler<'tcx, 'a> { known_def_ids, types: Default::default(), is_async, + counter: 0, + started: Instant::now(), } } @@ -167,6 +197,14 @@ impl<'tcx, 'a> GraphAssembler<'tcx, 'a> { vis: &mut VisitDriver<'tcx, '_, K>, weight: &DepNode<'tcx, OneHopLocation>, ) -> GNode { + self.counter += 1; + if self.counter % 1_000_000 == 0 { + println!( + "[{}] Seen {} mil nodes", + self.started.elapsed().as_secs(), + self.counter / 1_000_000 + ); + } let weight = globalize_node(vis, weight, self.tcx()); let table = self.nodes.last_mut().unwrap(); let prior = table[node.index()]; @@ -187,6 +225,7 @@ impl<'tcx, 'a> GraphAssembler<'tcx, 'a> { at: CallString, span: rustc_span::Span, is_arg: Option, + same: bool, ) -> GNode { GNode(self.graph.add_node(NodeInfo { at, @@ -196,6 +235,7 @@ impl<'tcx, 'a> GraphAssembler<'tcx, 'a> { }, span: src_loc_for_span(span, self.tcx()), is_arg, + same, })) } @@ -276,21 +316,23 @@ impl<'tcx, 'a> GraphAssembler<'tcx, 'a> { } else { (place.local.into(), place.projection.as_slice()) }; - trace!("Using base place {base_place:?} with projections {projections:?}"); + trace!( + "Using base place {base_place:?} in {} with projections {projections:?}", + tcx.def_path_str(function.def_id()) + ); let resolution = vis.current_function(); - // Thread through each caller to recover generic arguments let body = self.pctx.body_cache().get(function.def_id()).body(); - let raw_ty = base_place.ty(body, tcx); - let base_ty = try_monomorphize( + let ty = try_monomorphize( resolution, tcx, TypingEnv::fully_monomorphized(), - &raw_ty, + &body.local_decls[base_place.local].ty, span, ) .unwrap(); + let base_ty = place_ty_from_ty_and_projections(tcx, ty, base_place.projection); self.handle_node_types_helper(node, base_ty, projections); } @@ -555,6 +597,7 @@ impl<'tcx, 'a> GraphAssembler<'tcx, 'a> { driver.globalize_location(&RichLocation::Start.into()), base_body.local_decls[arg].source_info.span, Some((arg.as_u32() - 1) as u16), + true, ) }) .collect::>(); @@ -563,6 +606,7 @@ impl<'tcx, 'a> GraphAssembler<'tcx, 'a> { driver.globalize_location(&RichLocation::End.into()), base_body.local_decls[mir::RETURN_PLACE].source_info.span, None, + true, ); let mono_ty = |local| { @@ -590,6 +634,12 @@ impl<'tcx, 'a> GraphAssembler<'tcx, 'a> { let local = mir::RETURN_PLACE; self.handle_node_types_helper(return_node, mono_ty(local), &[]); + let interpreter = + super::simple_interpreter::SimpleInterpreter::interpret_body(tcx, base_body).unwrap(); + + let coroutine_fields = + flowistry_pdg_construction::async_support::find_coroutine_assign(base_body).3; + // Establish connections to existing nodes let generator_loc = RichLocation::Location(loc); let transition_at = CallString::new(&[GlobalLocation { @@ -606,9 +656,10 @@ impl<'tcx, 'a> GraphAssembler<'tcx, 'a> { else { continue; }; + if n.place.local.as_u32() == 1 && at.location == RichLocation::Start { let ridx = self.translate_node(nidx); - let Some(mir::ProjectionElem::Field(id, _)) = n.place.projection.first() else { + let Some(mir::ProjectionElem::Field(id, fty)) = n.place.projection.first() else { tcx.dcx().span_err( *span, format!("Expected field projection on async generator in {def_id:?}, found {:?}", n.place), @@ -616,7 +667,49 @@ impl<'tcx, 'a> GraphAssembler<'tcx, 'a> { continue; }; - let arg = args_as_nodes[id.as_usize()]; + use rustc_middle::mir::Operand; + + let (Operand::Copy(op_in_parent) | Operand::Move(op_in_parent)) = + coroutine_fields[*id] + else { + tcx.dcx() + .span_err(*span, format!("Expected to find operator {id:?} in parent")); + continue; + }; + let target_arg = interpreter.resolve(op_in_parent).unwrap(); + + // Subtract for 0, which is always the return value + let target_local = target_arg.local.as_usize() - 1; + + let Some(arg) = args_as_nodes.get(target_local) else { + let inst = driver.current_function(); + for fun in [inst.def_id(), def_id] { + let mut f = std::io::BufWriter::new( + std::fs::File::create(format!("{}.mir", tcx.def_path_str(fun))) + .unwrap(), + ); + use rustc_middle::mir::pretty::{write_mir_fn, PrettyPrintMirOptions}; + write_mir_fn( + tcx, + self.ctx().body_cache().get(fun).body(), + &mut |_, _| Ok(()), + &mut f, + PrettyPrintMirOptions { + include_extra_comments: false, + }, + ) + .unwrap(); + } + tcx.dcx().span_err( + *span, + format!( + "Expected argument node for field {} ({fty:?}) in {}, found none", + id.as_usize(), + tcx.def_path_str(def_id) + ), + ); + continue; + }; self.graph.add_edge( arg.to_index(), ridx.to_index(), @@ -663,6 +756,18 @@ impl<'tcx, 'a> GraphAssembler<'tcx, 'a> { } } +fn place_ty_from_ty_and_projections<'tcx>( + tcx: TyCtxt<'tcx>, + ty: ty::Ty<'tcx>, + projection: &[mir::PlaceElem<'tcx>], +) -> mir::tcx::PlaceTy<'tcx> { + projection + .iter() + .fold(mir::tcx::PlaceTy::from_ty(ty), |place_ty, &elem| { + place_ty.projection_ty(tcx, elem) + }) +} + fn globalize_node<'tcx, K: Clone>( vis: &mut VisitDriver<'tcx, '_, K>, node: &DepNode<'tcx, OneHopLocation>, @@ -680,6 +785,71 @@ fn globalize_node<'tcx, K: Clone>( }, }, is_arg: node.use_.as_arg(), + same: is_pure_node(vis, node), + } +} + +fn place_is_idempotent_projection(place: mir::Place<'_>) -> bool { + use mir::PlaceElem; + place.projection.iter().all(|p| { + matches!( + p, + PlaceElem::Deref | PlaceElem::Subtype(_) | PlaceElem::OpaqueCast(_) + ) + }) +} + +fn is_pure_node<'tcx, K: Clone>( + vis: &VisitDriver<'tcx, '_, K>, + node: &DepNode<'tcx, OneHopLocation>, +) -> bool { + use mir::{Operand, Rvalue::*, StatementKind as Sk}; + let RichLocation::Location(loc) = node.at.location else { + // Only argument and return nodes appear at these locations and they + // are never operationally changed. + return true; + }; + match vis.current_body().stmt_at(loc) { + Either::Left(mir::Statement { kind, .. }) => match kind { + Sk::FakeRead(..) + | Sk::Deinit(..) + | Sk::StorageLive(..) + | Sk::StorageDead(..) + | Sk::Retag(..) + | Sk::PlaceMention(..) + | Sk::AscribeUserType(..) + | Sk::Coverage(..) + | Sk::ConstEvalCounter + | Sk::Nop + | Sk::BackwardIncompatibleDropHint { .. } => true, + Sk::Intrinsic(..) | Sk::SetDiscriminant { .. } => false, + Sk::Assign(a) => { + // This is a conservative match. If the place is projected out + // at all, we say it's not the same. In theory we could, for + // example, check whether it gets assigned to another projection + // on the same type and would admit more patterns. + + let place = match &a.1 { + Use(op) | Cast(_, op, _) | ShallowInitBox(op, _) => match op { + Operand::Copy(place) | Operand::Move(place) => place, + Operand::Constant(_) => return true, + }, + + Ref(_, _, place) | RawPtr(_, place) => place, + Aggregate(_, aggs) => { + return aggs + .iter() + .filter_map(|o| o.place()) + .all(place_is_idempotent_projection) + } + ThreadLocalRef(..) => return true, + Repeat(..) | Len(..) | BinaryOp(..) | NullaryOp(..) | UnaryOp(..) + | Discriminant(..) | CopyForDeref(..) => return false, + }; + place_is_idempotent_projection(*place) + } + }, + _ => false, } } @@ -715,7 +885,14 @@ impl<'tcx, K: std::hash::Hash + Eq + Clone> Visitor<'tcx, K> for GraphAssembler< _is_at_start: bool, ) { let [parent_table, this_table] = self.nodes.last_chunk_mut().unwrap(); - this_table[in_this.index()] = parent_table[in_caller.index()] + let parent_idx = parent_table[in_caller.index()]; + this_table[in_this.index()] = parent_idx; + + // We now know that this function is being inlined so we clear the + // purity flag in the parent which would have been set there previously + // as the statements were being traversed. + let weight = self.graph.node_weight_mut(parent_idx.0).unwrap(); + weight.same = true } fn visit_node( @@ -782,3 +959,51 @@ impl<'tcx, K: std::hash::Hash + Eq + Clone> Visitor<'tcx, K> for GraphAssembler< ); } } + +struct StacktracePrinter<'tcx> { + tcx: TyCtxt<'tcx>, + locations_printed: u32, + target_depth: usize, +} + +impl<'tcx> StacktracePrinter<'tcx> { + fn new(tcx: TyCtxt<'tcx>, target_depth: u32) -> Self { + Self { + tcx, + locations_printed: 0, + target_depth: target_depth as usize, + } + } +} + +impl<'tcx> Visitor<'tcx, u32> for StacktracePrinter<'tcx> { + fn visit_partial_graph( + &mut self, + vis: &mut VisitDriver<'tcx, '_, u32>, + partial_graph: &PartialGraph<'tcx, u32>, + ) { + let call_string = vis.call_stack(); + if self.target_depth <= call_string.len() && self.locations_printed < 5 { + self.locations_printed += 1; + println!( + "Found deep graph for {}", + self.tcx.def_path_str(partial_graph.def_id()) + ); + for loc in call_string.iter().rev() { + let fun = loc.function; + + let span = self.tcx.def_span(fun); + let smap = &self.tcx.sess.source_map(); + let (_, start, ..) = smap.span_to_location_info(span); + println!( + " Called from {:100} {}:{start}", + self.tcx.def_path_str(fun), + smap.span_to_filename(span) + .display(rustc_span::FileNameDisplayPreference::Local), + ) + } + println!(); + } + vis.visit_partial_graph(self, partial_graph); + } +} diff --git a/crates/paralegal-flow/src/ana/inline_judge.rs b/crates/paralegal-flow/src/ana/inline_judge.rs index 85286d21f4..ec9f7c71d3 100644 --- a/crates/paralegal-flow/src/ana/inline_judge.rs +++ b/crates/paralegal-flow/src/ana/inline_judge.rs @@ -5,7 +5,7 @@ use paralegal_spdg::{utils::write_sep, Identifier}; use rustc_hir::def_id::{CrateNum, DefId}; use rustc_middle::ty::{ AssocKind, BoundVariableKind, Clause, ClauseKind, Instance, ProjectionPredicate, - TraitPredicate, TypingEnv, + TraitPredicate, TyCtxt, TypingEnv, }; use rustc_span::Span; use rustc_type_ir::{PredicatePolarity, TyKind}; @@ -87,7 +87,7 @@ impl<'tcx> InlineJudge<'tcx> { .marker_if_unloadable(marker_target_def_id) .is_some() => { - InlineJudgement::AbstractViaType("cannot unloadable item") + InlineJudgement::AbstractViaType("unloadable item") } _ if self.ctx.tcx().is_constructor(marker_target_def_id) => { // This is an enum constructor. It would be better to handle @@ -135,14 +135,20 @@ impl<'tcx> InlineJudge<'tcx> { InliningDepth::Unconstrained => InlineJudgement::Inline(false), }; if let InlineJudgement::AbstractViaType(reason) = judgement { - let emit_err = !(is_marked || self.ctx.opts().relaxed()); - self.ensure_is_safe_to_approximate( - info.param_env, - info.callee, - info.span, - emit_err, - reason, - ) + if !self + .marker_ctx() + .all_markers_associated_with(marker_target_def_id) + .any(|m| m.as_str().starts_with("std:")) + { + let emit_err = !(is_marked || self.ctx.opts().relaxed()); + self.ensure_is_safe_to_approximate( + info.param_env, + info.callee, + info.span, + emit_err, + reason, + ) + } } judgement } @@ -192,14 +198,36 @@ struct SafetyChecker<'tcx> { reason: &'static str, } +struct PpInst<'tcx> { + tcx: TyCtxt<'tcx>, + i: Instance<'tcx>, +} + +impl std::fmt::Display for PpInst<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.tcx.def_path_str(self.i.def_id()))?; + if !self.i.args.is_empty() { + write!(f, "<")?; + write_sep(f, ",", self.i.args, |a, fmt| write!(fmt, "{:?}", a))?; + write!(f, ">")?; + } + Ok(()) + } +} + impl<'tcx> SafetyChecker<'tcx> { /// Emit an error or a warning with some preformatted messaging. fn err(&self, s: &str, span: Span) { let sess = self.ctx.tcx().dcx(); let msg = format!( - "the call to {:?} is not safe to abstract as demanded by '{}', because of: {s}", - self.resolved, self.reason + "the call to {} is not safe to abstract as demanded by '{}', because of: {s}", + PpInst { + i: self.resolved, + tcx: self.ctx.tcx() + }, + self.reason ); + return; if self.emit_err { let mut diagnostic = sess.struct_span_err(span, msg); diagnostic.span_note(self.call_span, "Called from here"); @@ -213,11 +241,20 @@ impl<'tcx> SafetyChecker<'tcx> { /// Emit an error that mentions the `markers` found fn err_markers(&self, s: &str, markers: &[Identifier], span: Span) { - if !markers.is_empty() { + let mut markers = markers + .iter() + .filter(|m| !m.as_str().starts_with("std:")) + .peekable(); + if markers.peek().is_some() { self.err( &format!( "{s}: found marker(s) {}", - Print(|fmt| write_sep(fmt, ", ", markers, |elem, fmt| write!(fmt, "'{elem}'"))) + Print( + |fmt| write_sep(fmt, ", ", markers.clone(), |elem, fmt| write!( + fmt, + "'{elem}'" + )) + ) ), span, ); diff --git a/crates/paralegal-flow/src/ana/mod.rs b/crates/paralegal-flow/src/ana/mod.rs index 5af696ec78..d9858b840b 100644 --- a/crates/paralegal-flow/src/ana/mod.rs +++ b/crates/paralegal-flow/src/ana/mod.rs @@ -40,6 +40,7 @@ use rustc_span::{ErrorGuaranteed, FileNameDisplayPreference, Span as RustSpan, S mod graph_converter; mod inline_judge; +mod simple_interpreter; use std::time::Duration; diff --git a/crates/paralegal-flow/src/ana/simple_interpreter.rs b/crates/paralegal-flow/src/ana/simple_interpreter.rs new file mode 100644 index 0000000000..2afed6aacd --- /dev/null +++ b/crates/paralegal-flow/src/ana/simple_interpreter.rs @@ -0,0 +1,186 @@ +use rustc_middle::{ + mir::{self, visit, Operand, Place, Rvalue}, + ty::TyCtxt, +}; + +#[derive(Clone)] +enum InterpretationState<'tcx> { + Uninitialized, + Argument, + Set(Place<'tcx>), + Err(String), +} + +pub struct SimpleInterpreter<'tcx> { + tcx: TyCtxt<'tcx>, + locals: Vec>, +} + +impl<'tcx> SimpleInterpreter<'tcx> { + fn new(tcx: TyCtxt<'tcx>, num_args: usize, num_locals: usize) -> Self { + assert!(num_args < num_locals); + let mut locals = vec![InterpretationState::Uninitialized; num_locals]; + for i in 0..(num_args + 1) { + locals[i] = InterpretationState::Argument; + } + Self { tcx, locals } + } + + pub fn resolve(&self, place: Place<'tcx>) -> Result, String> { + match self.locals.get(place.local.as_usize()) { + Some(InterpretationState::Argument) => Ok(place), + Some(InterpretationState::Set(p)) => self + .resolve(*p) + .map(|p| p.project_deeper(place.projection, self.tcx)), + Some(InterpretationState::Err(e)) => Err(e.clone()), + _ => Err(format!("Uninitialized local {place:?}")), + } + } + + pub fn interpret_body(tcx: TyCtxt<'tcx>, body: &mir::Body<'tcx>) -> Result { + let mut interpreter = SimpleInterpreter::new(tcx, body.arg_count, body.local_decls.len()); + use visit::Visitor; + interpreter.visit_body(body); + Ok(interpreter) + } +} + +impl<'tcx> visit::Visitor<'tcx> for SimpleInterpreter<'tcx> { + fn visit_assign( + &mut self, + place: &Place<'tcx>, + rvalue: &Rvalue<'tcx>, + location: mir::Location, + ) { + let state = &mut self.locals[place.local.as_usize()]; + match rvalue { + Rvalue::Use(Operand::Move(place)) | Rvalue::Use(Operand::Copy(place)) => match state { + InterpretationState::Uninitialized => { + *state = InterpretationState::Set(*place); + } + InterpretationState::Set(prior) => { + *state = InterpretationState::Err(format!("Duplicate assignment to local {place:?} at {location:?}. Previous assignment {prior:?}")); + } + _ => (), + }, + _ => { + *state = InterpretationState::Err(format!("Unsupported rvalue: {rvalue:?}")); + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use rustc_abi::FieldIdx; + use rustc_borrowck::consumers::BodyWithBorrowckFacts; + use rustc_middle::mir::{self, ProjectionElem}; + use rustc_utils::test_utils::CompileBuilder; + use std::collections::HashMap; + + fn clear_projection_elem(e: ProjectionElem) -> ProjectionElem<(), ()> { + match e { + ProjectionElem::Deref => ProjectionElem::Deref, + ProjectionElem::Field(f, _) => ProjectionElem::Field(f, ()), + ProjectionElem::Index(_) => ProjectionElem::Index(()), + ProjectionElem::ConstantIndex { + offset, + min_length, + from_end, + } => ProjectionElem::ConstantIndex { + offset, + min_length, + from_end, + }, + ProjectionElem::Subslice { from, to, from_end } => { + ProjectionElem::Subslice { from, to, from_end } + } + ProjectionElem::Downcast(ty, variant_index) => { + ProjectionElem::Downcast(ty, variant_index) + } + ProjectionElem::OpaqueCast(..) => ProjectionElem::OpaqueCast(()), + ProjectionElem::Subtype(_) => ProjectionElem::Subtype(()), + } + } + + fn projections_from_string(s: &str) -> Vec> { + s.split_whitespace() + .map(|s| match s { + _ if s.starts_with('.') => { + ProjectionElem::Field(FieldIdx::from_u32(s[1..].parse().unwrap()), ()) + } + "*" => ProjectionElem::Deref, + _ => panic!("Invalid projection element: {s}"), + }) + .collect() + } + + struct TestHelper<'tcx> { + name_map: HashMap<&'tcx str, Place<'tcx>>, + tcx: TyCtxt<'tcx>, + body: &'tcx BodyWithBorrowckFacts<'tcx>, + interpreter: SimpleInterpreter<'tcx>, + } + + impl<'tcx> TestHelper<'tcx> { + fn new(tcx: TyCtxt<'tcx>, body: &'tcx BodyWithBorrowckFacts<'tcx>) -> Self { + let name_map = body + .body + .var_debug_info + .iter() + .filter_map(|info| { + if let mir::VarDebugInfoContents::Place(p) = info.value { + Some((info.name.as_str(), p)) + } else { + None + } + }) + .collect::>(); + let interpreter = SimpleInterpreter::interpret_body(tcx, &body.body).unwrap(); + Self { + tcx, + body, + interpreter, + name_map, + } + } + + fn assert_resolves_to(&self, name: &str, base: &str, projections: &str) { + let start = self.name_map[name]; + let base = self.name_map[base]; + let resolved = self.interpreter.resolve(start).unwrap(); + assert_eq!(resolved.local, base.local); + assert_eq!( + resolved + .projection + .iter() + .map(clear_projection_elem) + .collect::>(), + base.projection + .iter() + .map(clear_projection_elem) + .chain(projections_from_string(projections)) + .collect::>() + ); + } + } + + #[test] + #[ignore = "Somehow this can't load MIR bodies???"] + fn test_simple_interpreter() { + CompileBuilder::new(stringify! { + fn foo(x: ((i32, i32), i32)) { + let y = x.0; + let z = y.0; + } + }) + .compile(|result| { + let (_, body) = result.as_body(); + let helper = TestHelper::new(result.tcx, body); + helper.assert_resolves_to("y", "x", ".0"); + helper.assert_resolves_to("z", "x", ".0 .0"); + }) + .unwrap(); + } +} diff --git a/crates/paralegal-flow/src/ann/db/mod.rs b/crates/paralegal-flow/src/ann/db/mod.rs index 13b6526aab..7b1971049f 100644 --- a/crates/paralegal-flow/src/ann/db/mod.rs +++ b/crates/paralegal-flow/src/ann/db/mod.rs @@ -15,9 +15,7 @@ use crate::{ args::{Args, Stub}, utils::{ self, is_function_like, - resolve::{ - expect_resolve_string_to_def_id, report_resolution_err, resolve_string_to_def_id, - }, + resolve::{self, expect_resolve_string_to_def_id, resolve_string_to_def_id}, IntoDefId, }, Either, HashMap, @@ -29,6 +27,8 @@ use flowistry_pdg_construction::source_access::{ use itertools::Itertools; use paralegal_spdg::{Identifier, TypeId}; +pub use paralegal_spdg::AutoMarkers; + use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::fx::FxHashSet; use rustc_errors::DiagMessage; @@ -476,44 +476,6 @@ impl ItemMarkers { } } -pub struct AutoMarkers { - pub side_effect_unknown_virtual: Identifier, - pub side_effect_foreign: Identifier, - pub side_effect_unknown_fn_ptr: Identifier, - pub side_effect_raw_ptr: Identifier, - pub side_effect_transmute: Identifier, - pub side_effect_unknown: Identifier, - pub side_effect_intrinsic: Identifier, -} - -impl Default for AutoMarkers { - fn default() -> Self { - AutoMarkers { - side_effect_unknown_virtual: Identifier::new_intern("auto:side-effect:unknown:virtual"), - side_effect_foreign: Identifier::new_intern("auto:side-effect:foreign"), - side_effect_unknown_fn_ptr: Identifier::new_intern("auto:side-effect:unknown:fn-ptr"), - side_effect_raw_ptr: Identifier::new_intern("auto:side-effect:raw-ptr"), - side_effect_transmute: Identifier::new_intern("auto:side-effect:transmute"), - side_effect_unknown: Identifier::new_intern("auto:side-effect:unknown"), - side_effect_intrinsic: Identifier::new_intern("auto:side-effect:intrinsic"), - } - } -} - -impl AutoMarkers { - pub fn all(&self) -> [Identifier; 7] { - [ - self.side_effect_unknown_virtual, - self.side_effect_foreign, - self.side_effect_unknown_fn_ptr, - self.side_effect_raw_ptr, - self.side_effect_transmute, - self.side_effect_unknown, - self.side_effect_intrinsic, - ] - } -} - pub struct FunctionMarkerStat<'tcx> { pub function: MaybeMonomorphized<'tcx>, pub is_constructor: bool, @@ -576,10 +538,10 @@ impl<'tcx> MarkerDatabase<'tcx> { .build_config() .stubs .iter() - .filter_map(|(k, v)| { - let res = expect_resolve_string_to_def_id(tcx, k, args.relaxed()); - let res = if args.relaxed() { res? } else { res.unwrap() }; - Some((res, v)) + .flat_map(|(k, v)| { + expect_resolve_string_to_def_id(tcx, k, args.relaxed()) + .into_iter() + .zip(std::iter::repeat(v)) }) .collect(); let included_crates = Rc::new(args.anactrl().inclusion_predicate(tcx)); @@ -597,6 +559,11 @@ impl<'tcx> MarkerDatabase<'tcx> { for ann in anns { match ann { Annotation::Marker(r) => { + trace!( + "Assigning marker {} to {}", + r.marker, + tcx.def_path_str(item) + ); for s in refinement_to_selectors(r.refinement) { markers .entry(item) @@ -783,8 +750,7 @@ fn parse_external_marker_file(s: &str) -> anyhow::Result { /// Given the TOML of external annotations we have parsed, resolve the paths /// (keys of the map) to [`DefId`]s. fn resolve_external_markers(opts: &Args, tcx: TyCtxt) -> ExternalMarkers { - //let relaxed = opts.relaxed(); - let relaxed = false; + let relaxed = opts.relaxed(); if let Some(annotation_file) = opts.marker_control().external_annotations() { let data = std::fs::read_to_string(annotation_file).unwrap_or_else(|_| { panic!( @@ -806,21 +772,23 @@ fn resolve_external_markers(opts: &Args, tcx: TyCtxt) -> ExternalMarkers { let new_map: ExternalMarkers = from_toml .iter() - .filter_map(|(path, entries)| { + .flat_map(|(path, entries)| { let res = resolve_string_to_def_id(tcx, path); let must_succeed = entries .iter() .any(|entry| !entry.refinement._internal_can_fail_resolve_silently); - let def_id = match res { - Err(e) if !must_succeed => { + let def_ids = res.unwrap_or_else(|e| { + if !must_succeed { trace!("Failed to resolve path {}: {:?}", path, e); - return None; + } else { + tcx.dcx() + .err(format!("Failed to resolve path {}: {:?}", path, e)); } - _ => report_resolution_err(tcx, path, relaxed, res)?, - }; - Some((def_id, entries)) + vec![] + }); + def_ids.into_iter().zip(std::iter::repeat(entries)) }) - .flat_map(|(def_id, entries)| { + .flat_map(|(res, entries)| { let on_module_children = entries .iter() .fold(None, |acc, entry| { @@ -829,7 +797,7 @@ fn resolve_external_markers(opts: &Args, tcx: TyCtxt) -> ExternalMarkers { }) { tcx.dcx().err(format!( "Conflicting use of `on_all_module_children` on {}", - tcx.def_path_str(def_id) + res.as_string(tcx) )); } Some( @@ -838,20 +806,34 @@ fn resolve_external_markers(opts: &Args, tcx: TyCtxt) -> ExternalMarkers { ) }) .unwrap_or(false); - let def_kind = tcx.def_kind(def_id); - let only_self = [def_id]; + let only_self = match res { + resolve::Res::Def(_, id) => Box::new([id]) as Box<[_]>, + _ => Box::new([]), + }; let def_ids = if on_module_children { - let defs = match def_kind { - DefKind::Struct | DefKind::Enum => tcx.inherent_impls(def_id), - DefKind::Mod | DefKind::Impl { .. } => &only_self, - _ => panic!( - "Expected module-like def kind for {}, got {def_kind:?}", - tcx.def_path_str(def_id) - ), + let defs = match res { + resolve::Res::PrimTy(t) => tcx.incoherent_impls(t), + resolve::Res::Def(_, id) => { + let def_kind =tcx.def_kind(id); + if !matches!(def_kind, DefKind::Impl {..} | DefKind::Struct | DefKind::Enum | DefKind::Mod | DefKind::Trait) { + tcx.dcx().err(format!("on_all_module_children used on a non-module-like item {}, has kind {def_kind:?}", tcx.def_path_str(id))); + }; + &only_self + }, }; + utils::flatten_child_items(tcx, defs.iter().copied()) } else { - [def_id].into_iter().collect() + match res { + resolve::Res::Def(_, id) => Some(id), + resolve::Res::PrimTy(t) => { + tcx.dcx() + .err(format!("Cannot assign markers to primitive type {t:?}")); + None + } + } + .into_iter() + .collect() }; def_ids.into_iter().flat_map(|def_id| { entries.iter().map(move |entry| { @@ -860,7 +842,22 @@ fn resolve_external_markers(opts: &Args, tcx: TyCtxt) -> ExternalMarkers { }) }) }) - .collect(); + .into_grouping_map() + .reduce(|mut one: Vec<_>, _, mut other| { + one.extend(other.drain(..)); + one + }); + // let mut f = std::io::BufWriter::new( + // std::fs::File::create("computed-external-marker-assignments.txt").unwrap(), + // ); + // use std::io::Write; + // for (item, markers) in new_map.iter() { + // write!(f, "{:20} : ", tcx.def_path_str(item)).unwrap(); + // for ann in markers.iter() { + // write!(f, "{}, ", ann.marker).unwrap(); + // } + // writeln!(f).unwrap(); + // } new_map } else { HashMap::new() diff --git a/crates/paralegal-flow/src/ann/db/reachable.rs b/crates/paralegal-flow/src/ann/db/reachable.rs index 7a5bbd808a..628b1df054 100644 --- a/crates/paralegal-flow/src/ann/db/reachable.rs +++ b/crates/paralegal-flow/src/ann/db/reachable.rs @@ -35,6 +35,10 @@ impl<'tcx> MarkerCtx<'tcx> { pub fn get_reachable_markers(&self, res: impl Into>) -> &[Identifier] { let res = res.into(); let def_id = res.def_id(); + trace!( + "Checking reachable markers for {}", + self.tcx().def_path_str(def_id) + ); let mark_side_effects = self.db().config.marker_control().mark_side_effects(); if self.is_marked(def_id) { trace!(" Is marked"); @@ -74,7 +78,10 @@ impl<'tcx> MarkerCtx<'tcx> { } let non_direct = (!is_self_marked).then(|| self.get_reachable_markers(res)); + let filter_std_markers = self.db().config.marker_control().elide_on_whitelist_markers(); + direct_markers.chain(non_direct.into_iter().flatten().copied()) + .filter(move |m| !filter_std_markers || !(m.as_str().starts_with("std:") || m.as_str().starts_with("safe-libs:"))) } /// If the transitive marker cache did not contain the answer, this is what @@ -119,6 +126,7 @@ impl<'tcx> MarkerCtx<'tcx> { use mir::visit::Visitor; vis.visit_body(&mono_body); let found = vis.found_markers; + trace!("Found markers {found:?}"); found.into_iter().collect() } diff --git a/crates/paralegal-flow/src/ann/parse.rs b/crates/paralegal-flow/src/ann/parse.rs index 47dbff5dbe..cb7d988820 100644 --- a/crates/paralegal-flow/src/ann/parse.rs +++ b/crates/paralegal-flow/src/ann/parse.rs @@ -501,7 +501,7 @@ pub(crate) fn otype_ann_match(ann: &ast::AttrArgs, tcx: TyCtxt) -> Result FxHashSet { diff --git a/crates/paralegal-flow/src/args.rs b/crates/paralegal-flow/src/args.rs index 1ed970ad7e..a48cb01482 100644 --- a/crates/paralegal-flow/src/args.rs +++ b/crates/paralegal-flow/src/args.rs @@ -403,6 +403,7 @@ impl Args { self.relaxed.hash(hasher); self.target.hash(hasher); self.result_path.hash(hasher); + self.anactrl().hash(hasher); config_hash_for_file(&self.marker_control.external_annotations, hasher); } @@ -469,6 +470,8 @@ pub struct MarkerControl { /// Implies `--include-std`. #[clap(long, env)] side_effect_markers: bool, + #[clap(long)] + elide_on_whitelist_markers: bool, } impl MarkerControl { @@ -479,6 +482,10 @@ impl MarkerControl { pub fn mark_side_effects(&self) -> bool { self.side_effect_markers } + + pub fn elide_on_whitelist_markers(&self) -> bool { + self.elide_on_whitelist_markers + } } /// Arguments that control the flow analysis @@ -515,6 +522,8 @@ struct ClapAnalysisCtrl { /// Recompile the standard library and make the code available for analysis. #[clap(long, env)] include_std: bool, + #[clap(long)] + fail_on_deep_call_chain: Option, } #[derive(serde::Serialize, serde::Deserialize)] @@ -532,6 +541,27 @@ pub struct AnalysisCtrl { included_crate_cache: OnceLock>, no_pdg_cache: bool, include_std: bool, + fail_on_deep_call_chain: Option, +} + +impl std::hash::Hash for AnalysisCtrl { + fn hash(&self, state: &mut H) { + let Self { + analyze, + inlining_depth, + include, + included_crate_cache: _, + no_pdg_cache, + include_std, + fail_on_deep_call_chain + } = self; + analyze.hash(state); + inlining_depth.hash(state); + include.hash(state); + no_pdg_cache.hash(state); + include_std.hash(state); + fail_on_deep_call_chain.hash(state); + } } impl Default for AnalysisCtrl { @@ -543,6 +573,7 @@ impl Default for AnalysisCtrl { no_pdg_cache: false, included_crate_cache: OnceLock::new(), include_std: false, + fail_on_deep_call_chain: None, } } } @@ -558,6 +589,7 @@ impl TryFrom for AnalysisCtrl { no_adaptive_approximation, k_depth, include_std, + fail_on_deep_call_chain } = value; let inlining_depth = if no_interprocedural_analysis { @@ -575,11 +607,12 @@ impl TryFrom for AnalysisCtrl { no_pdg_cache, included_crate_cache: OnceLock::new(), include_std, + fail_on_deep_call_chain }) } } -#[derive(serde::Serialize, serde::Deserialize, strum::EnumIs, strum::AsRefStr, Clone)] +#[derive(serde::Serialize, serde::Deserialize, strum::EnumIs, strum::AsRefStr, Clone, Hash)] pub enum InliningDepth { /// Inline to arbitrary depth Unconstrained, @@ -671,6 +704,10 @@ impl AnalysisCtrl { pub fn include_std(&self) -> bool { self.include_std } + + pub fn fail_on_deep_call_chain(&self) -> Option { + self.fail_on_deep_call_chain + } } impl DumpArgs { diff --git a/crates/paralegal-flow/src/bin/paralegal-flow.rs b/crates/paralegal-flow/src/bin/paralegal-flow.rs index b0b8c84a65..29e3184abc 100644 --- a/crates/paralegal-flow/src/bin/paralegal-flow.rs +++ b/crates/paralegal-flow/src/bin/paralegal-flow.rs @@ -28,10 +28,11 @@ impl VersionArgs { } let second = chars.next(); if second == Some('-') { - self.handle_long_arg(&a[2..]) - } - for c in second.into_iter().chain(chars) { - self.handle_short_arg(c); + self.handle_long_arg(&a[2..]); + } else { + for c in second.into_iter().chain(chars) { + self.handle_short_arg(c); + } } } diff --git a/crates/paralegal-flow/src/discover.rs b/crates/paralegal-flow/src/discover.rs index 67175600b9..da99ad1535 100644 --- a/crates/paralegal-flow/src/discover.rs +++ b/crates/paralegal-flow/src/discover.rs @@ -65,8 +65,8 @@ impl<'tcx> CollectingVisitor<'tcx> { .anactrl() .selected_targets() .iter() - .filter_map(|path| { - let def_id = expect_resolve_string_to_def_id(tcx, path, opts.relaxed())?; + .flat_map(|path| expect_resolve_string_to_def_id(tcx, path, opts.relaxed())) + .filter_map(|def_id| { if !def_id.is_local() { tcx.dcx().span_err(tcx.def_span(def_id), format!("found an external function {def_id:?} as analysis target. Analysis targets are required to be local.")); return None; diff --git a/crates/paralegal-flow/src/lib.rs b/crates/paralegal-flow/src/lib.rs index 474daf594f..475fd63926 100644 --- a/crates/paralegal-flow/src/lib.rs +++ b/crates/paralegal-flow/src/lib.rs @@ -568,23 +568,31 @@ impl rustc_plugin::RustcPlugin for DfppPlugin { .cargo_args() .iter() .any(|a| a.starts_with("-p") || a == "--package"); - if args.target().is_some() | args_select_package { - let mut new_cmd = std::process::Command::new(cargo.get_program()); - for (k, v) in cargo.get_envs() { - if k == "RUSTC_WORKSPACE_WRAPPER" { - new_cmd.env("RUSTC_WRAPPER", v.unwrap()); - } else if let Some(v) = v { - new_cmd.env(k, v); - } else { - new_cmd.env_remove(k); - } - } - if let Some(wd) = cargo.get_current_dir() { - new_cmd.current_dir(wd); + // And here we also set cargo to the specific cargo for our toolchain instead + let mut new_cmd = std::process::Command::new( + std::path::Path::new(env!("SYSROOT_PATH")) + .join("bin") + .join("cargo"), + ); + for (k, v) in cargo.get_envs() { + if k == "RUSTC_WORKSPACE_WRAPPER" { + new_cmd.env("RUSTC_WRAPPER", v.unwrap()); + } else if let Some(v) = v { + new_cmd.env(k, v); + } else { + new_cmd.env_remove(k); } + } + if let Some(wd) = cargo.get_current_dir() { + new_cmd.current_dir(wd); + } + if args.target().is_some() | args_select_package { new_cmd.args(cargo.get_args().filter(|a| *a != "--all")); - *cargo = new_cmd + } else { + new_cmd.args(cargo.get_args()); } + + *cargo = new_cmd; if let Some(target) = args.target().as_ref() { if !args_select_package { cargo.args(["-p", target]); @@ -594,6 +602,7 @@ impl rustc_plugin::RustcPlugin for DfppPlugin { cargo.arg("-Zbuild-std=std,core,alloc,proc_macro"); } cargo.args(args.cargo_args()); + debug!("Running cargo command: {cargo:?}"); } fn run( diff --git a/crates/paralegal-flow/src/test_utils.rs b/crates/paralegal-flow/src/test_utils.rs index 190ba30565..b6b1bcee63 100644 --- a/crates/paralegal-flow/src/test_utils.rs +++ b/crates/paralegal-flow/src/test_utils.rs @@ -25,7 +25,7 @@ use std::{ use std::{process::Command, sync::atomic::Ordering}; use paralegal_spdg::{ - traverse::{generic_flows_to, generic_influencers, EdgeSelection}, + traverse::{edge_generic_flows_to, generic_flows_to, generic_influencers, EdgeSelection}, utils::{display_list, write_sep}, DefInfo, DisplayPath, EdgeInfo, Endpoint, InstructionInfo, InstructionKind, Node, NodeInfo, NodeKind, TypeId, SPDG, @@ -614,6 +614,22 @@ impl<'g> CtrlRef<'g> { } } + pub fn arguments_by_index(&'g self) -> Vec> { + let mut args = vec![]; + for n in self.ctrl.arguments.iter() { + let w = self.ctrl.graph.node_weight(*n).unwrap(); + let Some(a) = w.is_arg else { continue }; + if (a as usize) >= args.len() { + args.resize_with((a as usize) + 1, || NodeRefs { + graph: self, + nodes: vec![], + }); + } + args[a as usize].nodes.push(*n); + } + args + } + pub fn constants(&'g self) -> impl Iterator, Constant)> { let spdg = self.spdg(); spdg.all_sources().filter_map(|node| { @@ -652,6 +668,40 @@ impl<'g> CtrlRef<'g> { let auto = auto_markers.all(); auto.into_iter().flat_map(|m| self.marked(m)) } + + pub fn show_side_effects(&self, show_trace: bool) { + let auto_markers = AutoMarkers::default(); + let auto = auto_markers.all(); + for m in auto { + let marked = self.marked(m); + if !marked.is_empty() { + println!("Side effect {m}"); + } + for n in marked { + let d = DisplayPath::from( + &self.graph().desc.def_info[&n.info().at.leaf().function].path, + ); + println!( + "{} in {} in {}", + n.info().kind, + n.instruction_info().description, + d + ); + if !show_trace { + continue; + } + for loc in n.info().at.iter() { + let i_info = &self.graph.desc.instruction_info[&loc]; + let d_info = &self.graph().desc.def_info[&loc.function]; + println!( + " called from {} {}", + DisplayPath::from(&d_info.path), + i_info.span + ); + } + } + } + } } impl<'g> HasGraph<'g> for &FnRef<'g> { @@ -1009,6 +1059,27 @@ pub trait FlowsTo { .copied() .any(|n| self.spdg().graph.neighbors(n).next().is_some()) } + + fn flows_to_unchanged(&self, other: &impl FlowsTo) -> bool { + if self.spdg_ident() != other.spdg_ident() { + return false; + } + + let graph = &self.spdg().graph; + + let fgraph = petgraph::visit::NodeFiltered::from_fn(graph, |n| { + graph.node_weight(n).unwrap().same + || self.nodes().contains(&n) + || other.nodes().contains(&n) + }); + + generic_flows_to( + self.nodes().iter().copied(), + &fgraph, + other.nodes().iter().copied(), + ) + .is_some() + } } fn influences_ctrl_impl( @@ -1026,7 +1097,7 @@ fn influences_ctrl_impl( EdgeSelection::Control, ); - generic_flows_to( + edge_generic_flows_to( slf.nodes().iter().copied(), edge_selection, slf.spdg(), @@ -1070,7 +1141,7 @@ fn flows_to_impl( if slf.spdg_ident() != other.spdg_ident() { return false; } - generic_flows_to( + edge_generic_flows_to( slf.nodes().iter().copied(), edge_selection, slf.spdg(), diff --git a/crates/paralegal-flow/src/utils/mod.rs b/crates/paralegal-flow/src/utils/mod.rs index aba5902af0..2cfc8bd3db 100644 --- a/crates/paralegal-flow/src/utils/mod.rs +++ b/crates/paralegal-flow/src/utils/mod.rs @@ -818,21 +818,49 @@ pub fn flatten_child_items( ) -> FxHashSet { use rustc_hir::def::DefKind; let mut queue: Vec<_> = modules.into_iter().collect(); + trace!("flatten_child_items starting with: {:?}", queue); let mut seen = FxHashSet::default(); seen.extend(queue.iter().cloned()); let mut result = FxHashSet::default(); while let Some(module) = queue.pop() { - let children = match tcx.def_kind(module) { - DefKind::Mod => Box::new( - if let Some(local) = module.as_local() { + let def_kind = tcx.def_kind(module); + trace!( + "Processing item: {} with def kind {:?}", + tcx.def_path_str(module), + def_kind + ); + let children = match def_kind { + DefKind::Mod => { + let it = if let Some(local) = module.as_local() { tcx.module_children_local(local) } else { tcx.module_children(module) } .iter() - .filter_map(|c| c.res.opt_def_id()), - ) as Box>, + .filter_map(|c| c.res.opt_def_id()) + .filter(|c| { + let parent = tcx.opt_parent(*c); + assert!( + parent.is_some() || c.is_crate_root(), + "{c:?} has no parent but isn't a crate (is {:?})", + tcx.def_kind(*c) + ); + parent.is_some_and(|parent| parent == module) + }) + .chain( + // Trait impls are not contained in `module_children` where + // they are defined, but instead associated with the crate + // itself. + module + .as_crate_root() + .map(|c| tcx.trait_impls_in_crate(c)) + .into_iter() + .flatten() + .copied(), + ); + Box::new(it) as Box> + } DefKind::Impl { .. } => Box::new( tcx.associated_items(module) .in_definition_order() @@ -841,10 +869,18 @@ pub fn flatten_child_items( DefKind::Struct | DefKind::Enum => { Box::new(tcx.inherent_impls(module).iter().copied()) as Box<_> } - _ => continue, + _ => { + continue; + } }; for id in children { - match tcx.def_kind(id) { + let def_kind = tcx.def_kind(id); + trace!( + "Processing child item: {} with def kind {:?}", + tcx.def_path_str(id), + def_kind + ); + match def_kind { DefKind::Struct | DefKind::Enum | DefKind::Mod | DefKind::Impl { .. } if !seen.contains(&id) => { diff --git a/crates/paralegal-flow/src/utils/resolve.rs b/crates/paralegal-flow/src/utils/resolve.rs index 82009edb64..7546a4959c 100644 --- a/crates/paralegal-flow/src/utils/resolve.rs +++ b/crates/paralegal-flow/src/utils/resolve.rs @@ -1,4 +1,4 @@ -use std::hash::Hash; +use std::{hash::Hash, panic}; use ast::Mutability; use hir::{ @@ -18,7 +18,7 @@ use rustc_span::Symbol; #[derive(Debug, Clone, Copy)] pub enum Res { Def(DefKind, DefId), - PrimTy(PrimTy), + PrimTy(SimplifiedType), } #[derive(Clone, Debug)] @@ -50,7 +50,7 @@ impl Res { fn from_def_res(res: def::Res) -> Result { match res { def::Res::Def(k, i) => Ok(Res::Def(k, i)), - def::Res::PrimTy(t) => Ok(Res::PrimTy(t)), + def::Res::PrimTy(t) => Ok(Res::PrimTy(prim_ty_to_simp_ty(t))), other => Err(ResolutionError::UnconvertibleRes(other)), } } @@ -62,6 +62,24 @@ impl Res { panic!("Not a def") } } + + pub fn as_string(&self, tcx: TyCtxt<'_>) -> String { + match self { + Res::Def(_, id) => tcx.def_path_str(*id), + Res::PrimTy(ty) => format!("{ty:?}"), + } + } +} + +fn prim_ty_to_simp_ty(pt: PrimTy) -> SimplifiedType { + match pt { + PrimTy::Bool => SimplifiedType::Bool, + PrimTy::Char => SimplifiedType::Char, + PrimTy::Str => SimplifiedType::Str, + PrimTy::Int(i) => SimplifiedType::Int(ty::int_ty(i)), + PrimTy::Uint(u) => SimplifiedType::Uint(ty::uint_ty(u)), + PrimTy::Float(f) => SimplifiedType::Float(ty::float_ty(f)), + } } fn as_primitive_ty(name: Symbol) -> Option { @@ -103,7 +121,7 @@ fn find_primitive_impls(tcx: TyCtxt<'_>, name: Symbol) -> impl Iterator Option { +pub fn expect_resolve_string_to_def_id(tcx: TyCtxt, path: &str, relaxed: bool) -> Vec { report_resolution_err(tcx, path, relaxed, resolve_string_to_def_id(tcx, path)) } @@ -111,8 +129,8 @@ pub fn report_resolution_err( tcx: TyCtxt<'_>, path: &str, relaxed: bool, - obj: Result, -) -> Option { + obj: Result>, +) -> Vec { let report_err = if relaxed { |tcx: TyCtxt<'_>, err: String| { tcx.dcx().warn(err); @@ -122,20 +140,25 @@ pub fn report_resolution_err( tcx.dcx().err(err); } }; - let res = obj + let Some(res) = obj .map_err(|e| report_err(tcx, format!("Could not resolve {path}: {e:?}"))) - .ok()?; - match res { - Res::Def(_, did) => Some(did), - other => { - let msg = format!("expected {path} to resolve to an item, got {other:?}"); - report_err(tcx, msg); - None - } - } + .ok() + else { + return vec![]; + }; + res.into_iter() + .filter_map(|res| match res { + Res::Def(_, did) => Some(did), + other => { + let msg = format!("expected {path} to resolve to an item, got {other:?}"); + report_err(tcx, msg); + None + } + }) + .collect() } -pub fn resolve_string_to_def_id(tcx: TyCtxt, path: &str) -> Result { +pub fn resolve_string_to_def_id(tcx: TyCtxt, path: &str) -> Result> { let mut hasher = StableHasher::new(); path.hash(&mut hasher); let mut parser = new_parser_from_source_str( @@ -242,16 +265,20 @@ fn find_crates(tcx: TyCtxt<'_>, name: Symbol) -> impl Iterator + ' fn resolve_ty<'tcx>(tcx: TyCtxt<'tcx>, t: &Ty) -> Result> { match &t.kind { TyKind::Path(qslf, pth) => { - let adt = def_path_res(tcx, qslf.as_deref(), pth.segments.as_slice())?; + let results = def_path_res(tcx, qslf.as_deref(), pth.segments.as_slice())?; + let [adt] = results.as_slice() else { + return Err(ResolutionError::UnsupportedType(t.clone())); + }; Ok(match adt { Res::Def(_, did) => ty::Ty::new_adt(tcx, tcx.adt_def(did), ty::List::empty()), - Res::PrimTy(t) => match t { - PrimTy::Bool => tcx.types.bool, - PrimTy::Char => tcx.types.char, - PrimTy::Str => tcx.types.str_, - PrimTy::Int(i) => ty::Ty::new_int(tcx, ty::int_ty(i)), - PrimTy::Uint(u) => ty::Ty::new_uint(tcx, ty::uint_ty(u)), - PrimTy::Float(f) => ty::Ty::new_float(tcx, ty::float_ty(f)), + Res::PrimTy(pt) => match pt { + SimplifiedType::Bool => tcx.types.bool, + SimplifiedType::Char => tcx.types.char, + SimplifiedType::Str => tcx.types.str_, + SimplifiedType::Int(i) => ty::Ty::new_int(tcx, *i), + SimplifiedType::Uint(u) => ty::Ty::new_uint(tcx, *u), + SimplifiedType::Float(f) => ty::Ty::new_float(tcx, *f), + _ => return Err(ResolutionError::UnsupportedType(t.clone())), }, }) } @@ -262,6 +289,44 @@ fn resolve_ty<'tcx>(tcx: TyCtxt<'tcx>, t: &Ty) -> Result> { .collect::>>()? .as_ref(), )), + TyKind::Array(inner, const_) => { + if !matches!(inner.kind, TyKind::Infer) { + tcx.dcx() + .warn(format!("Ignoring non-wildcard slice type {inner:?}")); + } + if !matches!(const_.value.kind, rustc_ast::ExprKind::Underscore) { + tcx.dcx() + .warn(format!("Ignoring const argument {const_:?} in array type")); + } + let our_len = 42; + warn!("Array types may not behave properly with instance markers because we always use the instance of arrays of length {our_len}"); + Ok(ty::Ty::new_array( + tcx, + ty::Ty::new_param(tcx, 0, Symbol::intern("T")), + our_len, + )) + } + TyKind::Slice(inner) => { + if !matches!(inner.kind, TyKind::Infer) { + tcx.dcx() + .warn(format!("Ignoring non-wildcard slice type {inner:?}")); + } + Ok(ty::Ty::new_slice( + tcx, + ty::Ty::new_param(tcx, 0, Symbol::intern("T")), + )) + } + TyKind::Ref(lt, mt) => { + if lt.is_some() { + tcx.dcx().warn("Ignoring lifetimes in reference types"); + } + let inner = resolve_ty(tcx, &mt.ty)?; + let lt = ty::Region::new_var(tcx, ty::RegionVid::ZERO); + Ok(match mt.mutbl { + hir::Mutability::Mut => ty::Ty::new_mut_ref(tcx, lt, inner), + hir::Mutability::Not => ty::Ty::new_imm_ref(tcx, lt, inner), + }) + } _ => Err(ResolutionError::UnsupportedType(t.clone())), } } @@ -290,7 +355,8 @@ fn resolve_ty<'tcx>(tcx: TyCtxt<'tcx>, t: &Ty) -> Result> { /// non-deterministically. /// 2. It probably cannot resolve a qualified path if the base is a primitive /// type. E.g. `usize::abs_diff` resolves but `::abs_diff` does not. -pub fn def_path_res(tcx: TyCtxt, qself: Option<&QSelf>, path: &[PathSegment]) -> Result { +pub fn def_path_res(tcx: TyCtxt, qself: Option<&QSelf>, path: &[PathSegment]) -> Result> { + trace!("Resolving {qself:?} {path:?}"); let no_generics_supported = |segment: &PathSegment| { if segment.args.is_some() { tcx.dcx().err(format!( @@ -303,9 +369,14 @@ pub fn def_path_res(tcx: TyCtxt, qself: Option<&QSelf>, path: &[PathSegment]) -> [primitive] => { /* Start here for issue 1 */ let sym = primitive.ident.name; - return PrimTy::from_name(sym) - .map(Res::PrimTy) - .ok_or(ResolutionError::CannotResolvePrimitiveType(sym)); + if let Some(sim_ty) = as_primitive_ty(sym) { + return Ok(vec![Res::PrimTy(sim_ty)]); + } else { + ( + Box::new(find_crates(tcx, sym)) as Box>, + &[], + ) + } } [base, ref path @ ..] => { /* This is relevant for issue 2 */ @@ -334,28 +405,31 @@ pub fn def_path_res(tcx: TyCtxt, qself: Option<&QSelf>, path: &[PathSegment]) -> let TyKind::Path(qslf, pth) = &slf.ty.kind else { return Err(ResolutionError::UnsupportedType((*slf.ty).clone())); }; - let ty = def_path_res(tcx, qslf.as_deref(), &pth.segments)?; - let impls = tcx.inherent_impls(ty.def_id()); - Box::new(impls.iter().copied()) as Box<_> + Box::new( + def_path_res(tcx, qslf.as_deref(), &pth.segments)? + .into_iter() + .flat_map(|def_id| tcx.inherent_impls(def_id.def_id()).iter().copied()), + ) as Box> } else { - let r#trait = def_path_res(tcx, None, &path[..slf.position])?; - let r#type = resolve_ty(tcx, &slf.ty)?; let mut impls = vec![]; - /* This is relevant for issue 2 */ - tcx.for_each_relevant_impl(r#trait.def_id(), r#type, |i| impls.push(i)); - if impls.is_empty() { - if tcx - .lang_items() - .clone_fn() - .is_some_and(|c| c == r#trait.def_id()) - && r#type.is_unit() - { - tcx.dcx().err("Cannot assign markers to the Clone instance of (), is not a real function"); + for r#trait in def_path_res(tcx, None, &path[..slf.position])? { + let r#type = resolve_ty(tcx, &slf.ty)?; + /* This is relevant for issue 2 */ + tcx.for_each_relevant_impl(r#trait.def_id(), r#type, |i| impls.push(i)); + if impls.is_empty() { + if tcx + .lang_items() + .clone_fn() + .is_some_and(|c| c == r#trait.def_id()) + && r#type.is_unit() + { + tcx.dcx().err("Cannot assign markers to the Clone instance of (), is not a real function"); + } + return Err(ResolutionError::NoImplsFound( + r#trait.def_id(), + (*slf.ty).clone(), + )); } - return Err(ResolutionError::NoImplsFound( - r#trait.def_id(), - (*slf.ty).clone(), - )); } Box::new(impls.into_iter()) as Box<_> }, @@ -364,45 +438,57 @@ pub fn def_path_res(tcx: TyCtxt, qself: Option<&QSelf>, path: &[PathSegment]) -> }; let starts = starts.collect::>(); + trace!("starts: {:?}", starts); let starts = starts.into_iter(); - let mut last = Err(if let Some(f) = path.first() { + let empty_err = Err(if let Some(f) = path.first() { ResolutionError::CouldNotResolveCrate(f.ident.name) } else { ResolutionError::EmptyStarts }); - for first in starts { - last = path - .iter() - // for each segment, find the child item - .try_fold(Res::Def(tcx.def_kind(first), first), |res, segment| { - no_generics_supported(segment); - let segment = segment.ident.name; - let def_id = res.def_id(); - if let Some(item) = item_child_by_name(tcx, def_id, segment) { - item - } else if matches!(res, Res::Def(DefKind::Enum | DefKind::Struct, _)) { - // it is not a child item so check inherent impl items - tcx.inherent_impls(def_id) - .iter() - .find_map(|&impl_def_id| item_child_by_name(tcx, impl_def_id, segment)) - .unwrap_or(Err(ResolutionError::CouldNotFindChild { + let mut found = starts + .map(|first| { + path.iter() + // for each segment, find the child item + .try_fold(Res::Def(tcx.def_kind(first), first), |res, segment| { + no_generics_supported(segment); + let segment = segment.ident.name; + let def_id = res.def_id(); + if let Some(item) = item_child_by_name(tcx, def_id, segment) { + item + } else if matches!(res, Res::Def(DefKind::Enum | DefKind::Struct, _)) { + // it is not a child item so check inherent impl items + tcx.inherent_impls(def_id) + .iter() + .find_map(|&impl_def_id| item_child_by_name(tcx, impl_def_id, segment)) + .unwrap_or(Err(ResolutionError::CouldNotFindChild { + item: def_id, + segment, + search_space: SearchSpace::InherentImpl, + })) + } else { + Err(ResolutionError::CouldNotFindChild { item: def_id, segment, - search_space: SearchSpace::InherentImpl, - })) - } else { - Err(ResolutionError::CouldNotFindChild { - item: def_id, - segment, - search_space: SearchSpace::Mod, - }) - } - }); + search_space: SearchSpace::Mod, + }) + } + }) + }) + .collect::>(); - if last.is_ok() { - return last; - } + if found.is_empty() { + empty_err + } else if found.iter().all(|f| f.is_err()) { + found.pop().unwrap().map(|_| vec![]) + } else { + Ok(found.into_iter().filter_map(|f| f.ok()) + .inspect(|res| { + trace!(" Resolved to {res:?}"); + if let Res::Def(_, did) = res { + trace!(" {} at {:?}", tcx.def_path_str(did), tcx.def_span(did)); + } + }) + .collect()) } - last } diff --git a/crates/paralegal-flow/tests/async_tests.rs b/crates/paralegal-flow/tests/async_tests.rs index 30464c9c2d..4d63800fab 100644 --- a/crates/paralegal-flow/tests/async_tests.rs +++ b/crates/paralegal-flow/tests/async_tests.rs @@ -761,3 +761,66 @@ fn async_args_and_local_variable() { assert!(!no_source.flows_to_any(&sinks)); }) } + +#[test] +fn pattern_arguments() { + inline_test! { + struct Params { + one: usize, + two: usize, + three: usize, + four: usize, + } + + #[paralegal_flow::marker(target, arguments = [0])] + fn marked(result: usize) { + assert_eq!(result, 42); + } + + #[paralegal_flow::marker(source, arguments = [0])] + async fn main(Params { one, two, three, four } : Params) { + let result = one + three + four; + marked(result); + } + } + .check_ctrl(|ctrl| { + let target = ctrl.marked(Identifier::new_intern("target")); + let source = ctrl.marked(Identifier::new_intern("source")); + assert!(!target.is_empty()); + assert!(!source.is_empty()); + assert!(source.flows_to_data(&target)); + }); +} + +#[test] +fn pattern_arguments_poor_mans_tool_def() { + inline_test! { + struct Params { + one: usize, + two: usize, + three: usize, + four: usize, + } + + #[paralegal_flow::marker(target, arguments = [0])] + fn marked(result: usize) { + assert_eq!(result, 42); + } + + #[paralegal_flow::marker(source, arguments = [0])] + fn main(Params { one, two, three, four } : Params) + -> std::pin::Pin>> { + std::boxed::Box::pin(async move { + let result = one + three + four; + marked(result); + }) as std::pin::Pin>> + } + } + .check_ctrl(|ctrl| { + let target = ctrl.marked(Identifier::new_intern("target")); + let source = ctrl.marked(Identifier::new_intern("source")); + assert!(!target.is_empty()); + assert!(!source.is_empty()); + assert!(source.flows_to_data(&target)); + }); +} diff --git a/crates/paralegal-flow/tests/marker_tests.rs b/crates/paralegal-flow/tests/marker_tests.rs index 4e762ee8b2..0aa3277fa3 100644 --- a/crates/paralegal-flow/tests/marker_tests.rs +++ b/crates/paralegal-flow/tests/marker_tests.rs @@ -9,9 +9,15 @@ use paralegal_spdg::{Identifier, InstructionKind}; const TEST_CRATE_NAME: &str = "tests/marker-tests"; const EXTRA_ARGS: &[&str] = &["--no-interprocedural-analysis"]; +const CRATE_MARKER_CRATE_PATH: &str = "tests/test-crates-for-crate-marker/consumer"; + lazy_static! { static ref TEST_CRATE_ANALYZED: bool = run_paralegal_flow_with_flow_graph_dump_and(TEST_CRATE_NAME, EXTRA_ARGS); + static ref CRATE_MARKER_CRATE_ANALYZED: bool = run_paralegal_flow_with_flow_graph_dump_and( + CRATE_MARKER_CRATE_PATH, + &["--external-annotations", "external-annotations.toml"] + ); } macro_rules! define_test { @@ -20,6 +26,32 @@ macro_rules! define_test { }; } +macro_rules! crate_marker_test { + ($($t:tt)*) => { + define_flow_test_template!(CRATE_MARKER_CRATE_ANALYZED, CRATE_MARKER_CRATE_PATH, $($t)*); + }; +} + +crate_marker_test!(crate_marker : ctrl -> { + assert!(!ctrl.marked("found").is_empty()); +}); + +crate_marker_test!(serde_json: ctrl -> { + assert!(!ctrl.marked("serde").is_empty()); + ctrl.assert_purity(true); +}); + +crate_marker_test!(memchr: ctrl -> { + assert!(!ctrl.marked("memchr").is_empty()); + ctrl.assert_purity(true); +}); + +crate_marker_test!(marker_overlap: ctrl -> { + assert!(!ctrl.marked("found").is_empty()); + assert!(!ctrl.marked("submod-conflict").is_empty()); + assert!(!ctrl.marked("direct-conflict").is_empty()); +}); + #[test] fn use_wrapper() { InlineTestBuilder::new(stringify! { @@ -317,3 +349,84 @@ fn async_fn_marker() { assert!(source.flows_to_data(&sink)); }); } + +#[test] +fn lifetime_resolving() { + inline_test! { + use std::marker::PhantomData; + + trait Test { + fn test_method(&self); + } + + struct Ref<'a> { + _marker: PhantomData<&'a ()>, + } + + impl Test for Ref<'_> { + fn test_method(&self) { + todo!() + } + } + + fn main() { + Ref { _marker: PhantomData }.test_method(); + } + } + .with_marker_file( + " + [[\"::test_method\"]] + marker = \"present\" + on_argument = [0] + ", + ) + .check_ctrl(|ctrl| assert!(dbg!(ctrl.markers()).contains(&Identifier::new_intern("present")))); +} + +#[test] +fn dont_inline_on_std_marker() { + inline_test! { + #[paralegal_flow::marker(target1)] + fn canary1 () {} + + #[paralegal_flow::marker(target2)] + fn canary2 (arg: usize) {} + + fn hidden1() { + canary1() + } + + fn hidden2(arg: usize) { + canary2(arg) + } + + fn intermediate1() { + hidden1() + } + + fn intermediate2(arg: usize) { + hidden2(arg) + } + + fn main() { + intermediate1(); + intermediate2(0); + } + }.with_marker_file( + " + [[\"crate::hidden1\"]] + marker = \"std:test\" + on_return = true + + [[\"crate::hidden2\"]] + marker = \"safe-libs:test\" + on_argument = [0] + " + ).with_extra_args(["--elide-on-whitelist-markers"]) + .check_ctrl(|ctrl| { + assert!(ctrl.marked("std:test").is_empty()); + assert!(ctrl.marked("safe-libs:test").is_empty()); + assert!(ctrl.marked("target1").is_empty()); + assert!(ctrl.marked("target2").is_empty()); + }); +} \ No newline at end of file diff --git a/crates/paralegal-flow/tests/purity/misc.rs b/crates/paralegal-flow/tests/purity/misc.rs index 291e9063a2..cc4001a31a 100644 --- a/crates/paralegal-flow/tests/purity/misc.rs +++ b/crates/paralegal-flow/tests/purity/misc.rs @@ -1,5 +1,4 @@ use paralegal_flow::{ann::db::AutoMarkers, define_flow_test_template, inline_test, test_utils::*}; -use paralegal_spdg::DisplayPath; const TEST_CRATE_NAME: &str = "tests/purity/test-crate-misc"; const EXTRA_ARGS: &[&str] = &[ @@ -84,21 +83,8 @@ define_test!(side_effect_tcp_flow: ctrl -> { }); define_test!(side_effect_vec: ctrl -> { + ctrl.show_side_effects(true); - let auto_markers = AutoMarkers::default(); - let auto = auto_markers.all(); - for m in auto { - let marked = ctrl.marked(m); - if !marked.is_empty() { - println!("Side effect {m}"); - } - for n in marked { - assert_eq!(n.info().at.root().function, ctrl.id()); - let d = DisplayPath::from(&ctrl.graph().desc.def_info[&n.info().at.leaf().function].path); - println!("{} in {} in {}", n.info().kind, n.instruction_info().description, d); - } - - } ctrl.assert_purity(true); }); @@ -162,3 +148,8 @@ fn clone_test_reachability() { define_test!(structs: ctrl -> { ctrl.assert_purity(true); }); + +define_test!(array_iter_marker: ctrl -> { + ctrl.assert_purity(true); + assert!(ctrl.has_function("into_iter")); +}); \ No newline at end of file diff --git a/crates/paralegal-flow/tests/purity/mod.rs b/crates/paralegal-flow/tests/purity/mod.rs index 913f254511..64b3173e2d 100644 --- a/crates/paralegal-flow/tests/purity/mod.rs +++ b/crates/paralegal-flow/tests/purity/mod.rs @@ -9,4 +9,5 @@ mod leaky; mod misc; mod raw_ptr; mod recursive; +mod side_effects; mod r#static; diff --git a/crates/paralegal-flow/tests/purity/side_effects.rs b/crates/paralegal-flow/tests/purity/side_effects.rs new file mode 100644 index 0000000000..83b2e078ca --- /dev/null +++ b/crates/paralegal-flow/tests/purity/side_effects.rs @@ -0,0 +1,35 @@ +use paralegal_flow::define_flow_test_template; +use paralegal_flow::test_utils::*; + +const TEST_CRATE_NAME: &str = "tests/purity/test-crate-side-effect"; +const EXTRA_ARGS: &[&str] = &[ + "--side-effect-markers", + "--external-annotations", + "../stdlib-markers.toml", +]; + +lazy_static! { + static ref TEST_CRATE_ANALYZED: bool = + run_paralegal_flow_with_flow_graph_dump_and(TEST_CRATE_NAME, EXTRA_ARGS); +} + +macro_rules! define_test { + ($($t:tt)*) => { + define_flow_test_template!(TEST_CRATE_ANALYZED, TEST_CRATE_NAME, $($t)*); + }; +} + +define_test!(fs: ctrl -> { + ctrl.show_side_effects(true); + + assert!(!ctrl.marked("side-effect:fs:write").is_empty()); +}); + +define_test!(path_eq: ctrl -> { + ctrl.show_side_effects(true); + ctrl.assert_purity(true); +}); + +define_test!(os_str_from_bytes: ctrl -> { + ctrl.assert_purity(true); +}); diff --git a/crates/paralegal-flow/tests/purity/stdlib-markers.toml b/crates/paralegal-flow/tests/purity/stdlib-markers.toml index f00ef92d77..5ef5d2437a 100644 --- a/crates/paralegal-flow/tests/purity/stdlib-markers.toml +++ b/crates/paralegal-flow/tests/purity/stdlib-markers.toml @@ -4,31 +4,50 @@ marker = "std:vec" _internal_can_fail_resolve_silently = true _internal_on_all_module_children = true +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true -[["alloc::vec"]] +[["alloc::slice"]] marker = "std:slice" _internal_can_fail_resolve_silently = true _internal_on_all_module_children = true +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true + +[["slice"]] +marker = "std:slice" +_internal_on_all_module_children = true +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true + [["core::slice"]] marker = "std:slice" _internal_can_fail_resolve_silently = true _internal_on_all_module_children = true +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true [["alloc::string"]] marker = "std:string" _internal_can_fail_resolve_silently = true _internal_on_all_module_children = true +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true -[["std::collection::hash_map"]] +[["std::collections::hash_map"]] marker = "std:hashmap" _internal_can_fail_resolve_silently = true _internal_on_all_module_children = true +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true -[["std::collection::btree"]] +[["std::collections::btree"]] marker = "std:btree" _internal_can_fail_resolve_silently = true _internal_on_all_module_children = true +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true # Impls of global allocator. # "alloc::alloc::\{impl#0\}", @@ -37,6 +56,14 @@ _internal_on_all_module_children = true marker = "std:allocation" _internal_can_fail_resolve_silently = true _internal_on_all_module_children = true +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true + +[["alloc::alloc::exchange_malloc"]] +marker = "std:allocation" +_internal_can_fail_resolve_silently = true +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true # allowed functions @@ -231,9 +258,118 @@ _internal_can_fail_resolve_silently = true marker = "ignore" on_return = true _internal_on_all_module_children = true +_internal_can_fail_resolve_silently = true [["crate::my_mod"]] marker = "ignore" on_return = true _internal_on_all_module_children = true +_internal_can_fail_resolve_silently = true + + +[["::fold"]] +marker = "std:slice" +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true +_internal_can_fail_resolve_silently = true + +[["str::as_bytes"]] +marker = "std:string" +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true + +[["::next"]] +marker = "std:slice" +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true +_internal_can_fail_resolve_silently = true + +[["::deref_mut"]] +marker = "side-effect:external-state" +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true + +[["std::io::error"]] +marker = "std:error" +_internal_on_all_module_children = true +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true + +[["core::future::get_context"]] +marker = "std:future" +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true + +# Safe, because (safe) slice API observes lifetimes. Might have to exclude +# unsafe functions at some point though. +[["core::slice"]] +marker = "std:slice" +_internal_on_all_module_children = true +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true + +[["::get"]] +marker = "std:slice" +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true + +[["::get_mut"]] +marker = "std:slice" +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true + +[["::index"]] +marker = "std:slice" +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true + +[["::index_mut"]] +marker = "std:slice" +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true + +[["std::fs::OpenOptions::open"]] +marker = "std:fs" +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true + +[["<[_] as std::ops::Index>::index"]] +marker = "std:slice" +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true + +[["std::path::Path::new"]] +marker = "std:fs" +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true + +[["core::fmt::rt::Argument::new"]] +marker = "std:fmt" +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true + +[["<&std::fs::File as std::io::Write>::write"]] +marker = "side-effect:fs:write" +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true + +[["::from_bytes"]] +marker = "std:str:OsStrExt-OsStr" +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true + +[["::as_bytes"]] +marker = "std:str:OsStrExt-OsStr" +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true + +[["::eq"]] +marker = "std:path:eq" +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true + +[["<[_;_] as core::iter::IntoIterator>::into_iter"]] +marker = "std:array_into_iter" +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true \ No newline at end of file diff --git a/crates/paralegal-flow/tests/purity/test-crate-misc/src/main.rs b/crates/paralegal-flow/tests/purity/test-crate-misc/src/main.rs index e5f7c71175..b53a048334 100644 --- a/crates/paralegal-flow/tests/purity/test-crate-misc/src/main.rs +++ b/crates/paralegal-flow/tests/purity/test-crate-misc/src/main.rs @@ -304,3 +304,8 @@ fn structs(a: usize) { foo.a = 30; foo.b = "hello2"; } + +#[paralegal::analyze] +fn array_iter_marker() { + [1,2,3].into_iter(); +} \ No newline at end of file diff --git a/crates/paralegal-flow/tests/purity/test-crate-side-effect/Cargo.lock b/crates/paralegal-flow/tests/purity/test-crate-side-effect/Cargo.lock new file mode 100644 index 0000000000..fa2b97e6d4 --- /dev/null +++ b/crates/paralegal-flow/tests/purity/test-crate-side-effect/Cargo.lock @@ -0,0 +1,49 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "paralegal" +version = "0.1.0" +dependencies = [ + "cfg-if", + "proc-macro2", + "quote", +] + +[[package]] +name = "proc-macro2" +version = "1.0.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "purity-tests-leaky" +version = "0.1.0" +dependencies = [ + "paralegal", +] + +[[package]] +name = "quote" +version = "1.0.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "unicode-ident" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "462eeb75aeb73aea900253ce739c8e18a67423fadf006037cd3ff27e82748a06" diff --git a/crates/paralegal-flow/tests/purity/test-crate-side-effect/Cargo.toml b/crates/paralegal-flow/tests/purity/test-crate-side-effect/Cargo.toml new file mode 100644 index 0000000000..7a4fb9631f --- /dev/null +++ b/crates/paralegal-flow/tests/purity/test-crate-side-effect/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "purity-tests-leaky" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +paralegal = { path = "../../../../paralegal" } diff --git a/crates/paralegal-flow/tests/purity/test-crate-side-effect/src/main.rs b/crates/paralegal-flow/tests/purity/test-crate-side-effect/src/main.rs new file mode 100644 index 0000000000..0da8907e5e --- /dev/null +++ b/crates/paralegal-flow/tests/purity/test-crate-side-effect/src/main.rs @@ -0,0 +1,15 @@ +#[paralegal::analyze] +fn fs() { + std::fs::write("test", "Content").unwrap(); +} + +#[paralegal::analyze] +fn path_eq() { + std::path::Path::new("test") == std::path::Path::new("test"); +} + +#[paralegal::analyze] +fn os_str_from_bytes() { + use std::os::unix::prelude::OsStrExt; + std::ffi::OsStr::from_bytes(b"test"); +} diff --git a/crates/paralegal-flow/tests/sameness.rs b/crates/paralegal-flow/tests/sameness.rs new file mode 100644 index 0000000000..44e42ea8dd --- /dev/null +++ b/crates/paralegal-flow/tests/sameness.rs @@ -0,0 +1,309 @@ +#![feature(rustc_private)] + +use paralegal_flow::{inline_test, test_utils::*}; + +#[test] +fn simple() { + inline_test! { + #[paralegal_flow::marker(start, return)] + fn source() -> usize { 0 } + + #[paralegal_flow::marker(end, arguments = [0])] + fn target(i: usize) {} + + fn main() { + target(source()); + } + } + .check_ctrl(|ctrl| { + let sources = ctrl.marked("start"); + let target = ctrl.marked("end"); + assert!(!sources.is_empty()); + assert!(!target.is_empty()); + assert!(sources.flows_to_unchanged(&target)); + }); +} + +#[test] +fn from_async_arg() { + inline_test! { + + #[paralegal_flow::marker(end, arguments = [0])] + fn target(i: usize) {} + + #[paralegal_flow::marker(start, arguments = [0])] + async fn main(i: usize) { + target(i); + } + } + .check_ctrl(|ctrl| { + let sources = ctrl.marked("start"); + let target = ctrl.marked("end"); + assert!(!sources.is_empty()); + assert!(!target.is_empty()); + assert!(sources.flows_to_unchanged(&target)); + }); +} + +#[test] +fn no() { + inline_test! { + #[paralegal_flow::marker(start, return)] + fn source() -> usize { 0 } + + #[paralegal_flow::marker(end, arguments = [0])] + fn target(i: usize) {} + + fn main() { + target(source()+1); + } + } + .check_ctrl(|ctrl| { + let sources = ctrl.marked("start"); + let target = ctrl.marked("end"); + assert!(!sources.is_empty()); + assert!(!target.is_empty()); + assert!(!sources.flows_to_unchanged(&target)); + }); +} + +#[test] +fn through_argument() { + inline_test! { + #[paralegal_flow::marker(start, return)] + fn source() -> usize { 0 } + + #[paralegal_flow::marker(end, arguments = [0])] + fn target(i: usize) {} + + fn wrapper(i: usize) { + target(i) + } + + fn main() { + wrapper(source()); + } + } + .check_ctrl(|ctrl| { + let sources = ctrl.marked("start"); + let target = ctrl.marked("end"); + assert!(!sources.is_empty()); + assert!(!target.is_empty()); + assert!(sources.flows_to_unchanged(&target)); + }); +} + +#[test] +fn through_return() { + inline_test! { + #[paralegal_flow::marker(start, return)] + fn source() -> usize { 0 } + + #[paralegal_flow::marker(end, arguments = [0])] + fn target(i: usize) {} + + fn wrapper(i: usize) { + target(i) + } + + fn main() { + wrapper(source()); + } + } + .check_ctrl(|ctrl| { + let sources = ctrl.marked("start"); + let target = ctrl.marked("end"); + assert!(!sources.is_empty()); + assert!(!target.is_empty()); + assert!(sources.flows_to_unchanged(&target)); + }); +} + +#[test] +fn through_opaque_call() { + inline_test! { + #[paralegal_flow::marker(start, return)] + fn source() -> usize { 0 } + + #[paralegal_flow::marker(end, arguments = [0])] + fn target(i: usize) {} + + + fn main() { + target(source().wrapping_add(3)); + } + } + .check_ctrl(|ctrl| { + let sources = ctrl.marked("start"); + let target = ctrl.marked("end"); + assert!(!sources.is_empty()); + assert!(!target.is_empty()); + assert!(!sources.flows_to_unchanged(&target)); + }); +} + +#[test] +fn through_opaque_call_2() { + inline_test! { + #[paralegal_flow::marker(start, return)] + fn source() -> usize { 0 } + + #[paralegal_flow::marker(end, arguments = [0])] + fn target(i: usize) {} + + #[paralegal_flow::marker(noinline, return)] + fn in_between(i: usize) -> usize { i } + + fn main() { + target(in_between(source())); + } + } + .check_ctrl(|ctrl| { + let sources = ctrl.marked("start"); + let target = ctrl.marked("end"); + assert!(!sources.is_empty()); + assert!(!target.is_empty()); + assert!(!sources.flows_to_unchanged(&target)); + }); +} + +#[test] +fn in_aggregate() { + inline_test! { + #[paralegal_flow::marker(start, return)] + fn source() -> usize { 0 } + + #[paralegal_flow::marker(end, arguments = [0])] + fn target(i: T) {} + + fn main() { + let tup = (source(), 1, 2); + target(tup); + } + } + .check_ctrl(|ctrl| { + let sources = ctrl.marked("start"); + let target = ctrl.marked("end"); + assert!(!sources.is_empty()); + assert!(!target.is_empty()); + assert!(sources.flows_to_unchanged(&target)); + }); +} + +#[test] +fn not_from_projection() { + inline_test! { + #[paralegal_flow::marker(start, return)] + fn source() -> (usize, usize) { (0,0) } + + #[paralegal_flow::marker(end, arguments = [0])] + fn target(i: T) {} + + fn main() { + target(source().0); + } + } + .check_ctrl(|ctrl| { + let sources = ctrl.marked("start"); + let target = ctrl.marked("end"); + assert!(!sources.is_empty()); + assert!(!target.is_empty()); + assert!(!sources.flows_to_unchanged(&target)); + }); +} + +#[test] +fn test_mutable_references() { + // This doesn't actually test any functionality but is an experiment to see + // whether the PDG accurately shows an absence of a flow if a coerced + // reference is used as though it has local lifetime. + // + // When a simple stack variable like this is used the PDG correctly shows + // the absence of a flow. However using a `Vec` instead (see next test case) + // already introduces the flow. + // + // Note that we aren't analyzing the stdlib here, but even if we did, we add + // exception markers to vectors, so the outcome is likely the same. + + inline_test! { + use std::cell::RefCell; + + #[paralegal_flow::marker(source, return)] + fn mark(t: T) -> T{ + t + } + + struct S; + + fn with(mut f: impl for<'a> FnMut(&'a mut S)) { + let r = unsafe { std::mem::transmute(&mut SC) }; + f(r) + } + + #[paralegal_flow::marker(target, arguments = [0])] + fn on_s(s: &mut S) {} + + const SC: S = S; + + fn main() { + let mutex : RefCell<&'static mut S> = RefCell::new(unsafe { std::mem::transmute(&mut SC) }); + with(|s| { + let s = mark(s); + let mut guard = mutex.borrow_mut(); + let mut my_s = s; + my_s = *guard; + on_s(my_s); + }) + } + }.check_ctrl(|ctrl| { + let sources = ctrl.marked("source"); + let targets = ctrl.marked("target"); + assert!(!sources.is_empty()); + assert!(!targets.is_empty()); + assert!(!sources.flows_to_data(&targets)); + }); +} + +#[test] +fn test_mutable_references_2() { + // See comments on `test_mutable_references` + + inline_test! { + use std::cell::RefCell; + + #[paralegal_flow::marker(source, return)] + fn mark(t: T) -> T{ + t + } + + struct S; + + fn with(mut f: impl for<'a> FnMut(&'a mut S)) { + let r = unsafe { std::mem::transmute(&mut SC) }; + f(r) + } + + #[paralegal_flow::marker(target, arguments = [0])] + fn on_s(s: &mut S) {} + + const SC: S = S; + + fn main() { + let mutex : RefCell<&'static mut S> = RefCell::new(unsafe { std::mem::transmute(&mut SC) }); + with(|s| { + let s = mark(s); + let mut v = vec![s]; + let mut guard = mutex.borrow_mut(); + v.pop(); + v.push(*guard); + on_s(v.pop().unwrap()); + }) + } + }.check_ctrl(|ctrl| { + let sources = ctrl.marked("source"); + let targets = ctrl.marked("target"); + assert!(!sources.is_empty()); + assert!(!targets.is_empty()); + assert!(sources.flows_to_data(&targets)); + }); +} diff --git a/crates/paralegal-flow/tests/test-crates-for-crate-marker/consumer/Cargo.lock b/crates/paralegal-flow/tests/test-crates-for-crate-marker/consumer/Cargo.lock new file mode 100644 index 0000000000..92ece0a444 --- /dev/null +++ b/crates/paralegal-flow/tests/test-crates-for-crate-marker/consumer/Cargo.lock @@ -0,0 +1,130 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "consumer" +version = "0.1.0" +dependencies = [ + "memchr", + "paralegal", + "producer", + "serde_json", +] + +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + +[[package]] +name = "paralegal" +version = "0.1.0" +dependencies = [ + "cfg-if", + "proc-macro2", + "quote", +] + +[[package]] +name = "proc-macro2" +version = "1.0.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "producer" +version = "0.1.0" +dependencies = [ + "paralegal", +] + +[[package]] +name = "quote" +version = "1.0.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.145" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", + "serde_core", +] + +[[package]] +name = "syn" +version = "2.0.110" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a99801b5bd34ede4cf3fc688c5919368fea4e4814a4664359503e6015b280aea" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" diff --git a/crates/paralegal-flow/tests/test-crates-for-crate-marker/consumer/Cargo.toml b/crates/paralegal-flow/tests/test-crates-for-crate-marker/consumer/Cargo.toml new file mode 100644 index 0000000000..799a50a66e --- /dev/null +++ b/crates/paralegal-flow/tests/test-crates-for-crate-marker/consumer/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "consumer" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +paralegal = { path = "../../../../paralegal" } +producer = { path = "../producer" } +serde_json = "1.0" +memchr = "2" diff --git a/crates/paralegal-flow/tests/test-crates-for-crate-marker/consumer/external-annotations.toml b/crates/paralegal-flow/tests/test-crates-for-crate-marker/consumer/external-annotations.toml new file mode 100644 index 0000000000..05ef2c7920 --- /dev/null +++ b/crates/paralegal-flow/tests/test-crates-for-crate-marker/consumer/external-annotations.toml @@ -0,0 +1,34 @@ +[["producer"]] +marker = "found" +_internal_on_all_module_children = true +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true + +[["serde_core"]] +marker = "serde" +_internal_on_all_module_children = true +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true + +[["memchr"]] +marker = "memchr" +_internal_on_all_module_children = true +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true + +# [["memchr::memrchr"]] +# marker = "memchr" +# on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +# on_return = true + +[["producer::a_mod"]] +marker = "submod-conflict" +_internal_on_all_module_children = true +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true + +[["producer::a_mod"]] +marker = "direct-conflict" +_internal_on_all_module_children = true +on_argument = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] +on_return = true diff --git a/crates/paralegal-flow/tests/test-crates-for-crate-marker/consumer/src/lib.rs b/crates/paralegal-flow/tests/test-crates-for-crate-marker/consumer/src/lib.rs new file mode 100644 index 0000000000..31f588859d --- /dev/null +++ b/crates/paralegal-flow/tests/test-crates-for-crate-marker/consumer/src/lib.rs @@ -0,0 +1,19 @@ +#[paralegal::analyze] +fn crate_marker() { + producer::target(); +} + +#[paralegal::analyze] +fn serde_json() { + serde_json::to_string(&34_i32).unwrap(); +} + +#[paralegal::analyze] +fn memchr() { + memchr::memrchr(b'a', b"hello"); +} + +#[paralegal::analyze] +fn marker_overlap() { + producer::a_mod::target() +} diff --git a/crates/paralegal-flow/tests/test-crates-for-crate-marker/producer/Cargo.toml b/crates/paralegal-flow/tests/test-crates-for-crate-marker/producer/Cargo.toml new file mode 100644 index 0000000000..4b5c1458df --- /dev/null +++ b/crates/paralegal-flow/tests/test-crates-for-crate-marker/producer/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "producer" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +paralegal = { path = "../../../../paralegal" } diff --git a/crates/paralegal-flow/tests/test-crates-for-crate-marker/producer/src/lib.rs b/crates/paralegal-flow/tests/test-crates-for-crate-marker/producer/src/lib.rs new file mode 100644 index 0000000000..8b15dab228 --- /dev/null +++ b/crates/paralegal-flow/tests/test-crates-for-crate-marker/producer/src/lib.rs @@ -0,0 +1,5 @@ +pub fn target() {} + +pub mod a_mod { + pub fn target() {} +} diff --git a/crates/paralegal-policy/src/algo/flows_to.rs b/crates/paralegal-policy/src/algo/flows_to.rs index bd102de65e..2153cd424b 100644 --- a/crates/paralegal-policy/src/algo/flows_to.rs +++ b/crates/paralegal-policy/src/algo/flows_to.rs @@ -138,8 +138,8 @@ fn test_ctrl_flows_to() { let src_c = ctx.controller_argument(controller, 2).unwrap(); let cs1 = crate::test_utils::get_callsite_node(&ctx, controller, "sink1"); let cs2 = crate::test_utils::get_callsite_node(&ctx, controller, "sink2"); - let switch_int_after_src_a = ctx.nth_successors(2, src_a); - let switch_int_after_src_c = ctx.nth_successors(2, src_c); + let switch_int_after_src_a = ctx.nth_successors(2, &src_a); + let switch_int_after_src_c = ctx.nth_successors(2, &src_c); assert!(switch_int_after_src_a.flows_to(&cs1, &ctx, EdgeSelection::Control)); assert!(switch_int_after_src_c.flows_to(&cs2, &ctx, EdgeSelection::Control)); assert!(switch_int_after_src_a.flows_to(&cs2, &ctx, EdgeSelection::Control)); diff --git a/crates/paralegal-policy/src/context.rs b/crates/paralegal-policy/src/context.rs index a210666b4c..c8b8d5d1a1 100644 --- a/crates/paralegal-policy/src/context.rs +++ b/crates/paralegal-policy/src/context.rs @@ -8,7 +8,8 @@ use std::{io::Write, process::exit, sync::Arc}; use fixedbitset::FixedBitSet; pub use paralegal_spdg::rustc_portable::{DefId, LocalDefId}; use paralegal_spdg::traverse::{ - generic_flows_to, generic_influencees, generic_influencers, EdgeSelection, + edge_generic_flows_to, generic_flows_to, generic_influencees, generic_influencers, + EdgeSelection, }; use paralegal_spdg::{ CallString, DefKind, DisplayNode, Endpoint, FunctionHandling, GlobalNode, HashMap, HashSet, @@ -281,7 +282,7 @@ impl RootContext { pub fn report_marker_if_absent(&self, marker: Marker) { assert_warning!( *self, - self.marker_to_ids.contains_key(&marker), + self.marker_is_defined(marker), format!("Marker {marker} is mentioned in the policy but not defined in source") ) } @@ -345,11 +346,20 @@ impl RootContext { /// /// If the controller with this id does not exist *or* the controller has /// fewer than `index` arguments. - pub fn controller_argument(&self, ctrl_id: Endpoint, index: u32) -> Option { + pub fn controller_argument(&self, ctrl_id: Endpoint, index: u32) -> Option { let ctrl = self.desc.controllers.get(&ctrl_id)?; - let inner = *ctrl.arguments.get(index as usize)?; + let inner = NodeCluster::new( + ctrl_id, + ctrl.arguments.iter().copied().filter(|n| { + ctrl.graph + .node_weight(*n) + .unwrap() + .is_arg + .is_some_and(|a| a == index as u16) + }), + ); - Some(GlobalNode::from_local_node(ctrl_id, inner)) + (!inner.nodes().is_empty()).then_some(inner) } /// Returns whether there is direct control flow influence from influencer to sink, or there is some node which is data-flow influenced by `influencer` and has direct control flow influence on `target`. Or as expressed in code: @@ -638,12 +648,17 @@ pub trait Context { fn root(&self) -> &RootContext; /// Returns an iterator over all objects marked with `marker`. - fn marked_nodes(&self, marker: Marker) -> impl Iterator + '_; + fn marked_nodes(&self, marker: impl Into) -> impl Iterator + '_; /// All nodes with this marker, be that via type or directly fn nodes_marked_any_way(&self, marker: Marker) -> impl Iterator + '_; /// All nodes that have this marker through a type fn nodes_marked_via_type(&self, marker: Marker) -> impl Iterator + '_; + + /// Is this marker assigned to anything in the PDG? + fn marker_is_defined(&self, m: impl Into) -> bool { + self.root().marker_to_ids.contains_key(&m.into()) + } } impl Context for RootContext { @@ -667,7 +682,8 @@ impl Context for RootContext { }) } - fn marked_nodes(&self, marker: Marker) -> impl Iterator + '_ { + fn marked_nodes(&self, marker: impl Into) -> impl Iterator + '_ { + let marker = marker.into(); self.report_marker_if_absent(marker); self.marker_to_ids .get(&marker) @@ -729,7 +745,7 @@ where }); } } - generic_flows_to( + edge_generic_flows_to( self.iter_nodes(), edge_type, &ctx.desc.controllers[&cf_id], @@ -737,6 +753,7 @@ where ) .is_some() } + /// An optimized version of the following pattern that performs only one /// graph traversal instead of `sink.len()` traversals: /// @@ -808,7 +825,7 @@ where // }); // } // } - generic_flows_to( + edge_generic_flows_to( self.iter_nodes(), edge_type, &ctx.desc.controllers[&cf_id], @@ -1022,6 +1039,9 @@ pub trait NodeExt: private::Sealed { ctx: &RootContext, edge_selection: EdgeSelection, ) -> Option>; + + #[doc(hidden)] + fn flows_to_unchanged(self, other: GlobalNode, ctx: &RootContext) -> bool; } impl NodeExt for GlobalNode { @@ -1128,6 +1148,22 @@ impl NodeExt for GlobalNode { .collect() }) } + + fn flows_to_unchanged(self, other: GlobalNode, ctx: &RootContext) -> bool { + let cf_id = self.controller_id(); + if other.controller_id() != cf_id { + return false; + } + + let spdg = &ctx.desc.controllers[&cf_id]; + let graph = petgraph::visit::NodeFiltered::from_fn(&spdg.graph, |n| { + n == self.local_node() + || n == other.local_node() + || spdg.graph.node_weight(n).unwrap().same + }); + + generic_flows_to(self.iter_nodes(), &graph, other.iter_nodes()).is_some() + } } impl Sealed for &'_ T {} @@ -1181,6 +1217,10 @@ impl NodeExt for &'_ T { fn associated_call_site(self, ctx: &RootContext) -> CallString { (*self).associated_call_site(ctx) } + + fn flows_to_unchanged(self, other: GlobalNode, ctx: &RootContext) -> bool { + (*self).flows_to_unchanged(other, ctx) + } } /// Provide display trait for DefId in a Context. @@ -1270,11 +1310,11 @@ fn test_influencees() -> Result<()> { let sink_callsite = crate::test_utils::get_callsite_node(&ctx, ctrl_name, "sink1"); let influencees_data_control = ctx - .influencees(dbg!(src_a), EdgeSelection::Both) + .influencees(dbg!(&src_a), EdgeSelection::Both) .unique() .collect::>(); let influencees_data = ctx - .influencees(src_a, EdgeSelection::Data) + .influencees(&src_a, EdgeSelection::Data) .unique() .collect::>(); @@ -1330,11 +1370,11 @@ fn test_influencers() -> Result<()> { .collect::>(); assert!( - influencers_data_control.contains(&src_a), + influencers_data_control.contains(&src_a.try_as_single().unwrap()), "input argument a influences sink via data" ); assert!( - influencers_data_control.contains(&dbg!(src_b)), + influencers_data_control.contains(&dbg!(src_b.try_as_single().unwrap())), "input argument b influences sink via control" ); assert!( @@ -1346,11 +1386,11 @@ fn test_influencers() -> Result<()> { ); assert!( - !influencers_data.contains(&src_a), + !influencers_data.contains(&src_a.try_as_single().unwrap()), "input argument a doesnt influences sink via data" ); assert!( - influencers_data.contains(&src_b), + influencers_data.contains(&src_b.try_as_single().unwrap()), "input argument b influences sink via data" ); assert!( @@ -1379,7 +1419,7 @@ fn test_has_ctrl_influence() -> Result<()> { let sink_cs = crate::test_utils::get_callsite_or_datasink_node(&ctx, ctrl_name, "sink1"); assert!( - ctx.has_ctrl_influence(src_a, &sink_cs), + ctx.has_ctrl_influence(&src_a, &sink_cs), "src_a influences sink" ); assert!( @@ -1391,7 +1431,7 @@ fn test_has_ctrl_influence() -> Result<()> { "validate_foo influences sink" ); assert!( - !ctx.has_ctrl_influence(src_b, &sink_cs), + !ctx.has_ctrl_influence(&src_b, &sink_cs), "src_b doesnt influence sink" ); assert!( diff --git a/crates/paralegal-policy/src/diagnostics.rs b/crates/paralegal-policy/src/diagnostics.rs index 79269e87f1..1d136ef88f 100644 --- a/crates/paralegal-policy/src/diagnostics.rs +++ b/crates/paralegal-policy/src/diagnostics.rs @@ -96,7 +96,9 @@ use indexmap::IndexMap; use std::rc::Rc; use std::{io::Write, sync::Arc}; -use paralegal_spdg::{DisplayPath, Endpoint, GlobalNode, Identifier, Span, SpanCoord, SPDG}; +use paralegal_spdg::{ + DisplayPath, Endpoint, GlobalNode, Identifier, NodeCluster, Span, SpanCoord, SPDG, +}; use crate::{Context, NodeExt, RootContext}; @@ -106,16 +108,11 @@ macro_rules! assert_error { ($ctx:expr, $cond: expr $(,)?) => { assert_error!($ctx, $cond, "Error: {}", stringify!($cond)) }; - ($ctx:expr, $cond: expr, $msg:expr $(,)?) => { - if !$cond { - use $crate::diagnostics::Diagnostics; - Diagnostics::error(&$ctx, $msg); - } - }; - ($ctx:expr, $cond: expr, $msg:expr, $($frag:expr),+ $(,)?) => { + + ($ctx:expr, $cond: expr, $msg:expr $(, $($frag:expr),+ )? $(,)?) => { if !$cond { use $crate::diagnostics::Diagnostics; - Diagnostics::error(&$ctx, format!($msg, $($frag),+)); + Diagnostics::error(&$ctx, format!($msg, $( $($frag),+ )? )); } }; } @@ -892,6 +889,27 @@ impl ControllerContext { }) .map(Arc::new) } + + /// Get the nodes representing the `idx`'th argument to this controller (if + /// the argument exists). + pub fn argument(&self, idx: u32) -> Option { + self.controller_argument(self.id, idx) + } + + /// Get all arguments for this controller grouped by argument index + pub fn arguments_by_index(&self) -> Vec { + let mut args = vec![]; + let g = &self.current().graph; + for n in self.current().arguments.iter() { + let w = g.node_weight(*n).unwrap(); + let Some(a) = w.is_arg else { continue }; + if (a as usize) >= args.len() { + args.resize_with((a as usize) + 1, || NodeCluster::new(self.id(), [])); + } + args[a as usize].push_node(*n); + } + args + } } impl HasDiagnosticsBase for ControllerContext { @@ -914,7 +932,10 @@ impl Context for ControllerContext { self.as_ctx() } - fn marked_nodes(&self, marker: crate::Marker) -> impl Iterator + '_ { + fn marked_nodes( + &self, + marker: impl Into, + ) -> impl Iterator + '_ { self.root() .marked_nodes(marker) .filter(|n| n.controller_id() == self.id) diff --git a/crates/paralegal-policy/src/lib.rs b/crates/paralegal-policy/src/lib.rs index 508dafcefa..6b8d8d5519 100644 --- a/crates/paralegal-policy/src/lib.rs +++ b/crates/paralegal-policy/src/lib.rs @@ -164,6 +164,13 @@ impl SPDGGenCommand { self } + /// Register a Rust function as entrypoint for the analysis. Equivalent to + /// the `#[analyze]` annotation. + pub fn analysis_target(&mut self, target: impl AsRef) -> &mut Self { + self.0.args(["--analyze", target.as_ref()]); + self + } + /// Abort compilation once the analysis artifacts have been created. Also /// sets the expectation for the compilation to succeed to `false`. pub fn abort_after_analysis(&mut self) -> &mut Self { diff --git a/crates/paralegal-spdg/build.rs b/crates/paralegal-spdg/build.rs index 9cc9c3798a..678c0d4390 100644 --- a/crates/paralegal-spdg/build.rs +++ b/crates/paralegal-spdg/build.rs @@ -21,7 +21,27 @@ fn get_rustup_lib_path() -> PathBuf { rustup_lib } -/// Helper for calculating a hash of all (modification dates of) Rust files in this crate. +#[allow(dead_code)] +fn hash_via_modification_date(path: &Path, hasher: &mut DefaultHasher) -> Result<()> { + let metadata = path.metadata()?; + let modified = metadata.modified()?; + + let duration = modified.duration_since(SystemTime::UNIX_EPOCH)?; + duration.as_secs().hash(hasher); + Ok(()) +} + +#[allow(dead_code)] +fn hash_via_content(path: &Path, hasher: &mut DefaultHasher) -> std::io::Result<()> { + use std::io::Read; + let mut file = std::fs::File::open(path)?; + let mut buffer = Vec::new(); + file.read_to_end(&mut buffer)?; + buffer.hash(hasher); + Ok(()) +} + +/// Helper for calculating a hash of all Rust files in this crate. fn visit_dirs(dir: &Path, hasher: &mut DefaultHasher) -> Result<()> { if !dir.is_dir() { return Ok(()); @@ -32,11 +52,7 @@ fn visit_dirs(dir: &Path, hasher: &mut DefaultHasher) -> Result<()> { if path.is_dir() { visit_dirs(&path, hasher)?; } else if path.extension().is_some_and(|ext| ext == "rs") { - let metadata = entry.metadata()?; - let modified = metadata.modified()?; - - let duration = modified.duration_since(SystemTime::UNIX_EPOCH)?; - duration.as_secs().hash(hasher); + hash_via_content(&path, hasher)?; // Tell Cargo to rerun if this source file changes println!("cargo:rerun-if-changed={}", path.display()); } diff --git a/crates/paralegal-spdg/src/lib.rs b/crates/paralegal-spdg/src/lib.rs index d457ba6eb1..d5adf789cd 100644 --- a/crates/paralegal-spdg/src/lib.rs +++ b/crates/paralegal-spdg/src/lib.rs @@ -306,6 +306,18 @@ impl Span { } } +impl std::fmt::Display for Span { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!( + f, + "{}:{}:{}", + self.source_file.abs_file_path.display(), + self.start.line, + self.start.col, + ) + } +} + /// Metadata on a function call. #[derive(Debug, Clone, Copy, Serialize, Deserialize, Eq, PartialEq, Allocative)] pub struct FunctionCallInfo { @@ -765,7 +777,7 @@ pub mod node_cluster { #[derive(Debug, Hash, Clone)] pub struct NodeCluster { controller_id: Endpoint, - nodes: Box<[Node]>, + nodes: Vec, } /// Owned iterator of a [`NodeCluster`] @@ -857,9 +869,23 @@ pub mod node_cluster { controller_id: ctrl_id, nodes: std::iter::once(Some(first.local_node())) .chain(it.map(|n| (n.controller_id() == ctrl_id).then_some(n.local_node()))) - .collect::>>()?, + .collect::>()?, }) } + + /// Returns some if this cluster contains only one node. + pub fn try_as_single(&self) -> Option { + if let [node] = &*self.nodes { + Some(GlobalNode::from_local_node(self.controller_id, *node)) + } else { + None + } + } + + #[doc(hidden)] + pub fn push_node(&mut self, node: Node) { + self.nodes.push(node); + } } } @@ -893,6 +919,13 @@ pub struct NodeInfo { /// Whether this node is an argument to a function call, and if so which pub is_arg: Option, + + /// Is this node (the value behind it) the same before and after the + /// operation applied at this location? + /// + /// Specifically this considers taking references (and pointers) and + /// dereferencing them to mean the value is the same. + pub same: bool, } impl Display for NodeInfo { @@ -1191,3 +1224,41 @@ impl Display for DisplayNode<'_> { } } } + +pub struct AutoMarkers { + pub side_effect_unknown_virtual: Identifier, + pub side_effect_foreign: Identifier, + pub side_effect_unknown_fn_ptr: Identifier, + pub side_effect_raw_ptr: Identifier, + pub side_effect_transmute: Identifier, + pub side_effect_unknown: Identifier, + pub side_effect_intrinsic: Identifier, +} + +impl Default for AutoMarkers { + fn default() -> Self { + AutoMarkers { + side_effect_unknown_virtual: Identifier::new_intern("auto:side-effect:unknown:virtual"), + side_effect_foreign: Identifier::new_intern("auto:side-effect:foreign"), + side_effect_unknown_fn_ptr: Identifier::new_intern("auto:side-effect:unknown:fn-ptr"), + side_effect_raw_ptr: Identifier::new_intern("auto:side-effect:raw-ptr"), + side_effect_transmute: Identifier::new_intern("auto:side-effect:transmute"), + side_effect_unknown: Identifier::new_intern("auto:side-effect:unknown"), + side_effect_intrinsic: Identifier::new_intern("auto:side-effect:intrinsic"), + } + } +} + +impl AutoMarkers { + pub fn all(&self) -> [Identifier; 7] { + [ + self.side_effect_unknown_virtual, + self.side_effect_foreign, + self.side_effect_unknown_fn_ptr, + self.side_effect_raw_ptr, + self.side_effect_transmute, + self.side_effect_unknown, + self.side_effect_intrinsic, + ] + } +} diff --git a/crates/paralegal-spdg/src/traverse.rs b/crates/paralegal-spdg/src/traverse.rs index 0077b54e49..324b527ab5 100644 --- a/crates/paralegal-spdg/src/traverse.rs +++ b/crates/paralegal-spdg/src/traverse.rs @@ -67,19 +67,34 @@ impl EdgeSelection { /// A primitive that queries whether we can reach from one set of nodes to /// another -pub fn generic_flows_to( +pub fn edge_generic_flows_to( from: impl IntoIterator, edge_selection: EdgeSelection, spdg: &SPDG, other: impl IntoIterator, ) -> Option<(Node, Node)> { + let graph = edge_selection.filter_graph(&spdg.graph); + generic_flows_to(from, &graph, other) +} + +use petgraph::visit as pgv; + +/// A primitive that queries whether we can reach from one set of nodes to +/// another +pub fn generic_flows_to( + from: impl IntoIterator, + graph: G, + other: impl IntoIterator, +) -> Option<(Node, Node)> +where + G: pgv::Visitable + pgv::IntoNeighbors + pgv::GraphBase, +{ let targets = other.into_iter().collect::>(); let mut from = from.into_iter().peekable(); if from.peek().is_none() || targets.is_empty() { return None; } - let graph = edge_selection.filter_graph(&spdg.graph); let mut search = petgraph::visit::Dfs::from_parts(vec![], graph.visit_map()); for n in from { search.move_to(n); @@ -90,15 +105,6 @@ pub fn generic_flows_to( } } None - - // let result = petgraph::visit::depth_first_search(&graph, from, |event| match event { - // DfsEvent::Discover(d, _) if targets.contains(&d) => Control::Break(d), - // _ => Control::Continue, - // }); - // match result { - // Control::Break(r) => Some(r), - // _ => None, - // } } /// The current policy for this iterator is that it does not return the start