From 7ca59720013e926c3fda01847a2f4fabc1f35e77 Mon Sep 17 00:00:00 2001 From: hermanventer Date: Mon, 9 Dec 2024 11:43:47 -0800 Subject: [PATCH] Option to print summaries --- Cargo.lock | 47 ++++++------- binaries/summary_store.tar | Bin 3527168 -> 3527168 bytes checker/src/call_graph.rs | 21 +++++- checker/src/callbacks.rs | 4 ++ checker/src/crate_visitor.rs | 14 +++- checker/src/options.rs | 2 +- checker/src/summaries.rs | 125 ++++++++++++++++++++++++++++++++++- 7 files changed, 179 insertions(+), 34 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8cb529a1..00c4de6d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -47,7 +47,7 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" dependencies = [ - "windows-sys 0.59.0", + "windows-sys", ] [[package]] @@ -57,7 +57,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125" dependencies = [ "anstyle", - "windows-sys 0.59.0", + "windows-sys", ] [[package]] @@ -154,9 +154,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.2" +version = "1.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f34d93e62b03caf570cccc334cbc6c2fceca82f39211051345108adcba3eebdc" +checksum = "27f657647bcff5394bf56c7317665bbf790a137a50eaaa5c6bfbb9e27a518f2d" dependencies = [ "shlex", ] @@ -351,14 +351,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys", ] [[package]] name = "fastrand" -version = "2.2.0" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "486f806e73c5707928240ddc295403b1b93c96a02038563881c4a2fd84b81ac4" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "filetime" @@ -369,7 +369,7 @@ dependencies = [ "cfg-if", "libc", "libredox", - "windows-sys 0.59.0", + "windows-sys", ] [[package]] @@ -486,9 +486,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" -version = "0.2.167" +version = "0.2.168" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09d6582e104315a817dff97f75133544b2e094ee22447d2acf4a74e189ba06fc" +checksum = "5aaeb2981e0606ca11d79718f8bb01164f1d6ed75080182d3abf017e6d244b6d" [[package]] name = "libloading" @@ -805,15 +805,15 @@ checksum = "3316159ab19e19d1065ecc49278e87f767a9dae9fae80348d2b4d4fa4ae02d4d" [[package]] name = "rustix" -version = "0.38.41" +version = "0.38.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7f649912bc1495e167a6edee79151c84b1bad49748cb4f1f1167f459f6224f6" +checksum = "f93dc38ecbab2eb790ff964bb77fa94faf256fd3e73285fd7ba0903b76bedb85" dependencies = [ "bitflags 2.6.0", "errno", "libc", "linux-raw-sys", - "windows-sys 0.52.0", + "windows-sys", ] [[package]] @@ -993,23 +993,23 @@ dependencies = [ "fastrand", "once_cell", "rustix", - "windows-sys 0.59.0", + "windows-sys", ] [[package]] name = "thiserror" -version = "2.0.4" +version = "2.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f49a1853cf82743e3b7950f77e0f4d622ca36cf4317cba00c767838bac8d490" +checksum = "8fec2a1820ebd077e2b90c4df007bebf344cd394098a13c563957d0afc83ea47" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "2.0.4" +version = "2.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8381894bb3efe0c4acac3ded651301ceee58a15d47c2e34885ed1908ad667061" +checksum = "d65750cab40f4ff1929fb1ba509e9914eb756131cef4210da8d5d700d26f6312" dependencies = [ "proc-macro2", "quote", @@ -1107,7 +1107,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.59.0", + "windows-sys", ] [[package]] @@ -1116,15 +1116,6 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" -[[package]] -name = "windows-sys" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" -dependencies = [ - "windows-targets", -] - [[package]] name = "windows-sys" version = "0.59.0" diff --git a/binaries/summary_store.tar b/binaries/summary_store.tar index 968b52263fa1d53414fe814f4344a847ca6376d1..54f5644ba6a1fe480414eee3e5f379b1f8c82031 100644 GIT binary patch delta 927 zcmajeJ!lhQ7zgma$u-(En#9^#6R##wn;P5q-jDleP(iSR4!SF7)H=A8E>Z`vC>Z(y z+M&Lq4uV_N1p7`-)hOs96oOz82X`x|Q;Gk?_RA2%@ zTX2?Q++&Qlq}WXQSW^6;4=h0681qovj_G=3eu2)18@)yK`&$#G5c!K@7K>it<|bET(>fd zOT}nUpN{q%ezk0jT(C@4jpnYYxq14jn2hJ(2=v2%PR6NvX!I7sPt>Uw^t58jG^rEM zX@ES?bV5CQlQmSTM9=D9B|027`X8-)))!yWq2Q+SvDY+28WFm%Kt~6R>C%}Gdhr|e z{&QzY=U3>7D@J6vYf0TKSYxvv0}=IO!aApZ7ObOc_L6nm5Z&2(I(gZe4{WN&BtNd_ zu3CwVk^4Ft*YEFIU;c8>jbdfe0tOreNU(Kf(y4gOx#gZGr2p$RS#bPW!U_p)J6NHu fv-D?6xNWLTzp_>M^Hcgm0^j z7V5ejE&QORYe{Kb+7`8GQ&^-WjJiy83S*wdY%VHByd_Ksj+XynJun9A z4#sXbnTMGj>bPFT&|AZ(z-0Sh>XBlU6hi?Of}lYN!f*%KS&{{RZF4~LcYcyAxV~+V#1vR?&mi50bnA~5UG9PM@yQ@EBvS7YZ+t#cVhsWfL z8B_1q#-FP5^5eYuy<=suR9!T|0tz-bz+`oiS4CrQsSvEON>W+pt-HiThh3sJ8}+#3 UF==e^)@9;O<4=&v2VM{V1^HqIJ^%m! diff --git a/checker/src/call_graph.rs b/checker/src/call_graph.rs index 6245abe1..48ac1005 100644 --- a/checker/src/call_graph.rs +++ b/checker/src/call_graph.rs @@ -20,6 +20,7 @@ use serde::{Deserialize, Serialize}; use mirai_annotations::*; use rustc_hir::def_id::DefId; use rustc_middle::ty::TyCtxt; +use rustc_span::Span; // An unique identifier for a Rust type string. type TypeId = u32; @@ -116,6 +117,8 @@ pub struct CallGraphConfig { included_crates: Vec>, /// Datalog output configuration datalog_config: Option, + /// If true, collect all call sites. + pub include_calls_in_summaries: bool, } impl CallGraphConfig { @@ -132,6 +135,7 @@ impl CallGraphConfig { reductions, included_crates, datalog_config, + include_calls_in_summaries: false, } } @@ -314,7 +318,7 @@ impl CallGraphEdge { #[derive(Clone)] pub struct CallGraph<'tcx> { /// Configuration for the call graph - config: CallGraphConfig, + pub config: CallGraphConfig, /// A context to use for getting the information about DefIds that is needed /// to generate fully qualified names for them. tcx: TyCtxt<'tcx>, @@ -368,7 +372,7 @@ impl<'tcx> CallGraph<'tcx> { self.config.dot_output_path.is_some() || self.config.datalog_config.is_some() } - /// Produce an updated call graph structure that preserves all of the + /// Produce an updated call graph structure that preserves all the /// fields except `graph`, which is replaced. fn update(&self, graph: Graph) -> CallGraph<'tcx> { CallGraph { @@ -433,7 +437,10 @@ impl<'tcx> CallGraph<'tcx> { callee: DefId, external_callee: bool, ) { - if self.config.call_sites_output_path.is_some() && !self.non_local_defs.contains(&caller) { + if self.config.include_calls_in_summaries + || (self.config.call_sites_output_path.is_some() + && !self.non_local_defs.contains(&caller)) + { self.call_sites.insert(loc, (caller, callee)); if external_callee { self.non_local_defs.insert(callee); @@ -1082,6 +1089,14 @@ impl<'tcx> CallGraph<'tcx> { call_graph.to_call_sites(Path::new(call_path.as_ref())); } } + + pub fn get_calls_for_def_ids(&self) -> HashMap> { + let mut calls = HashMap::>::new(); + for (span, (caller, callee)) in self.call_sites.iter() { + calls.entry(*caller).or_default().push((*span, *callee)); + } + calls + } } /// Supported Datalog output formats diff --git a/checker/src/callbacks.rs b/checker/src/callbacks.rs index 51eec854..fe95c5b3 100644 --- a/checker/src/callbacks.rs +++ b/checker/src/callbacks.rs @@ -168,7 +168,11 @@ impl MiraiCallbacks { type_cache: Rc::new(RefCell::new(TypeCache::new())), call_graph: CallGraph::new(call_graph_config, tcx), }; + if crate_visitor.options.print_summaries { + crate_visitor.call_graph.config.include_calls_in_summaries = true; + } crate_visitor.analyze_some_bodies(); crate_visitor.call_graph.output(); + crate_visitor.print_summaries(); } } diff --git a/checker/src/crate_visitor.rs b/checker/src/crate_visitor.rs index 2be69341..25530ffa 100644 --- a/checker/src/crate_visitor.rs +++ b/checker/src/crate_visitor.rs @@ -80,7 +80,7 @@ impl<'compilation> CrateVisitor<'compilation, '_> { DefId::local(DefIndex::from_u32(0)) }; - // Analyze all functions that are white listed or public + // Analyze all functions that are whitelisted or public let building_standard_summaries = std::env::var("MIRAI_START_FRESH").is_ok(); for local_def_id in self.tcx.hir().body_owners() { let def_id = local_def_id.to_def_id(); @@ -185,6 +185,7 @@ impl<'compilation> CrateVisitor<'compilation, '_> { let kind = self.tcx.def_kind(def_id); if matches!(kind, rustc_hir::def::DefKind::Static { .. }) || utils::is_foreign_contract(self.tcx, def_id) + || self.options.print_summaries { self.summary_cache .set_summary_for(def_id, self.tcx, summary); @@ -290,4 +291,15 @@ impl<'compilation> CrateVisitor<'compilation, '_> { } } } + + pub fn print_summaries(&mut self) { + if !self.options.print_summaries { + return; + } + let calls_for_def_ids = self.call_graph.get_calls_for_def_ids(); + let summaries_for_llm = self + .summary_cache + .get_summaries_for_llm(self.tcx, calls_for_def_ids); + print!("{}", summaries_for_llm.to_json()); + } } diff --git a/checker/src/options.rs b/checker/src/options.rs index 91c4e253..641acb3e 100644 --- a/checker/src/options.rs +++ b/checker/src/options.rs @@ -243,7 +243,7 @@ impl Options { matches.value_source("print_summaries"), Some(ValueSource::DefaultValue) ) { - self.print_summaries = false; + self.print_summaries = true; } args[rustc_args_start..].to_vec() } diff --git a/checker/src/summaries.rs b/checker/src/summaries.rs index 32b5d27f..e26aa7e8 100644 --- a/checker/src/summaries.rs +++ b/checker/src/summaries.rs @@ -5,9 +5,11 @@ use std::cmp::Ordering; use std::collections::{HashMap, HashSet}; +use std::env::current_dir; use std::fmt::{Debug, Formatter, Result}; use std::hash::{Hash, Hasher}; use std::ops::Deref; +use std::path::PathBuf; use std::rc::Rc; use itertools::Itertools; @@ -18,6 +20,7 @@ use sled::{Config, Db}; use mirai_annotations::*; use rustc_hir::def_id::DefId; use rustc_middle::ty::{Ty, TyCtxt}; +use rustc_span::Span; use crate::abstract_value::AbstractValue; use crate::abstract_value::AbstractValueTrait; @@ -275,7 +278,7 @@ fn add_provenance(preconditions: &[Precondition], tcx: TyCtxt<'_>) -> Vec SummaryCache<'tcx> { store_path } + pub fn get_summaries_for_llm( + &self, + tcx: TyCtxt, + call_site_per_def_id: HashMap>, + ) -> SummariesForLLM { + let source_map = tcx.sess.source_map(); + let mut entries = Vec::new(); + for (key, value) in self.def_id_cache.iter() { + let fully_qualified_name = self.key_cache.get(key).unwrap().to_string(); + let file_name; + let source; + if tcx.is_mir_available(*key) { + let mir = if tcx.is_const_fn(*key) { + tcx.mir_for_ctfe(*key) + } else { + let instance = rustc_middle::ty::InstanceKind::Item(*key); + tcx.instance_mir(instance) + }; + file_name = source_map.span_to_filename(mir.span).into_local_path(); + source = source_map + .span_to_snippet(mir.span) + .ok() + .unwrap_or_default(); + } else { + let span = tcx.def_span(*key); + file_name = source_map.span_to_filename(span).into_local_path(); + source = source_map.span_to_snippet(span).ok().unwrap_or_default(); + } + let mut path = None; + if let Some(mut p) = file_name { + if p.is_absolute() { + p = p + .strip_prefix(current_dir().unwrap_or_default()) + .unwrap_or(&p) + .to_path_buf(); + if p.is_absolute() { + let sysroot = utils::find_sysroot(); + let rel = p.strip_prefix(sysroot).unwrap_or(&p); + p = PathBuf::from("/").join(rel).to_path_buf(); + } + } + path = Some(p.to_string_lossy().to_string()); + } + let mut calls = vec![]; + if let Some(call_vec) = call_site_per_def_id.get(key) { + for (span, def_id) in call_vec.iter() { + let call_snippet = source_map.span_to_snippet(*span).ok().unwrap_or_default(); + let callee_name = self.key_cache.get(def_id).unwrap().to_string(); + calls.push((call_snippet, callee_name)); + } + }; + entries.push(( + path.unwrap_or_default(), + fully_qualified_name, + source, + LLMSummary::from_summary(value, calls), + )); + } + SummariesForLLM { entries } + } + /// Returns (and caches) a string that uniquely identifies a definition to serve as a key to /// the summary cache, which is a key value store. The string will always be the same as /// long as the definition does not change its name or location, so it can be used to @@ -645,6 +709,12 @@ impl<'tcx> SummaryCache<'tcx> { summary: Summary, ) { if let Some(func_id) = func_ref.function_id { + // if let Some(def_id) = func_ref.def_id { + // if func_args.is_none() && type_args.is_none() { + // info!("caching summary for def_id {:?}", def_id); + // self.def_id_cache.insert(def_id, summary.clone()); + // } + // } if func_args.is_some() || type_args.is_some() { let typed_cache_key = CallSiteKey::new(func_args.clone(), type_args.clone(), func_id); @@ -676,3 +746,56 @@ impl<'tcx> SummaryCache<'tcx> { self.def_id_cache.insert(def_id, summary) } } + +#[derive(Serialize)] +pub struct SummariesForLLM { + // (source path, fully qualified function name, function source, summary) + entries: Vec<(String, String, String, LLMSummary)>, +} + +impl SummariesForLLM { + pub fn to_json(&self) -> String { + serde_json::to_string_pretty(&self).unwrap() + } +} + +#[derive(Serialize)] +pub struct LLMSummary { + // Conditions that should hold prior to the call. + // Callers should substitute parameter values with argument values and simplify the results + // under the current path condition. Any values that do not simplify to true will require the + // caller to either generate an error message or to add a precondition to its own summary that + // will be sufficient to ensure that all the preconditions in this summary are met. + // The string value bundled with the condition is the message that details what would go + // wrong at runtime if the precondition is not satisfied by the caller. + //pub preconditions: Vec, + + // Modifications the function makes to mutable state external to the function. + // Every path will be rooted in a static or in a mutable parameter. + // No two paths in this collection will lead to the same place in memory. + // Callers should substitute parameter values with argument values and simplify the results + // under the current path condition. They should then update their current state to reflect the + // side effects of the call. + //pub side_effects: Vec<(Rc, Rc)>, + + // A condition that should hold after a call that completes normally. + // Callers should substitute parameter values with argument values and simplify the results + // under the current path condition. + // The resulting value should be conjoined to the current path condition. + //pub post_condition: Option>, + + // The set of function calls made by this function. The first element is the source snippet of + // the call and the second is the fully qualified name of the function being called. + calls: Vec<(String, String)>, +} + +impl LLMSummary { + pub fn from_summary(_summary: &Summary, calls: Vec<(String, String)>) -> LLMSummary { + LLMSummary { + // preconditions: vec![], + // side_effects: vec![], + // post_condition: vec![], + calls, + } + } +}