diff --git a/Cargo.toml b/Cargo.toml index 06e9aa7..0eb29a2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,12 +15,12 @@ exclude = ["examples"] ahash = "0.8.11" bincode = "1.3.3" chashmap = { version = "2.2.2", optional = true } -clap = { version = "=4.5.10", features = ["derive"] } +clap = { version = "=4.5.11", features = ["derive"] } contrie = { version = "0.1.4", optional = true } core_affinity = "0.8.1" ctrlc = "3.4.4" dashmap = { version = "6.0.1", features = ["inline"], optional = true } -env_logger = "0.11.3" +env_logger = "0.11.5" figment = { version = "0.10.19", features = ["toml", "env"] } flurry = { version = "0.5.1", optional = true } hashbrown = "0.14.5" @@ -28,14 +28,14 @@ hdrhistogram = "7.5.4" inventory = "0.3.15" jemallocator = "0.5.4" log = "0.4.22" -mio = { version = "1.0.0", features = ["net", "os-poll"] } -papaya = { version = "0.1.1", optional = true } +mio = { version = "1.0.1", features = ["net", "os-poll"] } +papaya = { version = "0.1.2", optional = true } parking_lot = "0.12.3" quanta = "0.12.3" rand = "0.8.5" -scc = { version = "2.1.2", optional = true } +scc = { version = "2.1.6", optional = true } serde = { version = "1.0.204", features = ["derive"] } -toml = "0.8.14" +toml = "0.8.16" zipf = "7.0.1" [features] diff --git a/src/bench.rs b/src/bench.rs index 7132c4a..0fdde7a 100644 --- a/src/bench.rs +++ b/src/bench.rs @@ -46,21 +46,20 @@ //! //! When measuring throughput, an output may look like the following: //! ```txt -//! 0 phase 0 repeat 0 duration 1.00 elapsed 1.00 total 1000000 mops 1.00 -//! 1 phase 0 repeat 1 duration 1.00 elapsed 2.00 total 1000000 mops 1.00 -//! 2 phase 0 repeat 2 duration 1.00 elapsed 3.00 total 1000000 mops 1.00 -//! 3 phase 0 finish . duration 1.00 elapsed 3.00 total 3000000 mops 1.00 +//! phase 0 repeat 0 duration 1.00 elapsed 1.00 total 1000000 mops 1.00 +//! phase 0 repeat 1 duration 1.00 elapsed 2.00 total 1000000 mops 1.00 +//! phase 0 repeat 2 duration 1.00 elapsed 3.00 total 1000000 mops 1.00 +//! 
phase 0 finish . duration 1.00 elapsed 3.00 total 3000000 mops 1.00 //! ``` //! //! The general format is: //! //! ```txt -//! <n> phase <p> repeat <r> duration <d> elapsed <e> total <o> mops <t> +//! phase <p> repeat <r> duration <d> elapsed <e> total <o> mops <t> //! ``` //! //! Where: //! -//! - `<n>`: report sequence number, also the line number of the output. //! - `<p>`: phase id. //! - `<r>`: repeat id in a phase, or string `finish .`, if the line is the aggregated report //! of a whole phase. @@ -78,10 +77,10 @@ //! measurements, but the `finish` line has extra output like the following: //! //! ```txt -//! 0 phase 0 repeat 0 duration 1.00 elapsed 1.00 total 1000000 mops 1.00 -//! 1 phase 0 repeat 1 duration 1.00 elapsed 2.00 total 1000000 mops 1.00 -//! 2 phase 0 repeat 2 duration 1.00 elapsed 3.00 total 1000000 mops 1.00 -//! 3 phase 0 finish . duration 1.00 elapsed 3.00 total 3000000 mops 1.00 min_ns 1 max_ns 100 avg_ns 50 p50_ns 50 p95_ns 95 p99_ns 99 p999_ns 100 +//! phase 0 repeat 0 duration 1.00 elapsed 1.00 total 1000000 mops 1.00 +//! phase 0 repeat 1 duration 1.00 elapsed 2.00 total 1000000 mops 1.00 +//! phase 0 repeat 2 duration 1.00 elapsed 3.00 total 1000000 mops 1.00 +//! phase 0 finish . duration 1.00 elapsed 3.00 total 3000000 mops 1.00 min_ns 1 max_ns 100 avg_ns 50 p50_ns 50 p95_ns 95 p99_ns 99 p999_ns 100 //! ``` //! //! The extra output on the last line has a format of: @@ -106,10 +105,10 @@ //! latency metrics above. In that case, the output will be like the following: //! //! ```txt -//! 0 phase 0 repeat 0 duration 1.00 elapsed 1.00 total 1000000 mops 1.00 -//! 1 phase 0 repeat 1 duration 1.00 elapsed 2.00 total 1000000 mops 1.00 -//! 2 phase 0 repeat 2 duration 1.00 elapsed 3.00 total 1000000 mops 1.00 -//! 3 phase 0 finish . duration 1.00 elapsed 3.00 total 3000000 mops 1.00 min_ns 1 max_ns 100 avg_ns 50 p50_ns 50 p95_ns 95 p99_ns 99 p999_ns 100 cdf_ns ... +//! phase 0 repeat 0 duration 1.00 elapsed 1.00 total 1000000 mops 1.00 +//! phase 0 repeat 1 duration 1.00 elapsed 2.00 total 1000000 mops 1.00 +//! phase 0 repeat 2 duration 1.00 elapsed 3.00 total 1000000 mops 1.00 +//! phase 0 finish . duration 1.00 elapsed 3.00 total 3000000 mops 1.00 min_ns 1 max_ns 100 avg_ns 50 p50_ns 50 p95_ns 95 p99_ns 99 p999_ns 100 cdf_ns ... //! ``` //! 
Since the latency metrics vary a lot between different benchmarks/runs, the number of data //! points of the CDF is different. Therefore, it is printed at the end of the output only. It is @@ -141,7 +140,7 @@ use parking_lot::Mutex; use quanta::Instant; use serde::Deserialize; use std::rc::Rc; -use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering}; +use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::{Arc, Barrier}; use std::time::Duration; @@ -601,10 +600,6 @@ struct WorkerContext { /// output measurements: Vec>, - /// The sequence number of the output. This value is actually only used by one thread at a - /// time, but it is shared among all workers. - seq: Arc, - /// Barrier that syncs all workers barrier: Arc, @@ -620,7 +615,6 @@ fn bench_stat_repeat( start: Instant, end: Instant, thread_info: (usize, usize), - seq: &Arc, measurements: &Vec>, ) { assert!(thread_info.0 == 0); @@ -646,16 +640,9 @@ fn bench_stat_repeat( if benchmark.report == ReportMode::Repeat || benchmark.report == ReportMode::All { println!( - "{} phase {} repeat {} duration {:.2} elapsed {:.2} total {} mops {:.2}", - seq.load(Ordering::Relaxed), - phase, - repeat, - duration, - elapsed, - total, - throughput, + "phase {} repeat {} duration {:.2} elapsed {:.2} total {} mops {:.2}", + phase, repeat, duration, elapsed, total, throughput, ); - seq.fetch_add(1, Ordering::Relaxed); } } @@ -666,7 +653,6 @@ fn bench_stat_final( start: Instant, end: Instant, thread_info: (usize, usize), - seq: &Arc, measurements: &Vec>, ) { assert!(thread_info.0 == 0); @@ -687,13 +673,8 @@ fn bench_stat_final( if benchmark.report == ReportMode::Finish || benchmark.report == ReportMode::All { print!( - "{} phase {} finish . duration {:.2} elapsed {:.2} total {} mops {:.2}", - seq.load(Ordering::Relaxed), - phase, - duration, - elapsed, - total, - throughput, + "phase {} finish . 
duration {:.2} elapsed {:.2} total {} mops {:.2}", + phase, duration, elapsed, total, throughput, ); if benchmark.latency { print!(" "); @@ -726,7 +707,6 @@ fn bench_stat_final( } println!(); - seq.fetch_add(1, Ordering::Relaxed); } } @@ -740,7 +720,6 @@ fn bench_worker_regular( since, phase, measurements, - seq, barrier, thread_info, } = context; @@ -815,7 +794,6 @@ fn bench_worker_regular( start, end, thread_info, - &seq, &measurements, ); } @@ -835,7 +813,6 @@ fn bench_worker_regular( start, end, thread_info, - &seq, &measurements, ); } @@ -851,7 +828,6 @@ fn bench_worker_async( since, phase, measurements, - seq, barrier, thread_info, } = context; @@ -948,7 +924,6 @@ fn bench_worker_async( start, end, thread_info, - &seq, &measurements, ); } @@ -984,7 +959,6 @@ fn bench_worker_async( start, end, thread_info, - &seq, &measurements, ); } @@ -995,7 +969,6 @@ fn bench_phase_regular( benchmark: Arc, phase: usize, since: Arc, - seq: Arc, thread: &impl Thread, ) { let barrier = Arc::new(Barrier::new(benchmark.threads.try_into().unwrap())); @@ -1013,7 +986,6 @@ fn bench_phase_regular( phase, measurements: measurements.clone(), barrier, - seq: seq.clone(), since: *since, thread_info, }; @@ -1037,7 +1009,6 @@ fn bench_phase_async( benchmark: Arc, phase: usize, since: Arc, - seq: Arc, thread: &impl Thread, ) { let barrier = Arc::new(Barrier::new((benchmark.threads).try_into().unwrap())); @@ -1055,7 +1026,6 @@ fn bench_phase_async( phase, measurements: measurements.clone(), barrier, - seq: seq.clone(), since: *since, thread_info, }; @@ -1083,16 +1053,8 @@ pub fn bench_regular( ) { debug!("Running regular bencher"); let start = Arc::new(Instant::now()); - let seq = Arc::new(AtomicUsize::new(0)); for (i, p) in phases.iter().enumerate() { - bench_phase_regular( - map.clone(), - p.clone(), - i, - start.clone(), - seq.clone(), - &thread, - ); + bench_phase_regular(map.clone(), p.clone(), i, start.clone(), &thread); } } @@ -1106,16 +1068,8 @@ pub fn bench_async( ) { 
debug!("Running async bencher"); let start = Arc::new(Instant::now()); - let seq = Arc::new(AtomicUsize::new(0)); for (i, p) in phases.iter().enumerate() { - bench_phase_async( - map.clone(), - p.clone(), - i, - start.clone(), - seq.clone(), - &thread, - ); + bench_phase_async(map.clone(), p.clone(), i, start.clone(), &thread); } }