*: add github workflow for testing

nerdroychan · Jul 23, 2024 · e52fc15 · e52fc15
1 parent 2d5fff6
commit e52fc15
Show file tree

Hide file tree

Showing 8 changed files with 257 additions and 84 deletions.
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -0,0 +1,29 @@
+name: test
+
+on:
+  push:
+    branches: [ "master" ]
+
+env:
+  CARGO_TERM_COLOR: always
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v4
+    - uses: actions/cache@v4
+      with:
+        path: |
+          ~/.cargo/bin/
+          ~/.cargo/registry/index/
+          ~/.cargo/registry/cache/
+          ~/.cargo/git/db/
+          target/
+        key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.toml') }}
+    - name: build
+      run: cargo build --verbose --release
+    - name: tests
+      run: cargo test --release
diff --git a/Cargo.toml b/Cargo.toml
@@ -5,31 +5,31 @@ edition = "2021"
 exclude = ["presets", "examples"]
 
 [dependencies]
-serde = { version = "*", features = ["derive"] }
-bincode = "*"
-chashmap = "*"
-contrie = "*"
-mio = { version = "*", features = ["net", "os-poll"] }
-dashmap = { version = "*", features = ["inline"] }
-parking_lot = "*"
-clap = { version = "=4.0.0", features = ["derive"] }
-rand = "*"
-log = "*"
-zipf = "*"
-serial_test = "*"
-env_logger = "*"
-jemallocator = "*"
-toml = "*"
-inventory = "*"
-core_affinity = "*"
-quanta = "*"
-hashbrown = "*"
-ahash = "*"
-figment = { version = "*", features = ["toml", "env"] }
-scc = "*"
-flurry = "*"
-papaya = "*"
-ctrlc = "*"
+ahash = "0.8.11"
+bincode = "1.3.3"
+chashmap = "2.2.2"
+clap = { version = "=4.5.10", features = ["derive"] }
+contrie = "0.1.4"
+core_affinity = "0.8.1"
+ctrlc = "3.4.4"
+dashmap = { version = "6.0.1", features = ["inline"] }
+env_logger = "0.11.3"
+figment = { version = "0.10.19", features = ["toml", "env"] }
+flurry = "0.5.1"
+hashbrown = "0.14.5"
+inventory = "0.3.15"
+jemallocator = "0.5.4"
+log = "0.4.22"
+mio = { version = "1.0.0", features = ["net", "os-poll"] }
+papaya = "0.1.1"
+parking_lot = "0.12.3"
+quanta = "0.12.3"
+rand = "0.8.5"
+scc = "2.1.2"
+serde = { version = "1.0.204", features = ["derive"] }
+serial_test = "3.1.1"
+toml = "0.8.14"
+zipf = "7.0.1"
 
 [profile.release]
 debug = true

diff --git a/README.md b/README.md
@@ -1,16 +1,52 @@
 # kvbench
 
+![GitHub Workflow](https://github.com/nerdroychan/kvbench/actions/workflows/test.yml/badge.svg)
+![GPLv3](https://img.shields.io/github/license/nerdroychan/kvbench)
+
 A benchmarking framework designed for testing key-value stores with easily customizable
 workloads.
 
-With `kvbench`, you can define the details of a benchmark using the TOML format, such as the
-proportions of mixed operations, the key access pattern, and key space size, just to name a
-few. In addition to regular single-process benchmarks, `kvbench` also integrates a key-value
-client/server implementation that works with a dedicated server thread/machine.
+## Intro
+
+This Rust crate enables the execution of tailored benchmarks on various key-value stores. Users
+have the flexibility to adjust benchmark and key-value store parameters and store them in a
+TOML-formatted file. The default command line interface is capable of loading these files and
+running the benchmarks as specified.
+
+In addition to standard single-process benchmarks, kvbench seamlessly incorporates a key-value
+client/server setup that operates with a dedicated server thread/machine.
+
+## Usage
+
+Without any changes, you can run the default command line interface with three modes:
+
+- `kvbench bench -s <STORE_FILE> -b <BENCH_FILE>` runs one or more benchmarks using the
+parameters stored in `<STORE_FILE>` and `<BENCH_FILE>`.
+- `kvbench server -h <HOST> -p <PORT> -s <STORE_FILE> -n <THREADS>` boots up a key-value server
+with `<THREADS>` workers, listening on `<HOST>:<PORT>`, and uses the key-value stores specified in
+`<STORE_FILE>`.
+- `kvbench list` lists all registered key-value stores that can be used.
+
+See [examples](examples/) for more examples.
+
+## Configuration
+
+See the documentation of the modules `stores` and `bench` for available options.
+
+## Integration
+
+You can incorporate `kvbench` into your own key-value store implementations and run it
+against the built-in stores. All you need to do is implement the necessary traits, depending on
+the type of the store, and call the default command line interface provided by this crate.
+
+See [examples/your-kv-store](examples/your-kv-store) for a minimal but concrete example.
+
+## Development
 
-You can also incorporate `kvbench` into your own key-value store implementations and run it
-against the built-in stores. All you need is implementing the [`KVMap`] or the [`AsyncKVMap`]
-trait, depending on the type of the store. After registering your store, simply reuse the
-exported [`cmdline()`] in your `main` function and it will work seamlessly with your own store.
+The missing pieces that are currently under active development:
 
-More detailed usage could be found in the module-level rustdocs.
+- Latency measurement (incl. CDF and tail/avg. latency metrics).
+- Atomic Read-modify-write (RMW) support.
+- More key-distributions (e.g., latest key, composite-zipfian).
+- Ordered key-value stores support (range query workloads).
+- Extra built-ins (e.g., YCSB workloads).
diff --git a/src/bench.rs b/src/bench.rs
@@ -1,4 +1,56 @@
 //! The core benchmark functionalities.
+//!
+//! A benchmark in this crate actually refers to a group of benchmark runs, named **phases**. Users
+//! can provide one or multiple phases that will be run sequentially, each with different
+//! configurations.
+//!
+//! ## Configuration Format
+//!
+//! A benchmark configuration file is formatted in TOML. It consists of the definitions of multiple
+//! phases, each of which is defined in a dictionary named `benchmark`. Phases are organized in an
+//! array, so the configuration of each phase starts with `[[benchmark]]`. The file also supports a
+//! `[global]` section whose options fill in the fields that a phase leaves unspecified. This
+//! reduces the number of options that must be repeated in every phase (e.g., shared options).
+//!
+//! A configuration file generally looks like the following:
+//!
+//! ```toml
+//! [global]
+//! # global options
+//!
+//! [[benchmark]]
+//! # phase 1 configuration
+//!
+//! [[benchmark]]
+//! # phase 2 configuration
+//!
+//! ...
+//! ```
+//!
+//! Available options and their usage can be found in [`BenchmarkOpt`] and [`GlobalOpt`], for phase
+//! and global options, respectively.
+//!
+//! ## Output Format
+//!
+//! When measuring throughput, an output may look like the following:
+//! ```text
+//! 0 phase 0 repeat 0 duration 1.00 elapsed 1.00 total 1000000 mops 1.00
+//! 1 phase 0 repeat 1 duration 1.00 elapsed 2.00 total 1000000 mops 1.00
+//! 2 phase 0 repeat 2 duration 1.00 elapsed 3.00 total 1000000 mops 1.00
+//! 3 phase 0 finish . duration 1.00 elapsed 3.00 total 3000000 mops 1.00
+//! ```
+//!
+//! From the first element to the last element in each line, the meanings are:
+//! - Report sequence number.
+//! - "phase" followed by the phase id.
+//! - "repeat" followed by the repeat id in a phase, or "finish .", if it is the aggregated report
+//! of a whole phase.
+//! - "duration" followed by the duration of the repeat/phase, in seconds.
+//! - "elapsed" followed by the total time elapsed since the start of all the benchmarks, at the
+//! moment this line is printed.
+//! - "total" followed by the total key-value operations executed by all worker threads in the
+//! repeat/phase.
+//! - "mops" followed by the throughput in million operations per second of the repeat/phase.
 
 use crate::thread::{JoinHandle, Thread};
 use crate::workload::{Workload, WorkloadOpt};
@@ -39,8 +91,10 @@ impl BenchKVMap {
 }
 
 /// The centralized registry that maps the name of newly added key-value store to its constructor
-/// function. A user-defined store can use the [`inventory::submit!`] macro to register their own
-/// stores to be used in the benchmark framework.
+/// function.
+///
+/// A user-defined store can use the [`inventory::submit!`] macro to register their own stores to
+/// be used in the benchmark framework.
 pub struct Registry<'a> {
     pub(crate) name: &'a str,
     constructor: fn(&Table) -> BenchKVMap,
@@ -106,27 +160,53 @@ enum ReportMode {
     All,
 }
 
-/// The configuration of a single benchmark deserialized from a toml string. The fields are
-/// optional to ease parsing from toml, as there can be global parameters that are set for them.
+/// The configuration of a single benchmark deserialized from a toml string.
+///
+/// The fields are optional to ease parsing from toml, as there can be global parameters that are
+/// set for them.
 #[derive(Deserialize, Clone, Debug)]
-struct BenchmarkOpt {
-    /// Number of threads that runs this benchmark.
-    threads: Option<usize>,
-    /// How many times this benchmark will be executed.
-    repeat: Option<usize>,
-    /// How long this benchmark will run, unit is seconds.
-    timeout: Option<f32>,
-    /// Fallback bound when timeout is not given.
-    ops: Option<u64>,
-    /// Report mode: "hidden", "repeat", "finish", "all"
-    report: Option<String>,
-    /// Max depth of queue for each worker (async only)
-    qd: Option<usize>,
-    /// Batch size for each request (async only)
-    batch: Option<usize>,
-    /// The definition of a workload. (flattened)
+pub struct BenchmarkOpt {
+    /// Number of threads that run this benchmark. Default 1.
+    pub threads: Option<usize>,
+
+    /// How many times this benchmark will be repeated. Default 1.
+    pub repeat: Option<usize>,
+
+    /// How long this benchmark will run, unit is seconds. If this option is specified, the `ops`
+    /// option will be ignored.
+    ///
+    /// Note: see `ops`.
+    pub timeout: Option<f32>,
+
+    /// How many operations each worker will execute. Only used if `timeout` is not given.
+    ///
+    /// Note: if both `timeout` and `ops` are not given, the run is only stopped when all possible
+    /// keys are generated.
+    pub ops: Option<u64>,
+
+    /// Report mode:
+    ///
+    /// - "hidden": not reported.
+    /// - "repeat": after each repeat, the metrics for that repeat are printed.
+    /// - "finish": after all repeats are finished, the metrics of the whole phase are printed.
+    /// - "all": equivalent to "repeat" + "finish".
+    pub report: Option<String>,
+
+    /// Max depth of queue for each worker (async only).
+    ///
+    /// When the number of pending requests is less than `qd`, the worker will not attempt to get
+    /// more responses.
+    pub qd: Option<usize>,
+
+    /// Batch size for each request (async only).
+    pub batch: Option<usize>,
+
+    /// The definition of a workload.
+    ///
+    /// This section is embedded and flattened, so that you can directly use options in
+    /// [`WorkloadOpt`].
     #[serde(flatten)]
-    workload: WorkloadOpt,
+    pub workload: WorkloadOpt,
 }
 
 impl BenchmarkOpt {
@@ -214,22 +294,21 @@ impl Benchmark {
 
 // {{{ benchmarkgroup
 
-/// The global options that go to the [global] section in a BenchmarkGroup.
-/// They will override missing fields.
+/// The global options that go to the `[global]` section.
+///
+/// Each option given here is used as the value of the corresponding option in every
+/// `[[benchmark]]` section that does not specify it. For the usage of each option, please refer to
+/// [`BenchmarkOpt`].
 #[derive(Deserialize, Clone, Debug)]
-struct GlobalOpt {
-    /// For benchmark
-    threads: Option<usize>,
-    repeat: Option<usize>,
-    qd: Option<usize>,
-    batch: Option<usize>,
-    report: Option<String>,
-
-    /// For workloads
-    klen: Option<usize>,
-    vlen: Option<usize>,
-    kmin: Option<usize>,
-    kmax: Option<usize>,
+pub struct GlobalOpt {
+    pub threads: Option<usize>,
+    pub repeat: Option<usize>,
+    pub qd: Option<usize>,
+    pub batch: Option<usize>,
+    pub report: Option<String>,
+    pub klen: Option<usize>,
+    pub vlen: Option<usize>,
+    pub kmin: Option<usize>,
+    pub kmax: Option<usize>,
 }
 
 impl Default for GlobalOpt {
@@ -801,6 +880,8 @@ fn bench_phase_async(
 }
 
 /// The real benchmark function for [`KVMap`].
+///
+/// **You may not need to check this if it is ok to run benchmarks with [`std::thread`].**
 pub fn bench_regular(
     map: Arc<Box<impl KVMap + ?Sized>>,
     phases: &Vec<Arc<Benchmark>>,
@@ -822,6 +903,8 @@ pub fn bench_regular(
 }
 
 /// The real benchmark function for [`AsyncKVMap`].
+///
+/// **You may not need to check this if it is ok to run benchmarks with [`std::thread`].**
 pub fn bench_async(
     map: Arc<Box<impl AsyncKVMap + ?Sized>>,
     phases: &Vec<Arc<Benchmark>>,