From ad52951a175957c4f6a47f1ee3089d9b86f3f4c9 Mon Sep 17 00:00:00 2001
From: Chen Chen <mail@roychan.org>
Date: Tue, 23 Jul 2024 21:12:40 -0500
Subject: [PATCH] *: major refactor; add github workflow for testing

---
 .github/workflows/test.yml |  29 +++++
 Cargo.toml                 |  50 ++++-----
 README.md                  |  54 +++++++--
 src/bench.rs               | 225 ++++++++++++++++++++-----------------
 src/cmdline.rs             |   4 +-
 src/lib.rs                 |  36 ++++--
 src/server.rs              |   9 +-
 src/stores/chashmap.rs     |   2 +-
 src/stores/contrie.rs      |   2 +-
 src/stores/dashmap.rs      |   2 +-
 src/stores/flurry.rs       |   2 +-
 src/stores/hashmap.rs      |   2 -
 src/stores/mod.rs          | 100 ++++++++++++++++-
 src/stores/null.rs         |   2 +-
 src/stores/papaya.rs       |   2 +-
 src/stores/remote.rs       |   2 +-
 src/stores/scc.rs          |   2 +-
 src/thread.rs              |   2 +
 src/workload.rs            |  11 +-
 19 files changed, 371 insertions(+), 167 deletions(-)
 create mode 100644 .github/workflows/test.yml

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
new file mode 100644
index 0000000..7243d43
--- /dev/null
+++ b/.github/workflows/test.yml
@@ -0,0 +1,29 @@
+name: test
+
+on:
+  push:
+    branches: [ "master" ]
+
+env:
+  CARGO_TERM_COLOR: always
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v4
+    - uses: actions/cache@v4
+      with:
+        path: |
+          ~/.cargo/bin/
+          ~/.cargo/registry/index/
+          ~/.cargo/registry/cache/
+          ~/.cargo/git/db/
+          target/
+        key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.toml') }}
+    - name: build
+      run: cargo build --verbose --release
+    - name: tests
+      run: cargo test --release
diff --git a/Cargo.toml b/Cargo.toml
index 8a7b77e..187c871 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -5,31 +5,31 @@ edition = "2021"
 exclude = ["presets", "examples"]
 
 [dependencies]
-serde = { version = "*", features = ["derive"] }
-bincode = "*"
-chashmap = "*"
-contrie = "*"
-mio = { version = "*", features = ["net", "os-poll"] }
-dashmap = { version = "*", features = ["inline"] }
-parking_lot = "*"
-clap = { version = "=4.0.0", features = ["derive"] }
-rand = "*"
-log = "*"
-zipf = "*"
-serial_test = "*"
-env_logger = "*"
-jemallocator = "*"
-toml = "*"
-inventory = "*"
-core_affinity = "*"
-quanta = "*"
-hashbrown = "*"
-ahash = "*"
-figment = { version = "*", features = ["toml", "env"] }
-scc = "*"
-flurry = "*"
-papaya = "*"
-ctrlc = "*"
+ahash = "0.8.11"
+bincode = "1.3.3"
+chashmap = "2.2.2"
+clap = { version = "=4.5.10", features = ["derive"] }
+contrie = "0.1.4"
+core_affinity = "0.8.1"
+ctrlc = "3.4.4"
+dashmap = { version = "6.0.1", features = ["inline"] }
+env_logger = "0.11.3"
+figment = { version = "0.10.19", features = ["toml", "env"] }
+flurry = "0.5.1"
+hashbrown = "0.14.5"
+inventory = "0.3.15"
+jemallocator = "0.5.4"
+log = "0.4.22"
+mio = { version = "1.0.0", features = ["net", "os-poll"] }
+papaya = "0.1.1"
+parking_lot = "0.12.3"
+quanta = "0.12.3"
+rand = "0.8.5"
+scc = "2.1.2"
+serde = { version = "1.0.204", features = ["derive"] }
+serial_test = "3.1.1"
+toml = "0.8.14"
+zipf = "7.0.1"
 
 [profile.release]
 debug = true
diff --git a/README.md b/README.md
index 0d97c22..f88b7a3 100644
--- a/README.md
+++ b/README.md
@@ -1,16 +1,52 @@
 # kvbench
 
+![GitHub Workflow](https://github.com/nerdroychan/kvbench/actions/workflows/test.yml/badge.svg)
+![GPLv3](https://img.shields.io/github/license/nerdroychan/kvbench)
+
 A benchmarking framework designed for testing key-value stores with easily customizable
 workloads.
 
-With `kvbench`, you can define the details of a benchmark using the TOML format, such as the
-proportions of mixed operations, the key access pattern, and key space size, just to name a
-few. In addition to regular single-process benchmarks, `kvbench` also integrates a key-value
-client/server implementation that works with a dedicated server thread/machine.
+## Intro
+
+This Rust crate enables the execution of tailored benchmarks on various key-value stores. Users
+have the flexibility to adjust benchmark and key-value store parameters and store them in a
+TOML-formatted file. The default command line interface is capable of loading these files and
+running the benchmarks as specified.
+
+In addition to standard single-process benchmarks, kvbench seamlessly incorporates a key-value
+client/server setup that operates with a dedicated server thread/machine.
+
+## Usage
+
+Without any changes, you can run the default command line interface with three modes:
+
+- `kvbench bench -s <STORE_FILE> -b <BENCH_FILE>` runs one or more benchmarks using the parameters
+stored in `<STORE_FILE>` and `<BENCH_FILE>`.
+- `kvbench server -h <HOST> -p <PORT> -s <STORE_FILE> -n <THREADS>` boots up a key-value server
+with `<THREADS>` workers, listening on `<HOST>:<PORT>`, and uses the key-value stores specified in
+`<STORE_FILE>`.
+- `kvbench list` lists all registered key-value stores that can be used.
+
+See [examples](examples/) for more examples.
+
+## Configuration
+
+See the documentation of the modules `stores` and `bench` for available options.
+
+## Integration
+
+You can incorporate `kvbench` into your own key-value store implementations and run it
+against the built-in stores. All you need to do is implement the necessary traits, depending on
+the type of the store, and call the default command line interface provided by this crate.
+
+See [examples/your-kv-store](examples/your-kv-store) for a minimal but concrete example.
+
+## Development
 
-You can also incorporate `kvbench` into your own key-value store implementations and run it
-against the built-in stores. All you need is implementing the [`KVMap`] or the [`AsyncKVMap`]
-trait, depending on the type of the store. After registering your store, simply reuse the
-exported [`cmdline()`] in your `main` function and it will work seamlessly with your own store.
+The missing pieces that are currently under active development:
 
-More detailed usage could be found in the module-level rustdocs.
+- Latency measurement (incl. CDF and tail/avg. latency metrics).
+- Atomic Read-modify-write (RMW) support.
+- More key-distributions (e.g., latest key, composite-zipfian).
+- Ordered key-value stores support (range query workloads).
+- Extra built-ins (e.g., YCSB workloads).
diff --git a/src/bench.rs b/src/bench.rs
index 95135b9..e9335ae 100644
--- a/src/bench.rs
+++ b/src/bench.rs
@@ -1,11 +1,68 @@
 //! The core benchmark functionalities.
-
+//!
+//! A benchmark in this crate actually refers to a group of benchmark runs, named **phases**. Users
+//! can provide one or multiple phases that will be run sequentially, each with different
+//! configurations.
+//!
+//! ## Configuration Format
+//!
+//! A benchmark configuration file is formatted in TOML. It consists of the definition of multiple
+//! phases, each defined in a dictionary named `benchmark`. Phases are organized in an array, so
+//! the configuration of each phase starts with `[[benchmark]]`. It also supports a `[global]`
+//! section in the configuration file that fills in the missing fields of each phase. This can
+//! reduce the number of repeated options in each phase (e.g., shared options).
+//!
+//! A configuration file generally looks like the following:
+//!
+//! ```toml
+//! [global]
+//! # global options
+//!
+//! [[benchmark]]
+//! # phase 1 configuration
+//!
+//! [[benchmark]]
+//! # phase 2 configuration
+//!
+//! ...
+//! ```
+//!
+//! Available options and their usage can be found in [`BenchmarkOpt`] and [`GlobalOpt`], for phase
+//! and global options, respectively.
+//!
+//! Options in the `[global]` section can be overridden via environment variables without changing
+//! the content of the TOML file.
+//! For example, if the user needs to override `x` in `[global]`, setting the environment variable
+//! `global.x` will get the job done.
+//!
+//! ## Output Format
+//!
+//! When measuring throughput, an output may look like the following:
+//! ```txt
+//! 0 phase 0 repeat 0 duration 1.00 elapsed 1.00 total 1000000 mops 1.00
+//! 1 phase 0 repeat 1 duration 1.00 elapsed 2.00 total 1000000 mops 1.00
+//! 2 phase 0 repeat 2 duration 1.00 elapsed 3.00 total 1000000 mops 1.00
+//! 3 phase 0 finish . duration 1.00 elapsed 3.00 total 3000000 mops 1.00
+//! ```
+//!
+//! From the first element to the last element in each line, the meanings are:
+//! - Report sequence number.
+//! - "phase" followed by the phase id.
+//! - "repeat" followed by the repeat id in a phase, or "finish .", if it is the aggregated report
+//! of a whole phase.
+//! - "duration" followed by the duration of the repeat/phase, in seconds.
+//! - "elapsed" followed by the total elapsed time when this line is printed, since the start of
+//! all the benchmarks.
+//! - "total" followed by the total key-value operations executed by all worker threads in the
+//! repeat/phase.
+//! - "mops" followed by the throughput in million operations per second of the repeat/phase.
+
+use crate::stores::{BenchKVMap, BenchKVMapOpt};
 use crate::thread::{JoinHandle, Thread};
 use crate::workload::{Workload, WorkloadOpt};
 use crate::*;
 use figment::providers::{Env, Format, Toml};
 use figment::Figment;
-use hashbrown::HashMap;
 use log::debug;
 use parking_lot::Mutex;
 use quanta::Instant;
@@ -14,71 +71,6 @@ use std::rc::Rc;
 use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
 use std::sync::{Arc, Barrier};
 use std::time::Duration;
-use toml::Table;
-
-// {{{ benchmap
-
-/// A unified enum for a created key-value store that is ready to run.
-pub enum BenchKVMap {
-    Regular(Box<dyn KVMap>),
-    Async(Box<dyn AsyncKVMap>),
-}
-
-impl BenchKVMap {
-    /// Wraps the real `bench` function of the store.
-    pub fn bench(self, phases: &Vec<Arc<Benchmark>>) {
-        match self {
-            BenchKVMap::Regular(map) => {
-                KVMap::bench(map, phases);
-            }
-            BenchKVMap::Async(map) => {
-                AsyncKVMap::bench(map, phases);
-            }
-        };
-    }
-}
-
-/// The centralized registry that maps the name of newly added key-value store to its constructor
-/// function. A user-defined store can use the [`inventory::submit!`] macro to register their own
-/// stores to be used in the benchmark framework.
-pub struct Registry<'a> {
-    pub(crate) name: &'a str,
-    constructor: fn(&Table) -> BenchKVMap,
-}
-
-impl<'a> Registry<'a> {
-    pub const fn new(name: &'a str, constructor: fn(&Table) -> BenchKVMap) -> Self {
-        Self { name, constructor }
-    }
-}
-
-inventory::collect!(Registry<'static>);
-
-/// An aggregated option enum that can be parsed from a toml string. It contains all necessary
-/// parameters for each type of maps to be created.
-#[derive(Deserialize, Clone, Debug)]
-pub(crate) struct BenchKVMapOpt {
-    name: String,
-    #[serde(flatten)]
-    opt: Table,
-}
-
-impl BenchKVMap {
-    pub(crate) fn new(opt: &BenchKVMapOpt) -> BenchKVMap {
-        // construct the hashmap.. this will be done every time
-        let mut registered: HashMap<&'static str, fn(&Table) -> BenchKVMap> = HashMap::new();
-        for r in inventory::iter::<Registry> {
-            debug!("Adding supported kvmap: {}", r.name);
-            assert!(registered.insert(r.name, r.constructor).is_none()); // no existing name
-        }
-        let f = registered.get(opt.name.as_str()).unwrap_or_else(|| {
-            panic!("map {} not found in registry", opt.name);
-        });
-        f(&opt.opt)
-    }
-}
-
-// }}} benchmap
 
 // {{{ benchmark
 
@@ -106,27 +98,53 @@ enum ReportMode {
     All,
 }
 
-/// The configuration of a single benchmark deserialized from a toml string. The fields are
-/// optional to ease parsing from toml, as there can be global parameters that are set for them.
+/// The configuration of a single benchmark deserialized from a toml string.
+///
+/// The fields are optional to ease parsing from toml, as there can be global parameters that are
+/// set for them.
 #[derive(Deserialize, Clone, Debug)]
-struct BenchmarkOpt {
-    /// Number of threads that runs this benchmark.
-    threads: Option<usize>,
-    /// How many times this benchmark will be executed.
-    repeat: Option<usize>,
-    /// How long this benchmark will run, unit is seconds.
-    timeout: Option<f32>,
-    /// Fallback bound when timeout is not given.
-    ops: Option<u64>,
-    /// Report mode: "hidden", "repeat", "finish", "all"
-    report: Option<String>,
-    /// Max depth of queue for each worker (async only)
-    qd: Option<usize>,
-    /// Batch size for each request (async only)
-    batch: Option<usize>,
-    /// The definition of a workload. (flattened)
+pub struct BenchmarkOpt {
+    /// Number of threads that runs this benchmark. Default 1.
+    pub threads: Option<usize>,
+
+    /// How many times this benchmark will be repeated. Default 1.
+    pub repeat: Option<usize>,
+
+    /// How long this benchmark will run, unit is seconds. If this option is specified, the `ops`
+    /// option will be ignored.
+    ///
+    /// Note: see `ops`.
+    pub timeout: Option<f32>,
+
+    /// How many operations each worker will execute. Only used if `timeout` is not given.
+    ///
+    /// Note: if both `timeout` and `ops` are not given, the run is only stopped when all possible
+    /// keys are generated.
+    pub ops: Option<u64>,
+
+    /// Report mode:
+    ///
+    /// - "hidden": not reported.
+    /// - "repeat": after each repeat, the metrics for that repeat are printed.
+    /// - "finish": after all repeats are finished, the metrics of the whole phase are printed.
+    /// - "all": equivalent to "repeat" + "finish".
+    pub report: Option<String>,
+
+    /// Max depth of queue for each worker (async only).
+    ///
+    /// When the pending requests are less than `qd`, the worker will not attempt to get more
+    /// responses.
+    pub qd: Option<usize>,
+
+    /// Batch size for each request (async only).
+    pub batch: Option<usize>,
+
+    /// The definition of a workload.
+    ///
+    /// This section is embedded and flattened, so that you can directly use options in
+    /// [`WorkloadOpt`].
     #[serde(flatten)]
-    workload: WorkloadOpt,
+    pub workload: WorkloadOpt,
 }
 
 impl BenchmarkOpt {
@@ -214,22 +232,21 @@ impl Benchmark {
 
 // {{{ benchmarkgroup
 
-/// The global options that go to the [global] section in a BenchmarkGroup.
-/// They will override missing fields.
+/// The global options that go to the `[global]` section.
+///
+/// They are used as fallbacks for the options that are missing in each `[[benchmark]]` section.
+/// For the usage of each option, please refer to [`BenchmarkOpt`].
 #[derive(Deserialize, Clone, Debug)]
-struct GlobalOpt {
-    /// For benchmark
-    threads: Option<usize>,
-    repeat: Option<usize>,
-    qd: Option<usize>,
-    batch: Option<usize>,
-    report: Option<String>,
-
-    /// For workloads
-    klen: Option<usize>,
-    vlen: Option<usize>,
-    kmin: Option<usize>,
-    kmax: Option<usize>,
+pub struct GlobalOpt {
+    pub threads: Option<usize>,
+    pub repeat: Option<usize>,
+    pub qd: Option<usize>,
+    pub batch: Option<usize>,
+    pub report: Option<String>,
+    pub klen: Option<usize>,
+    pub vlen: Option<usize>,
+    pub kmin: Option<usize>,
+    pub kmax: Option<usize>,
 }
 
 impl Default for GlobalOpt {
@@ -297,7 +314,7 @@ struct BenchmarkGroupOpt {
 
 // {{{ bencher
 
-pub(crate) fn init(text: &str) -> (BenchKVMap, Vec<Arc<Benchmark>>) {
+pub fn init(text: &str) -> (BenchKVMap, Vec<Arc<Benchmark>>) {
     let opt: BenchmarkGroupOpt = Figment::new()
         .merge(Toml::string(text))
         .merge(Env::raw())
@@ -801,6 +818,8 @@ fn bench_phase_async(
 }
 
 /// The real benchmark function for [`KVMap`].
+///
+/// **You may not need to check this if it is ok to run benchmarks with [`std::thread`].**
 pub fn bench_regular(
     map: Arc<Box<impl KVMap + ?Sized>>,
     phases: &Vec<Arc<Benchmark>>,
@@ -822,6 +841,8 @@ pub fn bench_regular(
 }
 
 /// The real benchmark function for [`AsyncKVMap`].
+///
+/// **You may not need to check this if it is ok to run benchmarks with [`std::thread`].**
 pub fn bench_async(
     map: Arc<Box<impl AsyncKVMap + ?Sized>>,
     phases: &Vec<Arc<Benchmark>>,
diff --git a/src/cmdline.rs b/src/cmdline.rs
index 5d843f5..fa26b82 100644
--- a/src/cmdline.rs
+++ b/src/cmdline.rs
@@ -1,4 +1,4 @@
-use crate::bench::*;
+use crate::stores::{BenchKVMap, Registry};
 use clap::ValueHint::FilePath;
 use clap::{Args, Parser, Subcommand};
 use log::debug;
@@ -53,7 +53,7 @@ fn bench_cli(args: &BenchArgs) {
         read_to_string(s.as_str()).unwrap() + "\n" + &read_to_string(b.as_str()).unwrap()
     };
 
-    let (map, phases) = init(&opt);
+    let (map, phases) = crate::bench::init(&opt);
     map.bench(&phases);
 }
 
diff --git a/src/lib.rs b/src/lib.rs
index fbe9e92..52d5b1d 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -11,7 +11,20 @@
 //! trait, depending on the type of the store. After registering your store, simply reuse the
 //! exported [`cmdline()`] in your `main` function and it will work seamlessly with your own store.
 //!
-//! More detailed usage could be found in the module-level rustdocs.
+//! A few noteworthy design choices include:
+//!
+//! - Each key-value store exclusively stores a single type of key/value pair: variable-sized byte
+//! arrays represented as [`u8`] slices on the heap. No generics over the key's type.
+//! - The key-value store and the benchmark configurations are black boxes. They are created
+//! dynamically from a TOML file, and dynamically dispatched.
+//! - Benchmark functionalities can be reused in users' own crates: new key-value stores can be
+//! dynamically registered without touching the source code of this crate.
+//!
+//! More detailed usage could be found in the module-level rustdocs:
+//!
+//! - [`mod@bench`] for the config format of a benchmark.
+//! - [`mod@stores`] for the config format of a built-in key-value store.
+//! - [`cmdline()`] for the usage of the default command line interface.
 
 use serde::{Deserialize, Serialize};
 use std::cell::RefCell;
@@ -29,20 +42,23 @@ pub trait KVMap: Send + Sync + 'static {
     /// For most stores, this can just be done using an Arc.
     fn handle(&self) -> Box<dyn KVMapHandle>;
 
-    /// The main bench method, with its default implementation usually doesn't need manual
-    /// implementation unless the implementor needs custom thread spawn-join functions.
-    /// If one would like to manually implement this method, it is needed to explicitly declare a
-    /// new [`thread::Thread`] object and pass it to [`bench::bench_regular`].
+    /// The main bench method.
+    ///
+    /// Users usually don't need to manually implement this method unless the implementor needs
+    /// custom thread spawn-join functions. If one would like to do so, one needs to explicitly
+    /// declare a new [`thread::Thread`] object and pass it to [`bench::bench_regular`].
     fn bench(self: Box<Self>, phases: &Vec<Arc<crate::bench::Benchmark>>) {
         let map = Arc::new(self);
         let thread = crate::thread::DefaultThread;
         crate::bench::bench_regular(map, phases, thread);
     }
 
-    /// Start the main loop of KV server while using this map as the backend. There is no need to
-    /// manually implement this method unless the implementor needs custom thread spawn-join
-    /// functions. If one would like to manually implement this method, it is needed to explicitly
-    /// declare a new [`thread::Thread`] object and pass it to [`server::server_regular`].
+    /// Start the main loop of KV server while using this map as the backend.
+    ///
+    /// There is no need to manually implement this method unless the implementor needs custom
+    /// thread spawn-join functions. If one would like to manually implement this method, one
+    /// needs to explicitly declare a new [`thread::Thread`] object and pass it to
+    /// [`server::server_regular`].
     fn server(
         self: Box<Self>,
         host: &str,
@@ -108,7 +124,7 @@ pub struct Response {
     pub data: Option<Box<[u8]>>,
 }
 
-/// An non-blocking, thread-safe key-value map.
+/// A non-blocking, thread-safe key-value map.
 ///
 /// Unlike [`KVMap`], [`AsyncKVMap`] works in request/response style. Where each handle needs to be
 /// created by registering an explicit responder that serves as the "callback" when the underlying
diff --git a/src/server.rs b/src/server.rs
index cf0f8b5..8c6e195 100644
--- a/src/server.rs
+++ b/src/server.rs
@@ -1,7 +1,7 @@
 //! A key-value server/client implementation.
 
-use crate::bench::{BenchKVMap, BenchKVMapOpt};
 use crate::serialization::{read_request, read_response, write_request, write_response};
+use crate::stores::{BenchKVMap, BenchKVMapOpt};
 use crate::thread::{JoinHandle, Thread};
 use crate::*;
 use figment::providers::{Env, Format, Toml};
@@ -439,6 +439,8 @@ fn server_mainloop(
 }
 
 /// The real server function for [`KVMap`].
+///
+/// **You may not need to check this if it is ok to run benchmarks with [`std::thread`].**
 pub fn server_regular(
     map: Arc<Box<impl KVMap + ?Sized>>,
     host: &str,
@@ -478,6 +480,8 @@ pub fn server_regular(
 }
 
 /// The real server function for [`AsyncKVMap`].
+///
+/// **You may not need to check this if it is ok to run benchmarks with [`std::thread`].**
 pub fn server_async(
     map: Arc<Box<impl AsyncKVMap + ?Sized>>,
     host: &str,
@@ -636,7 +640,7 @@ struct ServerMapOpt {
     map: BenchKVMapOpt,
 }
 
-pub(crate) fn init(text: &str) -> BenchKVMap {
+pub fn init(text: &str) -> BenchKVMap {
     let opt: ServerMapOpt = Figment::new()
         .merge(Toml::string(&text))
         .merge(Env::raw())
@@ -649,7 +653,6 @@ pub(crate) fn init(text: &str) -> BenchKVMap {
 mod tests {
     use super::*;
 
-    use crate::bench::BenchKVMap;
     use crate::stores::*;
     use std::sync::atomic::{AtomicU32, Ordering};
     use std::sync::mpsc::{channel, Receiver, Sender};
diff --git a/src/stores/chashmap.rs b/src/stores/chashmap.rs
index 4f5c9a2..0287d9d 100644
--- a/src/stores/chashmap.rs
+++ b/src/stores/chashmap.rs
@@ -1,4 +1,4 @@
-use crate::bench::{BenchKVMap, Registry};
+use crate::stores::{BenchKVMap, Registry};
 use crate::*;
 
 #[derive(Clone)]
diff --git a/src/stores/contrie.rs b/src/stores/contrie.rs
index 4338576..8e5db82 100644
--- a/src/stores/contrie.rs
+++ b/src/stores/contrie.rs
@@ -1,4 +1,4 @@
-use crate::bench::{BenchKVMap, Registry};
+use crate::stores::{BenchKVMap, Registry};
 use crate::*;
 
 #[derive(Clone)]
diff --git a/src/stores/dashmap.rs b/src/stores/dashmap.rs
index e600395..8a87bf3 100644
--- a/src/stores/dashmap.rs
+++ b/src/stores/dashmap.rs
@@ -1,4 +1,4 @@
-use crate::bench::{BenchKVMap, Registry};
+use crate::stores::{BenchKVMap, Registry};
 use crate::*;
 
 #[derive(Clone)]
diff --git a/src/stores/flurry.rs b/src/stores/flurry.rs
index a670cca..9ef1fff 100644
--- a/src/stores/flurry.rs
+++ b/src/stores/flurry.rs
@@ -1,4 +1,4 @@
-use crate::bench::{BenchKVMap, Registry};
+use crate::stores::{BenchKVMap, Registry};
 use crate::*;
 
 #[derive(Clone)]
diff --git a/src/stores/hashmap.rs b/src/stores/hashmap.rs
index 5f9e941..79eb976 100644
--- a/src/stores/hashmap.rs
+++ b/src/stores/hashmap.rs
@@ -1,6 +1,4 @@
-use crate::bench::{BenchKVMap, Registry};
 use crate::stores::*;
-use crate::*;
 use ::hashbrown::HashMap;
 use parking_lot::{Mutex, RwLock};
 use serde::Deserialize;
diff --git a/src/stores/mod.rs b/src/stores/mod.rs
index 142cb17..f0733fa 100644
--- a/src/stores/mod.rs
+++ b/src/stores/mod.rs
@@ -1,7 +1,42 @@
 //! The implementation of built-in key-value stores, and some util functions.
-
+//!
+//! ## Configuration Format
+//!
+//! The configuration of a key-value store is stored in a dictionary named `map`. Therefore, a
+//! store's config file looks like the following:
+//!
+//! ```toml
+//! [map]
+//! name = "..."
+//! # option1 = ...
+//! # option2 = ...
+//!
+//! ...
+//! ```
+//! The field `name` must be given and it should be equal to the name registered by the store.
+//! Other than `name`, all the fields are parsed as a string map and will be handed over to the
+//! store's constructor function.
+//!
+//! ## Registering New Stores
+//!
+//! When users would like to dynamically register new key-value stores from their own crate, first
+//! of all, they need to implement the corresponding [`KVMap`]/[`KVMapHandle`]
+//! (or [`AsyncKVMap`]/[`AsyncKVMapHandle`]) for the store. Then, they need to create a constructor
+//! function with a signature of `fn(&toml::Table) -> BenchKVMap`.
+//!
+//! The final step is to register the store's constructor (along with its name) using
+//! [`inventory`]. A minimal example would be: `inventory::submit! { Registry::new("name",
+//! constructor_fn) };`.
+//!
+//! The source code of all built-in stores provides good examples of this process.
+
+use crate::bench::Benchmark;
+use crate::*;
 use ahash::AHasher;
+use hashbrown::HashMap;
+use log::debug;
 use std::hash::Hasher;
+use toml::Table;
 
 pub fn hash(key: &[u8]) -> u64 {
     let mut hasher = AHasher::default();
@@ -16,6 +51,68 @@ pub fn find_shard(key: &[u8], nr_shards: usize) -> usize {
     usize::try_from(hash).unwrap() % nr_shards
 }
 
+/// A unified enum for a created key-value store that is ready to run.
+pub enum BenchKVMap {
+    Regular(Box<dyn KVMap>),
+    Async(Box<dyn AsyncKVMap>),
+}
+
+impl BenchKVMap {
+    /// Wraps the real `bench` function of the store.
+    pub fn bench(self, phases: &Vec<Arc<Benchmark>>) {
+        match self {
+            BenchKVMap::Regular(map) => {
+                KVMap::bench(map, phases);
+            }
+            BenchKVMap::Async(map) => {
+                AsyncKVMap::bench(map, phases);
+            }
+        };
+    }
+}
+
+/// The centralized registry that maps the name of newly added key-value store to its constructor
+/// function.
+///
+/// A user-defined store can use the [`inventory::submit!`] macro to register their own stores to
+/// be used in the benchmark framework.
+pub struct Registry<'a> {
+    pub(crate) name: &'a str,
+    constructor: fn(&Table) -> BenchKVMap,
+}
+
+impl<'a> Registry<'a> {
+    pub const fn new(name: &'a str, constructor: fn(&Table) -> BenchKVMap) -> Self {
+        Self { name, constructor }
+    }
+}
+
+inventory::collect!(Registry<'static>);
+
+/// An aggregated option struct that can be parsed from a toml string. It contains all necessary
+/// parameters for each type of maps to be created.
+#[derive(Deserialize, Clone, Debug)]
+pub(crate) struct BenchKVMapOpt {
+    name: String,
+    #[serde(flatten)]
+    opt: Table,
+}
+
+impl BenchKVMap {
+    pub(crate) fn new(opt: &BenchKVMapOpt) -> BenchKVMap {
+        // construct the hashmap.. this will be done every time
+        let mut registered: HashMap<&'static str, fn(&Table) -> BenchKVMap> = HashMap::new();
+        for r in inventory::iter::<Registry> {
+            debug!("Adding supported kvmap: {}", r.name);
+            assert!(registered.insert(r.name, r.constructor).is_none()); // no existing name
+        }
+        let f = registered.get(opt.name.as_str()).unwrap_or_else(|| {
+            panic!("map {} not found in registry", opt.name);
+        });
+        f(&opt.opt)
+    }
+}
+
 pub mod chashmap;
 pub mod contrie;
 pub mod dashmap;
@@ -39,7 +136,6 @@ pub use scc::*;
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::*;
 
     fn map_test(map: &impl KVMap) {
         let mut handle = map.handle();
diff --git a/src/stores/null.rs b/src/stores/null.rs
index 179957c..5d84c5c 100644
--- a/src/stores/null.rs
+++ b/src/stores/null.rs
@@ -1,4 +1,4 @@
-use crate::bench::{BenchKVMap, Registry};
+use crate::stores::{BenchKVMap, Registry};
 use crate::*;
 
 /// NullMap does nothing. It can be used to measure overheads in the future.
diff --git a/src/stores/papaya.rs b/src/stores/papaya.rs
index 74ed2c5..96b1a16 100644
--- a/src/stores/papaya.rs
+++ b/src/stores/papaya.rs
@@ -1,4 +1,4 @@
-use crate::bench::{BenchKVMap, Registry};
+use crate::stores::{BenchKVMap, Registry};
 use crate::*;
 
 #[derive(Clone)]
diff --git a/src/stores/remote.rs b/src/stores/remote.rs
index 3f23175..1ed950e 100644
--- a/src/stores/remote.rs
+++ b/src/stores/remote.rs
@@ -1,5 +1,5 @@
-use crate::bench::{BenchKVMap, Registry};
 use crate::server::KVClient;
+use crate::stores::{BenchKVMap, Registry};
 use crate::*;
 use serde::Deserialize;
 use std::rc::Rc;
diff --git a/src/stores/scc.rs b/src/stores/scc.rs
index d6aa70a..426fec6 100644
--- a/src/stores/scc.rs
+++ b/src/stores/scc.rs
@@ -1,4 +1,4 @@
-use crate::bench::{BenchKVMap, Registry};
+use crate::stores::{BenchKVMap, Registry};
 use crate::*;
 
 #[derive(Clone)]
diff --git a/src/thread.rs b/src/thread.rs
index 855a5f5..89cb5cf 100644
--- a/src/thread.rs
+++ b/src/thread.rs
@@ -1,5 +1,7 @@
 //! Spawn-join functionality.
 //!
+//! **You may not need to check this if it is ok to run benchmarks with [`std::thread`].**
+//!
 //! A KVMap implementation is generally passive. However, some KVMap may act like a server with
 //! active threads. In that case, one may employ its own implementation of spawn-join. If that is
 //! the case, their join handle (like std::thread::JoinHandle) should implement the JoinHandle
diff --git a/src/workload.rs b/src/workload.rs
index ca09824..18b1144 100644
--- a/src/workload.rs
+++ b/src/workload.rs
@@ -126,10 +126,6 @@ pub struct WorkloadOpt {
     /// Percentage of `DELETE` operations.
     pub del_perc: u8,
 
-    // Section of key/value generation
-    // (klen, vlen, kmin, kmax) are marked optional because one may not specify them in each
-    // individual workload, but instead in benchmark settings, and the bench module will take care
-    // of it. So they must not be None when creating a workload.
     /// Key length in bytes.
     pub klen: Option<usize>,
 
@@ -143,12 +139,19 @@ pub struct WorkloadOpt {
     pub kmax: Option<usize>,
 
     /// Key distribution.
+    ///
+    /// - "increment": sequentially incrementing from `kmin` to `kmax`.
+    /// - "incrementp": partitioned `increment`, where each thread takes a range of keys.
+    /// - "uniform": uniformly random keys from `kmin` to `kmax`.
+    /// - "zipfian": random keys from `kmin` to `kmax` following Zipfian distribution.
     pub dist: String,
 
     /// The theta parameter for Zipfian distribution. (Optional, default 1.0)
     pub zipf_theta: Option<f64>,
 
     /// The hotspot location for Zipfian distribution. (Optional, default 0.0)
+    ///
+    /// 0.0 means the first key. 0.5 means approximately the middle in the key space.
     pub zipf_hotspot: Option<f64>,
 }