elastic · a03nikki · Jun 11, 2024 · Jun 11, 2024 · Jun 11, 2024 · Jun 11, 2024
diff --git a/README.md b/README.md
@@ -80,7 +80,7 @@ The table below shows the track parameters that can be adjusted along with defau
 
 This challenge assumes that the *elasticlogs-1bn-load* track has been executed as it simulates querying against these indices. It shows how indexing and querying through simulated Kibana dashboards can be combined to provide a more realistic benchmark.
 
-In this challenge rate-limited indexing at varying levels is combined with a fixed level of querying. If metrics from the run are stored in Elasticsearch, it is possible to analyse these in Kibana in order to identify how indexing rate affects query latency and vice versa.
+In this challenge rate-limited indexing at varying levels is combined with a fixed level of querying. If metrics from the run are stored in Elasticsearch, it is possible to analyze these in Kibana in order to identify how indexing rate affects query latency and vice versa.
 
 The table below shows the track parameters that can be adjusted along with default values:
 
@@ -173,7 +173,7 @@ Indexes several days of logs with a fixed (raw) logging volume per day and runni
 | Parameter               | Explanation                                                                                                                            | Type  | Default Value         |
 | ----------------------- | -------------------------------------------------------------------------------------------------------------------------------------- | ----- | --------------------- |
 | `bulk_indexing_clients` | Number of bulk indexing clients/connections                                                                                            | `int` | `8`                   |
-| `search_clients`        | Number of search clients/connections used by *each** query                                                                             | `int` | `1`                   |
+| `search_clients`        | Number of search clients/connections used by **each** query                                                                             | `int` | `1`                   |
 | `bulk_size`             | Number of documents to send per bulk                                                                                                   | `int` | `1000`                |
 | `daily_logging_volume`  | The raw logging volume. Supported units are bytes (without any unit), `KB`, `MB` and `GB`). For the value, only integers are allowed.  | `str` | `100GB`               |
 | `starting_point`        | The first timestamp for which logs should be generated.                                                                                | `str` | `2018-05-25 00:00:00` |
@@ -188,8 +188,8 @@ Indexes (several days of) logs at a fixed target throughput using a fixed (raw)
 | `bulk_indexing_reqs_per_sec` | Number of bulk indexing requests/sec. Multiply this by bulk_size to understand indexing throughput in docs/s.                          | `int` | `20`                  |
 | `bulk_size`                  | Number of documents to send per bulk                                                                                                   | `int` | `1000`                |
 | `bulk_indexing_clients`      | Number of bulk indexing clients/connections                                                                                            | `int` | `8`                   |
-| `search_clients`             | Number of search clients/connections used by *each** query                                                                             | `int` | `1`                   |
-| `daily_logging_volume`       | The raw logging volume. Supported units are bytes (without any unit), `KB`, `MB` and `GB`). For the value, only integers are allowed.  | `str` | `100GB`               |
+| `search_clients`             | Number of search clients/connections used by **each** query                                                                             | `int` | `1`                   |
+| `daily_logging_volume`       | The raw logging volume. Supported units are bytes (without any unit), `KB`, `MB` and `GB`. For the value, only integers are allowed.   | `str` | `100GB`               |
 | `starting_point`             | The first timestamp for which logs should be generated.                                                                                | `str` | `2018-05-25 00:00:00` |
 | `number_of_days`             | The number of simulated days for which data should be generated.                                                                       | `int` | `6`                   |
 
@@ -237,6 +237,32 @@ This challenge has been used for the evaluation of query performance across diff
 | `indices_recovery_max_bytes_per_sec`  | If set, overrides Elasticsearch's default for [indices.recovery.max_bytes_per_sec](https://www.elastic.co/guide/en/elasticsearch/reference/current/recovery.html#recovery-settings) | `dict` | - |
 | `query_max_concurrent_shard_requests` | If set, overrides Elasticsearch's default for [max_concurrent_shard_requests](https://www.elastic.co/guide/en/elasticsearch/reference/7.12/search-multi-search.html#search-multi-search-api-query-params) for the Kibana queries | `int`  | - |
 
+### frozen-data-generation
+
+This challenge indexes data into an index that uses an ILM policy.  After indexing is complete the index is manually rolled over to force it to be a frozen index.
+
+This challenge requires a license to run as the default ILM policy uses the `searchable_snapshot` action. It also requires the snapshot repository to be set up ahead of time in the cluster.
+
+| Parameter                  | Explanation                                                                                    | Type   | Default Value |
+|----------------------------|------------------------------------------------------------------------------------------------|--------|---------------|
+| `bulk_indexing_iterations` | How many requests to send in total                                                             | `int` | `30000` |
+| `ilm_policy`               | The name of the ILM policy to use.                                                             | `str` | `elasticlogs-hot-frozen-ilm-policy` |
+| `es_snapshot_repo_name`    | Used if `ilm_policy` is not set. The name of the snapshot repository in which the snapshot should be stored | `str`  | - |
+| `frozen_min_age`           | Used if `ilm_policy` is not set. The minimum age after rollover before an index is moved to the frozen phase | `str` | `5s` |
+| `rollover_max_shard_size`  | Used if `ilm_policy` is not set. Max primary shard size condition for [rollover API](https://www.elastic.co/guide/en/elasticsearch/reference/master/indices-rollover-index.html#indices-rollover-index) | `str` | `30gb` |
+| `rollover_max_age`         | Used if `ilm_policy` is not set. Max age condition for [rollover API](https://www.elastic.co/guide/en/elasticsearch/reference/master/indices-rollover-index.html#indices-rollover-index) | `str` | `1d` |
+
+### frozen-querying
+
+This challenge queries frozen indices.  It depends on data loaded by the `frozen-data-generation` challenge.
+
+| Parameter                       | Explanation                                              | Type   | Default Value |
+|---------------------------------|----------------------------------------------------------|--------|---------------|
+| `max_concurrent_shard_requests` | Overrides Elasticsearch's default for [max_concurrent_shard_requests](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-search.html#search-search-api-query-params) | `int` | `10` |
+| `pre_filter_shard_size`         | Overrides Elasticsearch's default for [`pre_filter_shard_size`](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-search.html#search-search-api-query-params), used with throttled (frozen) indices. | `int` | `1` |
+| `query_iterations`              | Number of query requests per client                      | `int` | `3` |
+| `search_clients`                | Number of search clients/connections used for each query | `int` | `1` |
+
 ## Custom parameter sources
 
 ### elasticlogs\_bulk\_source

diff --git a/eventdata/challenges/frozen.json b/eventdata/challenges/frozen.json
@@ -2,7 +2,8 @@
 {% set p_pre_filter_shard_size = (pre_filter_shard_size | default(1)) %}
 {% set p_bulk_idx_iterations = (bulk_indexing_iterations | default(30000)) %}
 {% set p_query_iterations = (query_iterations | default(3)) %}
-
+{% set p_ilm_policy = ( ilm_policy | default("elasticlogs-hot-frozen-ilm-policy")) %}
+{% set p_search_clients = (search_clients | default(1)) %}
 {
   "name": "frozen-data-generation",
   "description": "Index approximately 50GB of data into an index with with 2 primary shards. IDs are autogenerated by Elasticsearch, meaning there are no conflicts.",
@@ -13,11 +14,69 @@
     {
       "operation": "delete-index-template"
     },
+  {% if p_ilm_policy == "elasticlogs-hot-frozen-ilm-policy" %}
+    {
+      "operation": {
+        "operation-type": "delete-ilm-policy",
+        "policy-name": "{{p_ilm_policy}}"
+      }
+    },
+    {
+      "operation": {
+        "operation-type": "create-ilm-policy",
+        "policy-name": "{{p_ilm_policy}}",
+        "body": {
+          "policy": {
+            "phases": {
+              "hot": {
+                "min_age": "0ms",
+                "actions": {
+                  "rollover": {
+                    "max_primary_shard_size": {{ rollover_max_shard_size | default("30gb") | tojson }},
+                    "max_age": {{ rollover_max_age | default("1d") | tojson }}
+                  },
+                  "set_priority": {
+                    "priority": 100
+                  },
+                  "forcemerge": {
+                    "max_num_segments": 1
+                  }
+                }
+              },
+              "frozen": {
+                "min_age": {{ frozen_min_age | default("5s") | tojson }},
+                "actions": {
+                  "searchable_snapshot": {
+                    "snapshot_repository": {{ es_snapshot_repo_name | default("-") | tojson }},
+                    "force_merge_index": true
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    },
+  {%endif%}
     {
       "operation": {
         "operation-type": "create-index-template",
         "settings": {
-          "index.store.type": "hybridfs"
+          "index.store.type": "hybridfs",
+          "index.lifecycle.name": "{{p_ilm_policy}}",
+          "index.lifecycle.rollover_alias": "elasticlogs"
+        }
+      }
+    },
+    {
+      "operation": {
+        "name": "create-elasticlogs-write-alias",
+        "operation-type": "create-index",
+        "index": "{{p_index_prefix}}-000001",
+        "body": {
+          "aliases" : {
+            "elasticlogs" : { "is_write_index": true }
+          }
         }
       }
     },
@@ -47,7 +106,7 @@
 },
 {
   "name": "frozen-querying",
-  "description": "Frozen node querying.",
+  "description": "Frozen node querying. Depends on frozen-data-generation.",
   "schedule": [
     {
       "operation": {
@@ -68,14 +127,15 @@
       "iterations": 1,
       "clients": 1
     },
+
     {
       "operation": {
         "operation-type": "fieldstats",
         "index_pattern": "elasticlogs*",
         "ignore_throttled": false
       },
       "iterations": 1,
-      "clients": 1
+      "clients": {{ p_search_clients }}
     },
     {
       "name": "kibana-discover-ip-25%",
@@ -93,7 +153,7 @@
         "pre_filter_shard_size": {{ p_pre_filter_shard_size }}
       },
       "iterations": {{ p_query_iterations }},
-      "clients": 1
+      "clients": {{ p_search_clients }}
     },
     {
       "name": "kibana-content_issues-ip-25%",
@@ -111,7 +171,7 @@
         "pre_filter_shard_size": {{ p_pre_filter_shard_size }}
       },
       "iterations": {{ p_query_iterations }},
-      "clients": 1
+      "clients": {{ p_search_clients }}
     }
   ]
 }