From ff9d2384e9020040a8dc562656bd12c26abf1ab8 Mon Sep 17 00:00:00 2001 From: Nicole Albee <2642763+a03nikki@users.noreply.github.com> Date: Tue, 11 Jun 2024 17:13:51 -0500 Subject: [PATCH 1/8] Working on adding additional operations to the `frozen-data-generation` to move the indexed data to frozen indices. --- README.md | 16 +++++++++ eventdata/challenges/frozen.json | 29 ++++++++++++++-- .../elasticlogs-hot-frozen-ilm-policy.json | 34 +++++++++++++++++++ 3 files changed, 76 insertions(+), 3 deletions(-) create mode 100644 eventdata/elasticlogs-hot-frozen-ilm-policy.json diff --git a/README.md b/README.md index 902f45a..539a5e3 100644 --- a/README.md +++ b/README.md @@ -237,6 +237,22 @@ This challenge has been used for the evaluation of query performance across diff | `indices_recovery_max_bytes_per_sec` | If set, overrides Elasticsearch's default for [indices.recovery.max_bytes_per_sec](https://www.elastic.co/guide/en/elasticsearch/reference/current/recovery.html#recovery-settings) | `dict` | - | | `query_max_concurrent_shard_requests` | If set, overrides Elasticsearch's default for [max_concurrent_shard_requests](https://www.elastic.co/guide/en/elasticsearch/reference/7.12/search-multi-search.html#search-multi-search-api-query-params) for the Kibana queries | `int` | - | +### frozen-data-generation + +This challenge indexes data into an index that uses an ILM policy. After indexing is complete the index is manually rolled over to force it to be a frozen index. + +| Parameter | Explanation | Type | Default Value | +|---------------------------------------|-----------------------------------------------------------------------------------------------------------------------------|--------|---------------| +| `ilm_policy` | The name of the ILM policy to use. | `str` | `elasticlogs-hot-frozen-ilm-policy` | +| `es_snapshot_repo_name` | Used if `ilm_policy` is is not set. 
The name of the snapshot repository from which the snapshot should be stored | `str` | - | +| `frozen_min_age` | Used if `ilm_policy` is is not set. The minimum age after rollover an index is moved to frozen | `str` | `5s` | +| `rollover_max_shard_size` | Used if `ilm_policy` is is not set. Max primary shard size condition for [rollover API](https://www.elastic.co/guide/en/elasticsearch/reference/master/indices-rollover-index.html#indices-rollover-index) | `str` | `30gb` | +| `rollover_max_age` | Used if `ilm_policy` is is not set. Max age condition for [rollover API](https://www.elastic.co/guide/en/elasticsearch/reference/master/indices-rollover-index.html#indices-rollover-index) | `str` | `1d` | + +### frozen-querying + +This challenge queries frozen indices. It depends on data loaded by the `frozen-data-generation` challenge. + ## Custom parameter sources ### elasticlogs\_bulk\_source diff --git a/eventdata/challenges/frozen.json b/eventdata/challenges/frozen.json index ef28ef9..ecd2f4d 100644 --- a/eventdata/challenges/frozen.json +++ b/eventdata/challenges/frozen.json @@ -2,7 +2,8 @@ {% set p_pre_filter_shard_size = (pre_filter_shard_size | default(1)) %} {% set p_bulk_idx_iterations = (bulk_indexing_iterations | default(30000)) %} {% set p_query_iterations = (query_iterations | default(3)) %} - +{% set p_ilm_policy = ( ilm_policy | default("elasticlogs-hot-frozen-ilm-policy"))} +{% set p_index_prefix = index_prefix | default("elasticlogs") %} { "name": "frozen-data-generation", "description": "Index approximately 50GB of data into an index with with 2 primary shards. 
IDs are autogenerated by Elasticsearch, meaning there are no conflicts.", @@ -13,11 +14,33 @@ { "operation": "delete-index-template" }, + {% if p_ilm_policy == "elasticlogs-hot-frozen-ilm-policy" %} + { + "operation": "delete-ilm-policy" + }, + { + "operation": "create-ilm-policy" + }, + {%endif%} { "operation": { "operation-type": "create-index-template", "settings": { - "index.store.type": "hybridfs" + "index.store.type": "hybridfs", + "index.lifecycle.name": "{{p_ilm_policy}}", + "index.lifecycle.rollover_alias": "elasticlogs" + } + } + }, + { + "operation": { + "name": "create-elasticlogs-write-alias", + "operation-type": "create-index", + "index": "{{p_index_prefix}}-000001", + "body": { + "aliases" : { + "elasticlogs" : { "is_write_index": true } + } } } }, @@ -47,7 +70,7 @@ }, { "name": "frozen-querying", - "description": "Frozen node querying.", + "description": "Frozen node querying. Depends on frozen-data-generation.", "schedule": [ { "operation": { diff --git a/eventdata/elasticlogs-hot-frozen-ilm-policy.json b/eventdata/elasticlogs-hot-frozen-ilm-policy.json new file mode 100644 index 0000000..a020d90 --- /dev/null +++ b/eventdata/elasticlogs-hot-frozen-ilm-policy.json @@ -0,0 +1,34 @@ +{% set p_repo_name = %es_snapshot_repo_name } +{% set p_rollover_max_age = rollover_max_age | default("1d") %} +{% set p_rollover_max_shard_size = rollover_max_shard_size | default("30gb") %} +{% set p_frozen_min_age = frozen_min_age | default("5s") %} +{ + "policy": { + "phases": { + "hot": { + "min_age": "0ms", + "actions": { + "rollover": { + "max_primary_shard_size": {{ p_rollover_max_shard_size | tojson }}, + "max_age": {{ p_rollover_max_age | tojson }} + }, + "set_priority": { + "priority": 100 + }, + "forcemerge": { + "max_num_segments": 1 + } + } + }, + "frozen": { + "min_age": {{ p_frozen_min_age | tojson }}, + "actions": { + "searchable_snapshot": { + "snapshot_repository": {{ p_repo_name | tojson }}, + "force_merge_index": true + } + } + } + } + } +} From 
6358261fec8735e6e562ae2ea0a154cfebd857db Mon Sep 17 00:00:00 2001 From: Nicole Albee <2642763+a03nikki@users.noreply.github.com> Date: Tue, 11 Jun 2024 17:26:05 -0500 Subject: [PATCH 2/8] Fixed typo in parameter definition for the challenge. --- eventdata/challenges/frozen.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eventdata/challenges/frozen.json b/eventdata/challenges/frozen.json index ecd2f4d..7b9a7e8 100644 --- a/eventdata/challenges/frozen.json +++ b/eventdata/challenges/frozen.json @@ -2,7 +2,7 @@ {% set p_pre_filter_shard_size = (pre_filter_shard_size | default(1)) %} {% set p_bulk_idx_iterations = (bulk_indexing_iterations | default(30000)) %} {% set p_query_iterations = (query_iterations | default(3)) %} -{% set p_ilm_policy = ( ilm_policy | default("elasticlogs-hot-frozen-ilm-policy"))} +{% set p_ilm_policy = ( ilm_policy | default("elasticlogs-hot-frozen-ilm-policy")) %} {% set p_index_prefix = index_prefix | default("elasticlogs") %} { "name": "frozen-data-generation", From 22c517eec4a0145ed4c71949d5f94bb0fc98b886 Mon Sep 17 00:00:00 2001 From: Nicole Albee <2642763+a03nikki@users.noreply.github.com> Date: Tue, 11 Jun 2024 17:54:22 -0500 Subject: [PATCH 3/8] Fixed the delete and create ILM policy operations. 
--- eventdata/challenges/frozen.json | 40 ++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/eventdata/challenges/frozen.json b/eventdata/challenges/frozen.json index 7b9a7e8..9ae11aa 100644 --- a/eventdata/challenges/frozen.json +++ b/eventdata/challenges/frozen.json @@ -16,10 +16,46 @@ }, {% if p_ilm_policy == "elasticlogs-hot-frozen-ilm-policy" %} { - "operation": "delete-ilm-policy" + "operation": { + "operation-type": "delete-ilm-policy", + "policy-name": "{{p_ilm_policy}}" + } }, { - "operation": "create-ilm-policy" + "operation": { + "operation-type": "create-ilm-policy", + "policy-name": "{{p_ilm_policy}}", + "body": { + "policy": { + "phases": { + "hot": { + "min_age": "0ms", + "actions": { + "rollover": { + "max_primary_shard_size": {{ rollover_max_shard_size | default("30gb") | tojson }}, + "max_age": {{ rollover_max_age | default("1d") | tojson }} + }, + "set_priority": { + "priority": 100 + }, + "forcemerge": { + "max_num_segments": 1 + } + } + }, + "frozen": { + "min_age": {{ frozen_min_age | default("5s") | tojson }}, + "actions": { + "searchable_snapshot": { + "snapshot_repository": {{ es_snapshot_repo_name | default("-") | tojson }}, + "force_merge_index": true + } + } + } + } + } + } + } }, {%endif%} { From 3e08d1bacff4f1b8227fd18c6ed61fb7e273ca5e Mon Sep 17 00:00:00 2001 From: Nicole Albee <2642763+a03nikki@users.noreply.github.com> Date: Tue, 11 Jun 2024 18:38:27 -0500 Subject: [PATCH 4/8] Added a note to the readme about license requirements for the `frozen-*` challenges. --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 539a5e3..0310ddb 100644 --- a/README.md +++ b/README.md @@ -241,6 +241,8 @@ This challenge has been used for the evaluation of query performance across diff This challenge indexes data into an index that uses an ILM policy. After indexing is complete the index is manually rolled over to force it to be a frozen index. 
+This challenge requires a license to run as the default ILM policy uses the `searchable_snapshot` action. It also requires the snapshot repository to be setup ahead of time in the cluster. + | Parameter | Explanation | Type | Default Value | |---------------------------------------|-----------------------------------------------------------------------------------------------------------------------------|--------|---------------| | `ilm_policy` | The name of the ILM policy to use. | `str` | `elasticlogs-hot-frozen-ilm-policy` | From a4646f72d9a754f32cfd606b10f7729e91757529 Mon Sep 17 00:00:00 2001 From: Nicole Albee <2642763+a03nikki@users.noreply.github.com> Date: Fri, 14 Jun 2024 15:12:21 -1000 Subject: [PATCH 5/8] Create ILM policy does not take a file so removing the extra file. --- .../elasticlogs-hot-frozen-ilm-policy.json | 34 ------------------- 1 file changed, 34 deletions(-) delete mode 100644 eventdata/elasticlogs-hot-frozen-ilm-policy.json diff --git a/eventdata/elasticlogs-hot-frozen-ilm-policy.json b/eventdata/elasticlogs-hot-frozen-ilm-policy.json deleted file mode 100644 index a020d90..0000000 --- a/eventdata/elasticlogs-hot-frozen-ilm-policy.json +++ /dev/null @@ -1,34 +0,0 @@ -{% set p_repo_name = %es_snapshot_repo_name } -{% set p_rollover_max_age = rollover_max_age | default("1d") %} -{% set p_rollover_max_shard_size = rollover_max_shard_size | default("30gb") %} -{% set p_frozen_min_age = frozen_min_age | default("5s") %} -{ - "policy": { - "phases": { - "hot": { - "min_age": "0ms", - "actions": { - "rollover": { - "max_primary_shard_size": {{ p_rollover_max_shard_size | tojson }}, - "max_age": {{ p_rollover_max_age | tojson }} - }, - "set_priority": { - "priority": 100 - }, - "forcemerge": { - "max_num_segments": 1 - } - } - }, - "frozen": { - "min_age": {{ p_frozen_min_age | tojson }}, - "actions": { - "searchable_snapshot": { - "snapshot_repository": {{ p_repo_name | tojson }}, - "force_merge_index": true - } - } - } - } - } -} From 
9a373925913c45b42e454f853f3235fc66bc702e Mon Sep 17 00:00:00 2001 From: Nicole Albee <2642763+a03nikki@users.noreply.github.com> Date: Wed, 3 Jul 2024 13:07:13 -0500 Subject: [PATCH 6/8] Added missing challenge parameters for Frozen to the README.md file. --- README.md | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 0310ddb..d05345b 100644 --- a/README.md +++ b/README.md @@ -80,7 +80,7 @@ The table below shows the track parameters that can be adjusted along with defau This challenge assumes that the *elasticlogs-1bn-load* track has been executed as it simulates querying against these indices. It shows how indexing and querying through simulated Kibana dashboards can be combined to provide a more realistic benchmark. -In this challenge rate-limited indexing at varying levels is combined with a fixed level of querying. If metrics from the run are stored in Elasticsearch, it is possible to analyse these in Kibana in order to identify how indexing rate affects query latency and vice versa. +In this challenge rate-limited indexing at varying levels is combined with a fixed level of querying. If metrics from the run are stored in Elasticsearch, it is possible to analyze these in Kibana in order to identify how indexing rate affects query latency and vice versa. 
The table below shows the track parameters that can be adjusted along with default values: @@ -173,7 +173,7 @@ Indexes several days of logs with a fixed (raw) logging volume per day and runni | Parameter | Explanation | Type | Default Value | | ----------------------- | -------------------------------------------------------------------------------------------------------------------------------------- | ----- | --------------------- | | `bulk_indexing_clients` | Number of bulk indexing clients/connections | `int` | `8` | -| `search_clients` | Number of search clients/connections used by *each** query | `int` | `1` | +| `search_clients` | Number of search clients/connections used by **each** query | `int` | `1` | | `bulk_size` | Number of documents to send per bulk | `int` | `1000` | | `daily_logging_volume` | The raw logging volume. Supported units are bytes (without any unit), `KB`, `MB` and `GB`). For the value, only integers are allowed. | `str` | `100GB` | | `starting_point` | The first timestamp for which logs should be generated. | `str` | `2018-05-25 00:00:00` | @@ -188,8 +188,8 @@ Indexes (several days of) logs at a fixed target throughput using a fixed (raw) | `bulk_indexing_reqs_per_sec` | Number of bulk indexing requests/sec. Multiply this by bulk_size to understand indexing throughput in docs/s. | `int` | `20` | | `bulk_size` | Number of documents to send per bulk | `int` | `1000` | | `bulk_indexing_clients` | Number of bulk indexing clients/connections | `int` | `8` | -| `search_clients` | Number of search clients/connections used by *each** query | `int` | `1` | -| `daily_logging_volume` | The raw logging volume. Supported units are bytes (without any unit), `KB`, `MB` and `GB`). For the value, only integers are allowed. | `str` | `100GB` | +| `search_clients` | Number of search clients/connections used by **each** query | `int` | `1` | +| `daily_logging_volume` | The raw logging volume. 
Supported units are bytes (without any unit), `KB`, `MB` and `GB`. For the value, only integers are allowed. | `str` | `100GB` | | `starting_point` | The first timestamp for which logs should be generated. | `str` | `2018-05-25 00:00:00` | | `number_of_days` | The number of simulated days for which data should be generated. | `int` | `6` | @@ -243,18 +243,26 @@ This challenge indexes data into an index that uses an ILM policy. After indexi This challenge requires a license to run as the default ILM policy uses the `searchable_snapshot` action. It also requires the snapshot repository to be setup ahead of time in the cluster. -| Parameter | Explanation | Type | Default Value | -|---------------------------------------|-----------------------------------------------------------------------------------------------------------------------------|--------|---------------| -| `ilm_policy` | The name of the ILM policy to use. | `str` | `elasticlogs-hot-frozen-ilm-policy` | -| `es_snapshot_repo_name` | Used if `ilm_policy` is is not set. The name of the snapshot repository from which the snapshot should be stored | `str` | - | -| `frozen_min_age` | Used if `ilm_policy` is is not set. The minimum age after rollover an index is moved to frozen | `str` | `5s` | -| `rollover_max_shard_size` | Used if `ilm_policy` is is not set. Max primary shard size condition for [rollover API](https://www.elastic.co/guide/en/elasticsearch/reference/master/indices-rollover-index.html#indices-rollover-index) | `str` | `30gb` | +| Parameter | Explanation | Type | Default Value | +|----------------------------|------------------------------------------------------------------------------------------------|--------|---------------| +| `bulk_indexing_iterations` | How many requests to send in total | `int` | `30000` | +| `ilm_policy` | The name of the ILM policy to use. | `str` | `elasticlogs-hot-frozen-ilm-policy` | +| `es_snapshot_repo_name` | Used if `ilm_policy` is not set. 
The name of the snapshot repository in which the snapshot should be stored | `str` | - | +| `frozen_min_age` | Used if `ilm_policy` is not set. The minimum age after rollover at which an index is moved to the frozen phase | `str` | `5s` | +| `rollover_max_shard_size` | Used if `ilm_policy` is not set. Max primary shard size condition for [rollover API](https://www.elastic.co/guide/en/elasticsearch/reference/master/indices-rollover-index.html#indices-rollover-index) | `str` | `30gb` | | `rollover_max_age` | Used if `ilm_policy` is is not set. Max age condition for [rollover API](https://www.elastic.co/guide/en/elasticsearch/reference/master/indices-rollover-index.html#indices-rollover-index) | `str` | `1d` | ### frozen-querying This challenge queries frozen indices. It depends on data loaded by the `frozen-data-generation` challenge. +| Parameter | Explanation | Type | Default Value | +|---------------------------------|----------------------------------------------------------|--------|---------------| +| `max_concurrent_shard_requests` | Overrides Elasticsearch's default for [max_concurrent_shard_requests](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-search.html#search-search-api-query-params) | `int` | `10` | +| `pre_filter_shard_size` | Overrides Elasticsearch's default for [`pre_filter_shard_size`](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-search.html#search-search-api-query-params), used with throttled (frozen) indices. 
| `int` | `1` | +| `query_iterations` | Number of query requests per client | `int` | `3` | +| `search_clients` | Number of search clients/connections used for each query | `int` | `1` | + ## Custom parameter sources ### elasticlogs\_bulk\_source From 329500d6ab5d407074a05e36573889760dc4453d Mon Sep 17 00:00:00 2001 From: Nicole Albee <2642763+a03nikki@users.noreply.github.com> Date: Wed, 3 Jul 2024 13:20:45 -0500 Subject: [PATCH 7/8] Update the `frozen-querying` challenge to parameterize the number of clients. Keep the `p_index_prefix` definition, which the `create-elasticlogs-write-alias` operation still references. --- eventdata/challenges/frozen.json | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/eventdata/challenges/frozen.json b/eventdata/challenges/frozen.json index 9ae11aa..b620af9 100644 --- a/eventdata/challenges/frozen.json +++ b/eventdata/challenges/frozen.json @@ -3,7 +3,8 @@ {% set p_bulk_idx_iterations = (bulk_indexing_iterations | default(30000)) %} {% set p_query_iterations = (query_iterations | default(3)) %} {% set p_ilm_policy = ( ilm_policy | default("elasticlogs-hot-frozen-ilm-policy")) %} {% set p_index_prefix = index_prefix | default("elasticlogs") %} +{% set p_search_clients = (search_clients | default(1)) %} { "name": "frozen-data-generation", "description": "Index approximately 50GB of data into an index with with 2 primary shards. 
IDs are autogenerated by Elasticsearch, meaning there are no conflicts.", @@ -152,7 +153,7 @@ "pre_filter_shard_size": {{ p_pre_filter_shard_size }} }, "iterations": {{ p_query_iterations }}, - "clients": 1 + "clients": {{ p_search_clients }} }, { "name": "kibana-content_issues-ip-25%", @@ -170,7 +171,7 @@ "pre_filter_shard_size": {{ p_pre_filter_shard_size }} }, "iterations": {{ p_query_iterations }}, - "clients": 1 + "clients": {{ p_search_clients }} } ] } From 8f4046331e79edaff0aa93f52dbf92ee4a341052 Mon Sep 17 00:00:00 2001 From: Nicole Albee <2642763+a03nikki@users.noreply.github.com> Date: Fri, 5 Jul 2024 16:35:55 -0500 Subject: [PATCH 8/8] Increase client count on the fieldstats call to match the search client count, otherwise it fails out the frozen-query challenge. --- eventdata/challenges/frozen.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/eventdata/challenges/frozen.json b/eventdata/challenges/frozen.json index b620af9..23a83a6 100644 --- a/eventdata/challenges/frozen.json +++ b/eventdata/challenges/frozen.json @@ -128,6 +128,7 @@ "iterations": 1, "clients": 1 }, + { "operation": { "operation-type": "fieldstats", @@ -135,7 +136,7 @@ "ignore_throttled": false }, "iterations": 1, - "clients": 1 + "clients": {{ p_search_clients }} }, { "name": "kibana-discover-ip-25%",