From 99e1806ac347af926b22a1a8c3b6ca6e9d8a6c73 Mon Sep 17 00:00:00 2001 From: Xiaokui Shu Date: Wed, 26 Jul 2023 17:26:46 -0400 Subject: [PATCH 1/8] add stix-shifter diagnosis script --- bin/stix-shifter-diag | 28 ++++ setup.cfg | 4 +- src/kestrel/utils.py | 10 ++ .../diagnosis.py | 146 ++++++++++++++++++ 4 files changed, 187 insertions(+), 1 deletion(-) create mode 100644 bin/stix-shifter-diag create mode 100644 src/kestrel_datasource_stixshifter/diagnosis.py diff --git a/bin/stix-shifter-diag b/bin/stix-shifter-diag new file mode 100644 index 00000000..d69a0506 --- /dev/null +++ b/bin/stix-shifter-diag @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 + +import argparse +from kestrel_datasource_stixshifter.diagnosis import Diagnosis + +def gen_patterns(): + time_range = "START t'2000-01-01T00:00:00.000Z' STOP t'3000-01-01T00:00:00.000Z'" + patterns = [ + "[ipv4-addr:value LIKE '%']", + "[process:pid > 0]", + "[email-addr:value LIKE '%']", + ] + return [" ".join([p, time_range]) for p in patterns] + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Kestrel stix-shifter data source interface diagnosis") + parser.add_argument("datasource", help="data source name specified in stixshifter.yaml") + args = parser.parse_args() + + patterns = gen_patterns() + + diag = Diagnosis(args.datasource) + + diag.diagnose_config() + diag.diagnose_ping() + diag.diagnose_translate_query(patterns[0]) + diag.diagnose_run_query_and_retrieval_result(patterns, 1) + diag.diagnose_run_query_and_retrieval_result(patterns, 5) diff --git a/setup.cfg b/setup.cfg index a6927a0d..bdca71cb 100644 --- a/setup.cfg +++ b/setup.cfg @@ -24,7 +24,9 @@ project_urls = packages = find: package_dir = =src -scripts = bin/kestrel +scripts = + bin/kestrel + bin/stix-shifter-diag python_requires = >= 3.8 install_requires = typeguard>=4.0.0 diff --git a/src/kestrel/utils.py b/src/kestrel/utils.py index 0a3d0ea0..dd9c5593 100644 --- a/src/kestrel/utils.py +++ b/src/kestrel/utils.py @@ -16,6 +16,16 @@ def lowered_str_list(xs): return [x.lower() for x in xs if isinstance(x, str)] +def mask_value_in_nested_dict(d): + if d: + for k, v in d.items(): + if isinstance(v, collections.abc.Mapping): + d[k] = mask_value_in_nested_dict(v) + elif isinstance(v, str): + d[k] = "********" + return d + + def update_nested_dict(dict_old, dict_new): if dict_new: for k, v in dict_new.items(): diff --git a/src/kestrel_datasource_stixshifter/diagnosis.py b/src/kestrel_datasource_stixshifter/diagnosis.py new file mode 100644 index 00000000..f932efbc --- /dev/null +++ b/src/kestrel_datasource_stixshifter/diagnosis.py @@ -0,0 +1,146 @@ +import json +from multiprocessing import Queue +from kestrel.utils import mask_value_in_nested_dict +from kestrel_datasource_stixshifter.config import ( + get_datasource_from_profiles, + load_options, + load_profiles, + set_stixshifter_logging_level, +) +from kestrel_datasource_stixshifter.worker import STOP_SIGN +from kestrel_datasource_stixshifter.query import ( + gen_observation_metadata, + translate_query, +) +from kestrel_datasource_stixshifter.worker.transmitter import Transmitter +from stix_shifter.stix_transmission import stix_transmission + + +class Diagnosis: + def __init__(self, datasource_name): + self.datasource_name = datasource_name + self.profiles = load_profiles() + self.kestrel_options = load_options() + ( + self.connector_name, + self.connection_dict, + self.configuration_dict, + self.retrieval_batch_size, + self.cool_down_after_transmission, + ) = get_datasource_from_profiles(datasource_name, self.profiles) + self.if_fast_translation = ( + self.connector_name in self.kestrel_options["fast_translate"] + ) + + def diagnose_config(self): + print() + print() + print() + print("## Diagnose: config verification") + + configuration_dict_masked = mask_value_in_nested_dict(self.configuration_dict) + + print() + print("#### Kestrel specific config") + print(f"retrieval batch size: {self.retrieval_batch_size}") + print(f"cool down after transmission: {self.cool_down_after_transmission}") + print(f"enable fast translation: {self.if_fast_translation}") + + print() + print("#### Config to be passed to stix-shifter") + print(f"connector name: {self.connector_name}") + print( + "connection object [ref: https://github.com/opencybersecurityalliance/stix-shifter/blob/develop/OVERVIEW.md#connection]:" + ) + print(json.dumps(self.connection_dict, indent=4)) + print( + "configuration object [ref: https://github.com/opencybersecurityalliance/stix-shifter/blob/develop/OVERVIEW.md#configuration]:" + ) + print(json.dumps(configuration_dict_masked, indent=4)) + + def diagnose_ping(self): + print() + print() + print() + print("## Diagnose: stix-shifter to data source connection (network, auth)") + + transmission = stix_transmission.StixTransmission( + self.connector_name, + self.connection_dict, + self.configuration_dict, + ) + + result = transmission.ping() + + print() + print("#### Results from stixshifter transmission.ping()") + print(json.dumps(result, indent=4)) + + def diagnose_translate_query(self, stix_pattern, quiet=False): + if not quiet: + print() + print() + print() + print("## Diagnose: stix-shifter query translation") + + dsl = translate_query( + self.connector_name, + {}, + stix_pattern, + self.connection_dict, + ) + + if not quiet: + print() + print("#### Input pattern") + print(stix_pattern) + print() + print("#### Output data source native query") + print(json.dumps(dsl, indent=4)) + + return dsl + + def diagnose_run_query_and_retrieval_result(self, stix_patterns, max_batch_cnt): + print() + print() + print() + print(f"## Diagnose: stix-shifter query execution: <={max_batch_cnt} batch(s)") + + result_queue = Queue() + result_counts = [] + + for pattern in stix_patterns: + for query in self.diagnose_translate_query(pattern, True)["queries"]: + transmitter = Transmitter( + self.connector_name, + self.connection_dict, + self.configuration_dict, + self.retrieval_batch_size, + self.cool_down_after_transmission, + query, + result_queue, + max_batch_cnt * self.retrieval_batch_size, + ) + + transmitter.run() + result_queue.put(STOP_SIGN) + + print() + print("#### data retrieval results:") + for packet in iter(result_queue.get, STOP_SIGN): + if packet.success: + cnt = len(packet.data[0]["objects"]) + result_counts.append(cnt) + print(f"one batch retrieved: {cnt} observations") + else: + print(packet.log) + + if result_counts: + break + else: + print(f"no result matched for query: {query}, go next query") + + if result_counts: + break + else: + print(f"no result matched for pattern: {pattern}, go next pattern") From 87d7c89d08d317524e42ac93545b7f973b9cc145 Mon Sep 17 00:00:00 2001 From: Xiaokui Shu Date: Wed, 26 Jul 2023 18:03:33 -0400 Subject: [PATCH 2/8] styling fixes --- src/kestrel_datasource_stixshifter/diagnosis.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/kestrel_datasource_stixshifter/diagnosis.py b/src/kestrel_datasource_stixshifter/diagnosis.py index f932efbc..56ad0059 100644 --- a/src/kestrel_datasource_stixshifter/diagnosis.py +++ b/src/kestrel_datasource_stixshifter/diagnosis.py @@ -2,16 +2,13 @@ from multiprocessing import Queue from kestrel.utils import mask_value_in_nested_dict from kestrel_datasource_stixshifter.config import ( + set_stixshifter_logging_level, get_datasource_from_profiles, load_options, load_profiles, - set_stixshifter_logging_level, ) from kestrel_datasource_stixshifter.worker import STOP_SIGN -from kestrel_datasource_stixshifter.query import ( - gen_observation_metadata, - translate_query, -) +from kestrel_datasource_stixshifter.query import translate_query from kestrel_datasource_stixshifter.worker.transmitter import Transmitter from stix_shifter.stix_transmission import stix_transmission @@ -31,6 +28,7 @@ def __init__(self, datasource_name): self.if_fast_translation = ( self.connector_name in self.kestrel_options["fast_translate"] ) + set_stixshifter_logging_level() def diagnose_config(self): print() From 1787e0a6c71db62236534aa00b7ee0a6000a2208 Mon Sep 17 00:00:00 2001 From: Xiaokui Shu Date: Wed, 26 Jul 2023 18:07:31 -0400 Subject: [PATCH 3/8] add doc for stix-shifter-diag --- src/kestrel_datasource_stixshifter/interface.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/kestrel_datasource_stixshifter/interface.py b/src/kestrel_datasource_stixshifter/interface.py index c9ad6d0f..f893c0b7 100644 --- a/src/kestrel_datasource_stixshifter/interface.py +++ b/src/kestrel_datasource_stixshifter/interface.py @@ -92,6 +92,12 @@ #. any in-session edit through the ``CONFIG`` command. +Once you added data source profiles into ``stixshifter.yaml``, you can test the data source with command: + +.. code-block:: console + + $ stix-shifter-diag data_source_name + If you launch Kestrel in debug mode, STIX-shifter debug mode is still not enabled by default. To record debug level logs of STIX-shifter, create environment variable ``KESTREL_STIXSHIFTER_DEBUG`` with any value. From 17093baef625f9cbc68ccf07823a5c8bf5861306 Mon Sep 17 00:00:00 2001 From: Xiaokui Shu Date: Wed, 26 Jul 2023 18:11:20 -0400 Subject: [PATCH 4/8] update error msg for ss exception --- src/kestrel/exceptions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/kestrel/exceptions.py b/src/kestrel/exceptions.py index 3f8bbff5..12e9f2dd 100644 --- a/src/kestrel/exceptions.py +++ b/src/kestrel/exceptions.py @@ -198,7 +198,7 @@ def __init__(self, uri, itf, msg=""): class DataSourceError(KestrelException): def __init__(self, error, suggestion=""): if not suggestion: - suggestion = "please check data source config or test the query manually" + suggestion = "please check data source config or diagnose with stix-shifter-diag command" super().__init__( error, suggestion, From 0d653f9833125bf684c81eac4898d0f7a604af5e Mon Sep 17 00:00:00 2001 From: Xiaokui Shu Date: Wed, 26 Jul 2023 18:50:21 -0400 Subject: [PATCH 5/8] add unit tests for ss diagnosis --- .../diagnosis.py | 2 + tests/test_cli.py | 34 ++----- tests/test_stixshifter_diagnosis.py | 89 +++++++++++++++++++ tests/utils.py | 21 +++++ 4 files changed, 120 insertions(+), 26 deletions(-) create mode 100644 tests/test_stixshifter_diagnosis.py diff --git a/src/kestrel_datasource_stixshifter/diagnosis.py b/src/kestrel_datasource_stixshifter/diagnosis.py index 56ad0059..e19d11e9 100644 --- a/src/kestrel_datasource_stixshifter/diagnosis.py +++ b/src/kestrel_datasource_stixshifter/diagnosis.py @@ -142,3 +142,5 @@ def diagnose_run_query_and_retrieval_result(self, stix_patterns, max_batch_cnt): break else: print(f"no result matched for pattern: {pattern}, go next pattern") + + return result_counts diff --git a/tests/test_cli.py b/tests/test_cli.py index 5c31333e..e831423e 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,21 +1,10 @@ import pytest -import os import subprocess -@pytest.fixture() -def setup_huntflow(tmp_path): - profiles = """ -profiles: - lab101: - connector: stix_bundle - connection: - host: https://github.com/opencybersecurityalliance/data-bucket-kestrel/blob/main/stix-bundles/lab101.json?raw=true - config: - auth: - username: - password: -""" +from .utils import stixshifter_profile_lab101 +@pytest.fixture() +def create_huntflow(tmp_path): huntflow = """ procs = GET process FROM stixshifter://lab101 WHERE name = 'svchost.exe' @@ -24,27 +13,20 @@ def setup_huntflow(tmp_path): expected_result_lines = ["VARIABLE TYPE #(ENTITIES) #(RECORDS) directory* file* ipv4-addr* ipv6-addr* mac-addr* network-traffic* process* user-account* x-ecs-destination* x-ecs-network* x-ecs-process* x-ecs-source* x-ecs-user* x-oca-asset* x-oca-event*", " procs process 389 1066 1078 1114 3190 1910 1066 1014 725 1062 2016 2016 2120 2024 2124 1066 2132"] - profile_file = tmp_path / "stixshifter.yaml" huntflow_file = tmp_path / "hunt101.hf" - os.environ["KESTREL_STIXSHIFTER_CONFIG"] = str(profile_file.expanduser().resolve()) - with open(profile_file, "w") as pf: - pf.write(profiles) - with open(huntflow_file, "w") as hf: hf.write(huntflow) huntflow_file_path = str(huntflow_file.expanduser().resolve()) - # https://docs.pytest.org/en/latest/how-to/fixtures.html#teardown-cleanup-aka-fixture-finalization - yield huntflow_file_path, expected_result_lines - del os.environ["KESTREL_STIXSHIFTER_CONFIG"] + return huntflow_file_path, expected_result_lines -def test_cli(setup_huntflow): +def test_cli(create_huntflow, stixshifter_profile_lab101): - huntflow_file_path, expected_result_lines = setup_huntflow + huntflow_file_path, expected_result_lines = create_huntflow result = subprocess.run(args = ["kestrel", huntflow_file_path], universal_newlines = True, stdout = subprocess.PIPE @@ -55,9 +37,9 @@ def test_cli(setup_huntflow): assert result_lines[-2] == expected_result_lines[1] -def test_python_module_call(setup_huntflow): +def test_python_module_call(create_huntflow, stixshifter_profile_lab101): - huntflow_file_path, expected_result_lines = setup_huntflow + huntflow_file_path, expected_result_lines = create_huntflow result = subprocess.run(args = ["python", "-m", "kestrel", huntflow_file_path], universal_newlines = True, stdout = subprocess.PIPE diff --git a/tests/test_stixshifter_diagnosis.py b/tests/test_stixshifter_diagnosis.py new file mode 100644 index 00000000..f5efe47e --- /dev/null +++ b/tests/test_stixshifter_diagnosis.py @@ -0,0 +1,89 @@ +import pytest +import subprocess + +from kestrel_datasource_stixshifter.diagnosis import Diagnosis +from .utils import stixshifter_profile_lab101 + + +def test_diagnosis(stixshifter_profile_lab101): + pattern = " ".join([ + "[ipv4-addr:value LIKE '%']", + "START t'2000-01-01T00:00:00.000Z' STOP t'3000-01-01T00:00:00.000Z'", + ]) + diag = Diagnosis("lab101") + diag.diagnose_config() + diag.diagnose_ping() + assert pattern == diag.diagnose_translate_query(pattern)["queries"][0] + res = diag.diagnose_run_query_and_retrieval_result([pattern], 1) + assert len(res) == 1 and res[0] == 15 + + +def test_cli(stixshifter_profile_lab101): + + expected_output = """ +## Diagnose: config verification + +#### Kestrel specific config +retrieval batch size: 2000 +cool down after transmission: 0 +enable fast translation: False + +#### Config to be passed to stix-shifter +connector name: stix_bundle +connection object [ref: https://github.com/opencybersecurityalliance/stix-shifter/blob/develop/OVERVIEW.md#connection]: +{ + "host": "https://github.com/opencybersecurityalliance/data-bucket-kestrel/blob/main/stix-bundles/lab101.json?raw=true", + "options": { + "result_limit": 4000, + "timeout": 60 + } +} +configuration object [ref: https://github.com/opencybersecurityalliance/stix-shifter/blob/develop/OVERVIEW.md#configuration]: +{ + "auth": { + "username": null, + "password": null + } +} + +## Diagnose: stix-shifter to data source connection (network, auth) + +#### Results from stixshifter transmission.ping() +{ + "success": true +} + +## Diagnose: stix-shifter query translation + +#### Input pattern +[ipv4-addr:value LIKE '%'] START t'2000-01-01T00:00:00.000Z' STOP t'3000-01-01T00:00:00.000Z' + +#### Output data source native query +{ + "queries": [ + "[ipv4-addr:value LIKE '%'] START t'2000-01-01T00:00:00.000Z' STOP t'3000-01-01T00:00:00.000Z'" + ] +} + +## Diagnose: stix-shifter query execution: <=1 batch(s) + +#### data retrieval results: +one batch retrieved: 15 observations + +## Diagnose: stix-shifter query execution: <=5 batch(s) + +#### data retrieval results: +one batch retrieved: 15 observations +""" + + result = subprocess.run(args = ["stix-shifter-diag", "lab101"], + universal_newlines = True, + stdout = subprocess.PIPE + ) + + result_lines = result.stdout.splitlines() + result_lines = [x for x in result_lines if x] + expected_lines = expected_output.splitlines() + expected_lines = [x for x in expected_lines if x] + for x,y in zip(result_lines, expected_lines): + assert x == y diff --git a/tests/utils.py b/tests/utils.py index 2de6643c..2600cac1 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,6 +1,7 @@ import os import pytest +# https://docs.pytest.org/en/latest/how-to/fixtures.html#teardown-cleanup-aka-fixture-finalization @pytest.fixture def set_empty_kestrel_config(tmp_path): @@ -27,3 +28,23 @@ def set_no_prefetch_kestrel_config(tmp_path): ) yield None del os.environ["KESTREL_CONFIG"] + + +@pytest.fixture +def stixshifter_profile_lab101(tmp_path): + profile_file = tmp_path / "stixshifter.yaml" + os.environ["KESTREL_STIXSHIFTER_CONFIG"] = str(profile_file.expanduser().resolve()) + with open(profile_file, "w") as pf: + pf.write(""" +profiles: + lab101: + connector: stix_bundle + connection: + host: https://github.com/opencybersecurityalliance/data-bucket-kestrel/blob/main/stix-bundles/lab101.json?raw=true + config: + auth: + username: + password: +""") + yield None + del os.environ["KESTREL_STIXSHIFTER_CONFIG"] From 6ace32a90d1a92f997942b3294f6e019c1b53127 Mon Sep 17 00:00:00 2001 From: Xiaokui Shu Date: Wed, 26 Jul 2023 18:57:38 -0400 Subject: [PATCH 6/8] fix bug on masked pwd in ss diag --- src/kestrel_datasource_stixshifter/diagnosis.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/kestrel_datasource_stixshifter/diagnosis.py b/src/kestrel_datasource_stixshifter/diagnosis.py index e19d11e9..0301f809 100644 --- a/src/kestrel_datasource_stixshifter/diagnosis.py +++ b/src/kestrel_datasource_stixshifter/diagnosis.py @@ -1,4 +1,5 @@ import json +from copy import deepcopy from multiprocessing import Queue from kestrel.utils import mask_value_in_nested_dict from kestrel_datasource_stixshifter.config import ( @@ -36,7 +37,9 @@ def diagnose_config(self): print() print("## Diagnose: config verification") - configuration_dict_masked = mask_value_in_nested_dict(self.configuration_dict) + configuration_dict_masked = mask_value_in_nested_dict( + deepcopy(self.configuration_dict) + ) print() print("#### Kestrel specific config") From 8d4b60a91c0387c74fa2a8520771ab5d0033b857 Mon Sep 17 00:00:00 2001 From: Xiaokui Shu Date: Wed, 26 Jul 2023 19:05:17 -0400 Subject: [PATCH 7/8] correct counting in ss diag --- src/kestrel_datasource_stixshifter/diagnosis.py | 4 ++-- tests/test_stixshifter_diagnosis.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/kestrel_datasource_stixshifter/diagnosis.py b/src/kestrel_datasource_stixshifter/diagnosis.py index 0301f809..9deee2c7 100644 --- a/src/kestrel_datasource_stixshifter/diagnosis.py +++ b/src/kestrel_datasource_stixshifter/diagnosis.py @@ -130,9 +130,9 @@ def diagnose_run_query_and_retrieval_result(self, stix_patterns, max_batch_cnt): print("#### data retrieval results:") for packet in iter(result_queue.get, STOP_SIGN): if packet.success: - cnt = len(packet.data[0]["objects"]) + cnt = len(packet.data) result_counts.append(cnt) - print(f"one batch retrieved: {cnt} observations") + print(f"one batch retrieved: {cnt} entries") else: print(packet.log) diff --git a/tests/test_stixshifter_diagnosis.py b/tests/test_stixshifter_diagnosis.py index f5efe47e..8bf65d49 100644 --- a/tests/test_stixshifter_diagnosis.py +++ b/tests/test_stixshifter_diagnosis.py @@ -15,7 +15,7 @@ def test_diagnosis(stixshifter_profile_lab101): diag.diagnose_ping() assert pattern == diag.diagnose_translate_query(pattern)["queries"][0] res = diag.diagnose_run_query_and_retrieval_result([pattern], 1) - assert len(res) == 1 and res[0] == 15 + assert len(res) == 1 and res[0] == 533 def test_cli(stixshifter_profile_lab101): @@ -68,12 +68,12 @@ def test_cli(stixshifter_profile_lab101): ## Diagnose: stix-shifter query execution: <=1 batch(s) #### data retrieval results: -one batch retrieved: 15 observations +one batch retrieved: 533 entries ## Diagnose: stix-shifter query execution: <=5 batch(s) #### data retrieval results: -one batch retrieved: 15 observations +one batch retrieved: 533 entries """ result = subprocess.run(args = ["stix-shifter-diag", "lab101"], From f3ea3ed2ce0f6cb91bfe474bf1e2054bc37a30d8 Mon Sep 17 00:00:00 2001 From: Xiaokui Shu Date: Wed, 26 Jul 2023 20:27:00 -0400 Subject: [PATCH 8/8] v1.7.3 --- CHANGELOG.rst | 12 ++++++++++++ setup.cfg | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 654a73d0..5bb17432 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -9,6 +9,18 @@ The format is based on `Keep a Changelog`_. Unreleased ========== +1.7.3 (2023-07-26) +================== + +Added +----- + +- stix-shifter data source interface diagnosis module +- ``stix-shifter-diag``: stix-shifter data source interface diagnosis utility +- Docs on ``stix-shifter-diag`` +- Error message update to point to ``stix-shifter-diag`` +- Unit tests of the diagnosis module and CLI utility + 1.7.2 (2023-07-26) ================== diff --git a/setup.cfg b/setup.cfg index bdca71cb..04619b3f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = kestrel-lang -version = 1.7.2 +version = 1.7.3 description = Kestrel Threat Hunting Language long_description = file:README.rst long_description_content_type = text/x-rst