Skip to content

Commit

Permalink
Merge pull request #262 from opencybersecurityalliance/develop
Browse files Browse the repository at this point in the history
v1.5.2
  • Loading branch information
subbyte authored Oct 26, 2022
2 parents f5d1de7 + 07c35b9 commit b89cf8f
Show file tree
Hide file tree
Showing 10 changed files with 103 additions and 7 deletions.
13 changes: 13 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,19 @@ The format is based on `Keep a Changelog`_.
Unreleased
==========

1.5.2 (2022-10-26)
==================

Added
-----

- Relative path support for environment variables starting with ``KESTREL`` #248
- Relative path support for path in ``LOAD``/``SAVE``
- Relative path support for local URI, i.e., ``file://xxx`` or ``file://./xxx`` in ``GET``
- Unit test on relative path in environment variable
- Unit test on relative path in LOAD
- Unit test on relative path in data source in GET

1.5.1 (2022-10-25)
==================

Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = kestrel-lang
version = 1.5.1
version = 1.5.2
description = Kestrel Threat Hunting Language
long_description = file:README.rst
long_description_content_type = text/x-rst
Expand Down
4 changes: 0 additions & 4 deletions src/kestrel/semantics/processor.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import logging
import pathlib
import re

from kestrel.syntax.utils import get_all_input_var_names, timedelta_seconds
Expand Down Expand Up @@ -38,9 +37,6 @@ def semantics_processing(
if stmt["command"] == "get":
_process_datasource_in_get(stmt, symtable, data_source_manager)

if stmt["command"] == "load" or stmt["command"] == "save":
stmt["path"] = pathlib.Path(stmt["path"]).expanduser().resolve()

if stmt["command"] == "find":
_check_semantics_on_find(stmt, symtable[stmt["input"]].type)

Expand Down
4 changes: 3 additions & 1 deletion src/kestrel/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@
from kestrel.symboltable.symtable import SymbolTable
from firepit import get_storage
from firepit.exceptions import StixPatternError
from kestrel.utils import set_current_working_directory
from kestrel.utils import set_current_working_directory, resolve_path_in_kestrel_env_var
from kestrel.config import load_config
from kestrel.datasource import DataSourceManager
from kestrel.analytics import AnalyticsManager
Expand Down Expand Up @@ -177,6 +177,8 @@ def __init__(
f"Establish session with session_id: {session_id}, runtime_dir: {runtime_dir}, store_path:{store_path}, debug_mode:{debug_mode}"
)

resolve_path_in_kestrel_env_var()

self.config = load_config()

if session_id:
Expand Down
5 changes: 4 additions & 1 deletion src/kestrel/syntax/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
from lark.visitors import merge_transformers

from firepit.query import BinnedColumn
from kestrel.utils import unescape_quoted_string
from kestrel.utils import unescape_quoted_string, resolve_path
from kestrel.syntax.utils import resolve_uri
from kestrel.syntax.ecgpattern import (
ECGPComparison,
ECGPJunction,
Expand Down Expand Up @@ -329,12 +330,14 @@ def stdpath(self, args):
v = _first(args)
if args[0].type == "PATH_ESCAPED":
v = unescape_quoted_string(v)
v = resolve_path(v)
return {"path": v}

def datasource(self, args):
    """Build the ``datasource`` entry for a parsed data source token.

    The token value is unescaped when its type is ``DATASRC_ESCAPED``; it
    is then split on commas and each piece is passed through
    ``resolve_uri`` before the pieces are joined back with commas.
    """
    raw = _first(args)
    if args[0].type == "DATASRC_ESCAPED":
        raw = unescape_quoted_string(raw)
    # A data source may list several URIs separated by commas; each one is
    # resolved independently.
    resolved = [resolve_uri(single_uri) for single_uri in raw.split(",")]
    return {"datasource": ",".join(resolved)}

def analytics_uri(self, args):
Expand Down
10 changes: 10 additions & 0 deletions src/kestrel/syntax/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
from itertools import chain
from typing import Tuple, Iterable
import datetime
import os

from kestrel.utils import resolve_path
from kestrel.codegen.relations import (
all_relations,
stix_2_0_ref_mapping,
Expand Down Expand Up @@ -59,3 +61,11 @@ def merge_timeranges(trs: Iterable[Tuple[datetime.datetime, datetime.datetime]])
@typechecked
def timedelta_seconds(t: int):
return datetime.timedelta(seconds=t)


def resolve_uri(uri: str):
    """Rewrite a local ``file://`` URI so that it carries a resolved path.

    Only URIs that start with ``file://`` and whose path part currently
    exists on disk are rewritten (via ``resolve_path``); every other URI is
    returned unchanged.
    """
    scheme = "file://"
    if not uri.startswith(scheme):
        return uri

    local_path = uri[len(scheme):]
    if not os.path.exists(local_path):
        # Leave the URI untouched when the path cannot be found from the
        # current working directory.
        return uri

    return scheme + resolve_path(local_path)
12 changes: 12 additions & 0 deletions src/kestrel/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,18 @@ def mkdtemp():
return p


def resolve_path_in_kestrel_env_var():
    """Resolve existing paths held in Kestrel-related environment variables.

    Every environment variable whose name starts with ``KESTREL`` or
    ``kestrel`` is inspected; when its value names an existing path, the
    value is replaced in ``os.environ`` by the result of ``resolve_path``.
    Variables whose value is not an existing path are left untouched.
    """
    prefixes = ("KESTREL", "kestrel")
    for name in os.environ:
        if not name.startswith(prefixes):
            continue
        value = os.environ[name]
        if os.path.exists(value):
            os.environ[name] = resolve_path(value)


def resolve_path(path):
    """Return ``path`` as a normalized absolute path.

    Environment variable references are expanded first, then a leading
    ``~`` is expanded to the user's home directory, and finally the result
    is made absolute relative to the current working directory.
    """
    with_vars = os.path.expandvars(path)
    with_home = os.path.expanduser(with_vars)
    return os.path.abspath(with_home)


class set_current_working_directory:
def __init__(self, new_cwd):
self.tmp_cwd = new_cwd
Expand Down
25 changes: 25 additions & 0 deletions tests/test_command_get.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
import os
import shutil

import pytest

Expand Down Expand Up @@ -148,6 +149,30 @@ def test_last_datasource(proc_bundle_file):
assert len(b) == 1408


def test_relative_file_path(tmp_path):
    """GET must accept ``file://`` data sources given as relative paths.

    The bundle is copied into ``tmp_path`` and the test runs from that
    directory so that both ``file://name`` and ``file://./name`` refer to
    it. The previous working directory is restored afterwards so the
    change does not leak into other tests.
    """
    data_file_path = "doctored-1k.json"
    ori_path = os.path.join(os.path.dirname(__file__), data_file_path)
    shutil.copy2(ori_path, tmp_path)

    original_cwd = os.getcwd()
    os.chdir(tmp_path)
    try:
        with Session() as s:
            stmt = f"""
a = GET process
FROM file://{data_file_path}
WHERE name = "cmd.exe"
b = GET process
FROM file://./{data_file_path}
WHERE name = 'svchost.exe'
"""
            s.execute(stmt)
            a = s.get_variable("a")
            b = s.get_variable("b")
            assert len(a) == 28
            assert len(b) == 1408
    finally:
        # Undo the chdir even when an assertion fails.
        os.chdir(original_cwd)


def test_get_wrong_type(file_stix_bundles):
with Session() as s:
stmt = f"""
Expand Down
17 changes: 17 additions & 0 deletions tests/test_command_load.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pytest
import os
import shutil

from kestrel.session import Session
from kestrel.exceptions import MissingEntityType
Expand All @@ -18,6 +19,22 @@ def test_load_full_csv():
assert v[0]["name"] == "reg.exe"


def test_load_relative_path_csv(tmp_path):
    """LOAD must accept a path relative to the current working directory.

    The CSV fixture is copied into ``tmp_path`` and the test runs from that
    directory. The previous working directory is restored afterwards so the
    change does not leak into other tests.
    """
    data_file_path = "test_input_data_procs.csv"
    ori_path = os.path.join(os.path.dirname(__file__), data_file_path)
    shutil.copy2(ori_path, tmp_path)

    original_cwd = os.getcwd()
    os.chdir(tmp_path)
    try:
        with Session() as s:
            stmt = f"newvar = LOAD {data_file_path}"
            s.execute(stmt)
            v = s.get_variable("newvar")
            assert len(v) == 5
            assert v[0]["type"] == "process"
            assert v[0]["name"] == "reg.exe"
    finally:
        # Undo the chdir even when an assertion fails.
        os.chdir(original_cwd)


def test_load_full_json():
data_file_path = os.path.join(
os.path.dirname(__file__), "test_input_data_procs.json"
Expand Down
18 changes: 18 additions & 0 deletions tests/test_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import pathlib
import shutil
import tempfile
import kestrel
import kestrel_datasource_stixshifter
import pandas as pd

from kestrel.session import Session
Expand Down Expand Up @@ -316,3 +318,19 @@ def test_disp_after_group(fake_bundle_file):
out = session.execute("DISP grouped ATTR src_ref.value, dst_ref.value, count")
df = out[0].dataframe
assert list(df.columns) == ["src_ref.value", "dst_ref.value", "count"]


def test_env_var_resolve(tmp_path):
    """Relative paths in Kestrel environment variables become absolute.

    A config file is written into ``tmp_path`` and referenced by a relative
    name from two environment variables; creating a ``Session`` is expected
    to rewrite both variables to the full path. The working directory and
    both environment variables are restored afterwards, and the session is
    closed, so nothing leaks into other tests.
    """
    config_name = "abc.yaml"
    env_keys = (
        kestrel.config.CONFIG_PATH_ENV_VAR,
        kestrel_datasource_stixshifter.config.PROFILE_PATH_ENV_VAR,
    )
    # Remember the prior state (None means "was not set") for the cleanup.
    saved_env = {key: os.environ.get(key) for key in env_keys}
    original_cwd = os.getcwd()

    os.chdir(tmp_path)
    try:
        # NOTE(review): the nesting of ``default_variable`` under
        # ``language:`` is not visible in this view of the file -- confirm
        # the YAML indentation against the repository.
        with open(config_name, "w") as config:
            config.write(r"""
language:
default_variable: "_"
""")
        for key in env_keys:
            os.environ[key] = config_name

        with Session():
            full_path = os.path.join(os.getcwd(), config_name)
            for key in env_keys:
                assert os.environ[key] == full_path
    finally:
        os.chdir(original_cwd)
        for key, previous in saved_env.items():
            if previous is None:
                os.environ.pop(key, None)
            else:
                os.environ[key] = previous

0 comments on commit b89cf8f

Please sign in to comment.