From aa6e81a71aa9f3f96f83394553927106bc67c337 Mon Sep 17 00:00:00 2001
From: Paul Coccoli <pcoccoli@us.ibm.com>
Date: Fri, 15 Apr 2022 13:48:36 -0400
Subject: [PATCH 01/35] Improved autocompletion, plus unit test

---
 src/kestrel/session.py          | 41 ++++++++++++++++----
 src/kestrel/syntax/kestrel.lark | 34 +++++++++-------
 src/kestrel/syntax/parser.py    |  4 ++
 tests/test_completion.py        | 69 +++++++++++++++++++++++++++++++++
 4 files changed, 126 insertions(+), 22 deletions(-)
 create mode 100644 tests/test_completion.py

diff --git a/src/kestrel/session.py b/src/kestrel/session.py
index 14aaf9e7..5a05962b 100644
--- a/src/kestrel/session.py
+++ b/src/kestrel/session.py
@@ -368,7 +368,9 @@ def do_complete(self, code, cursor_pos):
             A list of suggested strings to complete the code.
         """
         prefix = code[:cursor_pos]
-        last_word = prefix.split(" ")[-1]
+        words = prefix.split(" ")
+        last_word = words[-1]
+        _logger.debug('code="%s" prefix="%s" last_word="%s"', code, prefix, last_word)
 
         if "START" in prefix or "STOP" in prefix:
             return self._get_complete_timestamp(last_word)
@@ -397,19 +399,30 @@ def do_complete(self, code, cursor_pos):
             _logger.debug("standard auto-complete")
 
             try:
-                self.parse(prefix)
+                stmt = self.parse(prefix)
+                _logger.debug("first parse: %s", stmt)
+                last_stmt = stmt[-1]
+                if last_stmt["command"] == "assign" and last_stmt["output"] == "_":
+                    # Special case for a varname alone on a line
+                    allnames = [
+                        v for v in self.get_variable_names() if v.startswith(prefix)
+                    ]
+                    if not allnames:
+                        return ["=", "+"] if prefix.endswith(" ") else []
 
                 # If it parses successfully, add something so it will fail
                 self.parse(prefix + " @autocompletions@")
             except KestrelSyntaxError as e:
+                _logger.debug("exception: %s", e)
+                varnames = self.get_variable_names()
                 tmp = []
                 for token in e.expected:
                     if token == "VARIABLE":
-                        tmp.extend(self.get_variable_names())
+                        tmp.extend(varnames)
                     elif token == "DATASRC":
                         schemes = self.data_source_manager.schemes()
                         tmp.extend([f"{scheme}://" for scheme in schemes])
-                        tmp.extend(self.get_variable_names())
+                        tmp.extend(varnames)
                     elif token == "ANALYTICS":
                         schemes = self.analytics_manager.schemes()
                         tmp.extend([f"{scheme}://" for scheme in schemes])
@@ -418,12 +431,23 @@ def do_complete(self, code, cursor_pos):
                     elif token.startswith("STIXPATH"):
                         # TODO: figure out the varname and get its attrs
                         continue
+                    elif token.startswith("STIXPATTERNBODY"):
+                        # TODO: figure out how to complete STIX patterns
+                        continue
                     elif token == "RELATION":
-                        tmp.extend(all_relations)
+                        if last_word:
+                            tmp.extend(get_entity_types())
+                        else:
+                            tmp.extend(all_relations)
                     elif token == "REVERSED":
                         tmp.append("BY")
-                        varnames = self.get_variable_names()
-                        if last_word not in varnames:
+                        prev_word = words[-2] if len(words) >= 2 else ""
+                        _logger.debug("prev_word = %s", prev_word)
+                        if prev_word in all_relations:
+                            pass
+                        elif prev_word in varnames:
+                            pass
+                        elif last_word not in varnames:
                             # Must be FIND and not GROUP
                             tmp.extend(all_relations)
                     elif token == "FUNCNAME":
@@ -434,6 +458,8 @@ def do_complete(self, code, cursor_pos):
                         continue
                     elif token.startswith("__ANON"):
                         continue
+                    elif token == "EQUAL":
+                        tmp.append("=")
                     else:
                         tmp.append(token)
                 allnames = sorted(tmp)
@@ -441,6 +467,7 @@ def do_complete(self, code, cursor_pos):
         suggestions = [
             name[len(last_word) :] for name in allnames if name.startswith(last_word)
         ]
+        _logger.debug("%s -> %s", allnames, suggestions)
         return suggestions
 
     def close(self):
diff --git a/src/kestrel/syntax/kestrel.lark b/src/kestrel/syntax/kestrel.lark
index e70ce221..1c6fc68d 100644
--- a/src/kestrel/syntax/kestrel.lark
+++ b/src/kestrel/syntax/kestrel.lark
@@ -8,23 +8,27 @@ start: statement*
 // If no VARIABLE is given, default to _ in post-parsing
 //
 
-statement: VARIABLE "=" command
-         | command
+statement: assignment
+         | command_no_result
          
+assignment: VARIABLE "=" command_with_result
+          | command_with_result
+
 // "?" at the beginning will inline command
-?command: get
-        | find
-        | disp
-        | info
-        | apply
-        | join
-        | sort
-        | group
-        | load
-        | save
-        | new
-        | merge
-        | assign
+?command_with_result: get
+                    | find
+                    | join
+                    | sort
+                    | group
+                    | load
+                    | new
+                    | merge
+                    | assign
+
+?command_no_result: disp
+                  | info
+                  | apply
+                  | save
 
 assign: expression
 
diff --git a/src/kestrel/syntax/parser.py b/src/kestrel/syntax/parser.py
index 4d116315..f776fdac 100644
--- a/src/kestrel/syntax/parser.py
+++ b/src/kestrel/syntax/parser.py
@@ -40,6 +40,10 @@ def start(self, args):
     def statement(self, args):
         # Kestrel syntax: a statement can only has one command
         stmt = args.pop()
+        return stmt
+
+    def assignment(self, args):
+        stmt = args[1] if len(args) == 2 else args[0]
         stmt["output"] = _extract_var(args, self.default_variable)
         return stmt
 
diff --git a/tests/test_completion.py b/tests/test_completion.py
new file mode 100644
index 00000000..2d51ceb7
--- /dev/null
+++ b/tests/test_completion.py
@@ -0,0 +1,69 @@
+import os
+import pytest
+from kestrel.codegen.relations import all_relations
+from kestrel.session import Session
+from kestrel.syntax.utils import (
+    LITERALS,
+    AGG_FUNCS,
+    TRANSFORMS,
+)
+
+
+KNOWN_ETYPES = {
+    'artifact', 'autonomous-system', 'directory', 'domain-name',
+    'email-addr', 'email-message', 'file', 'ipv4-addr', 'ipv6-addr',
+    'mac-addr', 'mutex', 'network-traffic', 'process', 'software',
+    'url', 'user-account', 'windows-registry-key', 'x-ibm-finding',
+    'x-oca-asset', 'x-oca-event'
+}
+
+
+@pytest.fixture
+def a_session():
+    cwd = os.path.dirname(os.path.abspath(__file__))
+    bundle = os.path.join(cwd, "test_bundle.json")
+    session = Session(debug_mode=True)
+    stmt = ("conns = get network-traffic"
+            f" from file://{bundle}"
+            " where [network-traffic:dst_port < 10000]")
+    session.execute(stmt)
+    return session
+
+
+@pytest.mark.parametrize(
+    "code, expected",
+    [
+        ("x", []),  # No suggestions
+        ("x ", {"=", "+"}),
+        ("c", {"onns"}),
+        ("conns", ['']),  # Empty string means word is complete
+        ("conns ", {"=", "+"}),
+        ("disp ", {"conns", "_"} | TRANSFORMS),
+        ("procs = ", {"GET", "FIND", "JOIN", "SORT", "GROUP", "LOAD", "NEW", "conns", "_"} | TRANSFORMS),
+        ("procs = G", {"ET", "ROUP"}),
+        ("procs = F", {"IND"}),
+        ("procs = FI", {"ND"}),
+        ("procs = FIN", {"D"}),
+        ("procs = FIND", []),
+        ("procs = FIND ", KNOWN_ETYPES),
+        ("procs = FIND p", ["rocess"]),
+        ("procs = FIND process", ['']),
+        #("procs = FIND process ", {"created", "loaded", "linked"}),
+        ("procs = FIND process ", all_relations),
+        ("procs = FIND process l", {"oaded", "inked"}),
+        ("procs = FIND process c", {"reated", "ontained", "onns"}),  # FIXME: shouldn't suggest var here
+        ("procs = FIND process created ", {"conns", "_", "BY"}),
+        ("procs = FIND process created BY ", {"conns", "_"}),
+        ("grps = GR", {"OUP"}),
+        ("grps = GROUP ", {"conns", "_"}),
+        ("grps = GROUP conns ", {"BY"}),
+        ("grps = GROUP conns by ", []),  # TODO: we don't suggest attrs yet
+        ("urls = get ", KNOWN_ETYPES),
+        ("urls = get url ", ["FROM", "WHERE"]),
+        ("urls = get url from ", ["_", "conns", "file://", "http://", "https://", "stixshifter://"]),
+        ("urls = get url where ", []),
+   ]
+)
+def test_do_complete_after_get(a_session, code, expected):
+    result = a_session.do_complete(code, len(code))
+    assert set(result) == set(expected)

From 979e6b4cdae67d93b38918d9fa2d33abc6e148d0 Mon Sep 17 00:00:00 2001
From: Xiaokui Shu <subbyte@gmail.com>
Date: Sun, 17 Apr 2022 00:15:35 -0400
Subject: [PATCH 02/35] Update GOVERNANCE.rst

---
 GOVERNANCE.rst | 37 +++++++++++++++++++------------------
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/GOVERNANCE.rst b/GOVERNANCE.rst
index e40f16a0..b3510b60 100644
--- a/GOVERNANCE.rst
+++ b/GOVERNANCE.rst
@@ -46,43 +46,44 @@ Release Procedure
 
 A maintainer should release a new Kestrel runtime (PyPI package name: ``kestre-lang``) following the procedure:
 
-1. Update version and changelog
+#. Update version and changelog
 
-    - Sync the local git repo to the latest of the ``develop`` branch.
-    - Update the ``version`` field in ``setup.cfg``.
-    - Add changes in ``CHANGELOG.rst`` under a new version section.
-    - Add new contributors to ``AUTHORS.rst`` if any.
-    - Commit the updates with the new version number as the message.
-    - Push the local ``develop`` branch to remote.
+    #. Sync the local git repo to the latest of the ``develop`` branch.
+    #. Update the ``version`` field in ``setup.cfg``.
+    #. Add changes in ``CHANGELOG.rst`` under a new version section.
+    #. Add new contributors to ``AUTHORS.rst`` if any.
+    #. Commit the updates with the new version number as the message.
+    #. Push the local ``develop`` branch to remote.
 
-2. Graduate code to the ``release`` branch
+#. Graduate code to the ``release`` branch
 
-    - Open a PR to merge the ``develop`` branch to the ``release`` branch. Use the version number as the PR title.
+    #. Open a PR to merge the ``develop`` branch to the ``release`` branch. Use the version number as the PR title.
 
-    - Merge the PR.
+    #. Merge the PR.
 
-3. Create a new release
+#. Create a new release
 
-    - Go to the release page and click *Draft a new release*.
+    #. Go to the release page and click *Draft a new release*.
 
-    - Type the version number as the new tag to create.
+    #. Type the version number as the new tag to create.
 
-    - Choose ``release`` branch as the *Target*.
+    #. Choose ``release`` branch as the *Target*.
 
-    - Specify a release title. Use the version number for ordinary release.
+    #. Specify a release title. Use the version number for ordinary release.
 
-    - Write a summary of the release.
+    #. Write a summary of the release.
 
         - Patch number release: copy the CHANGELOG entries.
 
         - Minor number release: may have a TLDR at the beginning highlighting the most important new feature.
 
-    - Hit the *Publish release* button.
+    #. Hit the *Publish release* button.
 
-4. After release check
+#. After release check
 
     - Check `kestrel-lang on PyPI`_ after a few minutes to confirm new package built and released.
     - May activate/pin the released version of Kestrel documentation at `readthedocs version control`_.
+    - Announce the release in the OCA Kestrel channel.
 
 Vulnerability Disclosure
 ------------------------

From 13ba0f4342eaa33d2709526352050a664a4f6906 Mon Sep 17 00:00:00 2001
From: Paul Coccoli <pcoccoli@us.ibm.com>
Date: Mon, 18 Apr 2022 14:23:30 -0400
Subject: [PATCH 03/35] Add DisplayWarning

---
 src/kestrel/codegen/commands.py | 17 ++++++++++++++---
 src/kestrel/codegen/display.py  | 17 +++++++++++++++++
 tests/test_command_get.py       | 21 ++++++++++++++++++++-
 3 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/src/kestrel/codegen/commands.py b/src/kestrel/codegen/commands.py
index 5128181f..0d0a2cbf 100644
--- a/src/kestrel/codegen/commands.py
+++ b/src/kestrel/codegen/commands.py
@@ -31,8 +31,9 @@
 from kestrel.semantics import get_entity_table, get_entity_type
 from kestrel.symboltable import new_var
 from kestrel.syntax.parser import get_all_input_var_names
+from kestrel.syntax.utils import get_entity_types
 from kestrel.codegen.data import load_data, load_data_file, dump_data_to_file
-from kestrel.codegen.display import DisplayDataframe, DisplayDict
+from kestrel.codegen.display import DisplayDataframe, DisplayDict, DisplayWarning
 from kestrel.codegen.pattern import build_pattern, build_pattern_from_ids
 from kestrel.codegen.queries import (
     compile_specific_relation_to_query,
@@ -226,6 +227,7 @@ def get(stmt, session):
     return_type = stmt["type"]
     start_offset = session.config["stixquery"]["timerange_start_offset"]
     end_offset = session.config["stixquery"]["timerange_stop_offset"]
+    display = None
 
     pattern = build_pattern(
         stmt["patternbody"],
@@ -237,10 +239,14 @@ def get(stmt, session):
     )
 
     if "variablesource" in stmt:
+        input_type = get_entity_table(stmt["variablesource"], session.symtable)
+        output_type = stmt["type"]
+        if input_type != output_type:
+            pass  # TODO: new exception type?
         session.store.filter(
             stmt["output"],
             stmt["type"],
-            get_entity_table(stmt["variablesource"], session.symtable),
+            input_type,
             pattern,
         )
         output = new_var(session.store, return_var_table, [], stmt, session.symtable)
@@ -325,10 +331,15 @@ def get(stmt, session):
 
         output = new_var(session.store, return_var_table, [], stmt, session.symtable)
 
+        if not len(output):
+            if not return_type.startswith("x-") and return_type not in (
+                set(session.store.types()) | set(get_entity_types())
+            ):
+                display = DisplayWarning(f'unknown entity type "{return_type}"')
     else:
         raise KestrelInternalError(f"unknown type of source in {str(stmt)}")
 
-    return output, None
+    return output, display
 
 
 @_debug_logger
diff --git a/src/kestrel/codegen/display.py b/src/kestrel/codegen/display.py
index 33ca1e0d..e05d468f 100644
--- a/src/kestrel/codegen/display.py
+++ b/src/kestrel/codegen/display.py
@@ -163,3 +163,20 @@ def __init__(self, figure):
         figure.savefig(vfile, format="svg")
         svg = vfile.getvalue()
         super().__init__(svg)
+
+
+class DisplayWarning(AbstractDisplay):
+    def __init__(self, text):
+        self.text = text
+
+    def to_string(self):
+        return self.text
+
+    def to_html(self):
+        return f'<div class="warning">[WARNING] {self.text}</div>'
+
+    def to_json(self):
+        return json.dumps(self.to_dict())
+
+    def to_dict(self):
+        return {"display": "warning", "data": self.text}
diff --git a/tests/test_command_get.py b/tests/test_command_get.py
index 63bb8ef8..b42e367e 100644
--- a/tests/test_command_get.py
+++ b/tests/test_command_get.py
@@ -1,7 +1,9 @@
-import pytest
 import json
 import os
 
+import pytest
+
+from kestrel.codegen.display import DisplayWarning
 from kestrel.session import Session
 
 
@@ -80,3 +82,20 @@ def test_get_multiple_stixshifter_stix_bundles(set_stixshifter_stix_bundles):
                 "teamviewer_service.exe", "teamviewer.exe", "vmware.exe", "dashost.exe",
                 "applemobiledeviceservice.exe", "svctest.exe", "vmware-hostd.exe"]
 
+
+def test_get_wrong_type(file_stix_bundles):
+    with Session() as s:
+        stmt = f"var = GET foo FROM file://{file_stix_bundles[0]} WHERE [process:name='compattelrunner.exe']"
+
+        output = s.execute(stmt)
+        warnings = []
+        for o in output:
+            print(json.dumps(o.to_dict(), indent=4))
+            if isinstance(o, DisplayWarning):
+                warnings.append(o)
+        assert len(warnings) == 1
+        assert "foo" in warnings[0].to_string()
+        v = s.get_variable("var")
+        print(json.dumps(v, indent=4))
+        assert len(v) == 0
+        

From a6187961b761ff41032b56507540d3e1e6ecf4a7 Mon Sep 17 00:00:00 2001
From: Xiaokui Shu <subbyte@gmail.com>
Date: Mon, 18 Apr 2022 23:31:52 -0400
Subject: [PATCH 04/35] fix #205 with firepit query gen (4 cases) for refs

---
 src/kestrel/codegen/queries.py    | 51 ++++++++++++++++++++----
 tests/kestrel_python_analytics.py |  1 +
 tests/test_command_disp.py        |  4 +-
 tests/test_command_find.py        | 65 +++++++++++++++++++------------
 tests/test_command_get.py         | 45 +++++++++++++--------
 tests/test_command_group.py       | 30 +++++++++-----
 tests/test_parser.py              | 40 +++++--------------
 tests/test_timestamped.py         | 18 +++++----
 8 files changed, 157 insertions(+), 97 deletions(-)

diff --git a/src/kestrel/codegen/queries.py b/src/kestrel/codegen/queries.py
index 8557ca01..9e12e015 100644
--- a/src/kestrel/codegen/queries.py
+++ b/src/kestrel/codegen/queries.py
@@ -28,29 +28,63 @@ def compile_specific_relation_to_query(
     stix_src_refs, stix_tgt_refs = stix_2_0_ref_mapping[(entity_x, relation, entity_y)]
 
     for ref_name in stix_src_refs:
+        # e.g., # STIX: ("process", "created", "network-traffic"): (["opened_connection_refs"], [])
+        #       # type(p) == process; is_reversed == True
+        #       nt = FIND network-traffic CREATED BY p
+        #       # type(nt) == network-traffic; is_reversed == False
+        #       p = FIND process CREATED nt
+        #
+        # For source refs, is_reversed is True exactly when input_var is both
+        # - EntityX in stix_2_0_ref_mapping
+        # - the source_ref in the __reflist table of firepit v2.0
+        var_is_source = is_reversed
+
+        (var_attr, ret_attr) = (ref_name, "id") if var_is_source else ("id", ref_name)
+
         # if there are multiple options, use first one found in DB
-        (var_attr, ret_attr) = (ref_name, "id") if is_reversed else ("id", ref_name)
         if ref_name.endswith("_refs"):
-            query = _generate_reflist_query(input_var_name, ref_name, entity_y)
+            query = _generate_reflist_query(
+                input_var_name, var_is_source, ref_name, return_type
+            )
+
         elif var_attr in input_var_attrs and ret_attr in return_type_attrs:
             query = _generate_ref_query(
                 input_var_name, input_type, var_attr, return_type, ret_attr
             )
+
         else:
             continue
+
         return query
 
     for ref_name in stix_tgt_refs:
+        # e.g., # STIX: ("autonomous-system", "owned", "ipv4-addr"): ([], ["belongs_to_refs"])
+        #       # type(a) == autonomous-system; is_reversed == True
+        #       ip = FIND ipv4-addr OWNED BY a
+        #       # type(ip) == ipv4-addr; is_reversed == False
+        #       a = FIND autonomous-system OWNED ip
+        #
+        # For target refs, (not is_reversed) is True exactly when input_var is both
+        # - EntityY in stix_2_0_ref_mapping
+        # - the source_ref in the __reflist table of firepit v2.0
+        var_is_source = not is_reversed
+
+        (var_attr, ret_attr) = (ref_name, "id") if var_is_source else ("id", ref_name)
+
         # if there are multiple options, use first one found in DB
-        (var_attr, ret_attr) = ("id", ref_name) if is_reversed else (ref_name, "id")
         if ref_name.endswith("_refs"):
-            query = _generate_reflist_query(input_var_name, ref_name, entity_x)
+            query = _generate_reflist_query(
+                input_var_name, var_is_source, ref_name, return_type
+            )
+
         elif var_attr in input_var_attrs and ret_attr in return_type_attrs:
             query = _generate_ref_query(
                 input_var_name, input_type, var_attr, return_type, ret_attr
             )
+
         else:
             continue
+
         return query
 
     return None
@@ -111,12 +145,15 @@ def _generate_ref_query(input_var_name, input_type, var_attr, ret_type, ret_attr
     )
 
 
-def _generate_reflist_query(input_var_name, ref_name, entity_y):
+def _generate_reflist_query(input_var_name, var_is_source, ref_name, entity_y):
+    var_ref_pos, y_ref_pos = (
+        ("source_ref", "target_ref") if var_is_source else ("target_ref", "source_ref")
+    )
     return Query(
         [
             Table(input_var_name),
-            Join("__reflist", "id", "=", "source_ref"),
-            Join(entity_y, "target_ref", "=", "id"),
+            Join("__reflist", "id", "=", var_ref_pos),
+            Join(entity_y, y_ref_pos, "=", "id"),
             Filter([Predicate("ref_name", "=", ref_name)]),
             Projection([Column("*", entity_y)]),  # All columns from entity_y
             Unique(),
diff --git a/tests/kestrel_python_analytics.py b/tests/kestrel_python_analytics.py
index 667980ac..1129d89f 100644
--- a/tests/kestrel_python_analytics.py
+++ b/tests/kestrel_python_analytics.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python3
 
+
 def enrich_one_variable(dataframe):
     newattr = ["newval" + str(i) for i in range(dataframe.shape[0])]
     dataframe["x_new_attr"] = newattr
diff --git a/tests/test_command_disp.py b/tests/test_command_disp.py
index fbc71264..69d6f307 100644
--- a/tests/test_command_disp.py
+++ b/tests/test_command_disp.py
@@ -33,7 +33,7 @@ def test_disp_grouped_procs():
         s.execute(stmt)
         s.execute("grpvar = group newvar by name")
         out = s.execute("DISP grpvar")
-        data = out[0].to_dict()['data']
+        data = out[0].to_dict()["data"]
         assert len(data) == 2
 
 
@@ -48,5 +48,5 @@ def test_disp_grouped_conns():
         s.execute(stmt)
         s.execute("grpvar = group newvar by dst_ref.value")
         out = s.execute("DISP grpvar")
-        data = out[0].to_dict()['data']
+        data = out[0].to_dict()["data"]
         assert len(data) == 2
diff --git a/tests/test_command_find.py b/tests/test_command_find.py
index d9a86247..21e6a3df 100644
--- a/tests/test_command_find.py
+++ b/tests/test_command_find.py
@@ -66,28 +66,10 @@ def test_find_srcs(fake_bundle_file):
 srcs = FIND ipv4-addr CREATED conns
 """
         s.execute(stmt)
-        srcs = s.get_variable('srcs')
+        srcs = s.get_variable("srcs")
         assert len(srcs) == 24
 
 
-def test_find_procs(proc_bundle_file):
-    with Session() as s:
-        stmt = f"""
-procs = get process
-        from file://{proc_bundle_file}
-        where [process:name LIKE '%']
-conns = FIND network-traffic CREATED BY procs
-"""
-        s.execute(stmt)
-        conns = s.get_variable('conns')
-        assert len(conns) == 853  # FIXME: should be 948, I think (id collisions for network-traffic)
-
-        # DISP with a ref (parent_ref) and ambiguous column (command_line)
-        disp_out = s.execute("DISP procs ATTR name, parent_ref.name, command_line")
-        data = disp_out[0].to_dict()["data"]
-        print(json.dumps(data, indent=4))
-
-
 def test_find_file_linked_to_process(proc_bundle_file):
     with Session() as s:
         stmt = f"""
@@ -97,12 +79,12 @@ def test_find_file_linked_to_process(proc_bundle_file):
 files = FIND file LINKED procs
 """
         s.execute(stmt)
-        procs = s.get_variable('procs')
+        procs = s.get_variable("procs")
         print(json.dumps(procs, indent=4))
         assert len(procs) == 7 * 3  # TEMP: 3 records per entity
-        files = s.get_variable('files')
+        files = s.get_variable("files")
         print(json.dumps(files, indent=4))
-        assert len(files) == 6  #TODO: double check this count
+        assert len(files) == 6  # TODO: double check this count
 
 
 def test_find_file_loaded_by_process(proc_bundle_file):
@@ -114,10 +96,10 @@ def test_find_file_loaded_by_process(proc_bundle_file):
 files = FIND file LOADED BY procs
 """
         s.execute(stmt)
-        procs = s.get_variable('procs')
+        procs = s.get_variable("procs")
         print(json.dumps(procs, indent=4))
         assert len(procs) == 7 * 3  # TEMP: 3 records per entity
-        files = s.get_variable('files')
+        files = s.get_variable("files")
         print(json.dumps(files, indent=4))
         assert len(files) == 1
 
@@ -131,6 +113,39 @@ def test_find_process_created_process(proc_bundle_file):
 parents = FIND process CREATED procs
 """
         s.execute(stmt)
-        data = s.get_variable('parents')
+        data = s.get_variable("parents")
         print(json.dumps(data, indent=4))
         assert len(data)
+
+
+def test_find_refs_resolution_not_reversed_src_ref(proc_bundle_file):
+    with Session() as s:
+        stmt = f"""
+nt = get network-traffic
+     from file://{proc_bundle_file}
+     where [network-traffic:src_port > 0]
+p = FIND process CREATED nt
+"""
+        s.execute(stmt)
+        p = s.get_variable("p")
+        assert len(p) == 1897
+
+
+def test_find_refs_resolution_reversed_src_ref(proc_bundle_file):
+    with Session() as s:
+        stmt = f"""
+procs = get process
+        from file://{proc_bundle_file}
+        where [process:name LIKE '%']
+conns = FIND network-traffic CREATED BY procs
+"""
+        s.execute(stmt)
+        conns = s.get_variable("conns")
+        assert (
+            len(conns) == 853
+        )  # FIXME: should be 948, I think (id collisions for network-traffic)
+
+        # DISP with a ref (parent_ref) and ambiguous column (command_line)
+        disp_out = s.execute("DISP procs ATTR name, parent_ref.name, command_line")
+        data = disp_out[0].to_dict()["data"]
+        print(json.dumps(data, indent=4))
diff --git a/tests/test_command_get.py b/tests/test_command_get.py
index 63bb8ef8..5dc286fd 100644
--- a/tests/test_command_get.py
+++ b/tests/test_command_get.py
@@ -8,24 +8,26 @@
 @pytest.fixture()
 def file_stix_bundles():
     cwd = os.path.dirname(os.path.abspath(__file__))
-    return [os.path.join(cwd, "test_bundle_4.json"),
-            os.path.join(cwd, "test_bundle_5.json")]
+    return [
+        os.path.join(cwd, "test_bundle_4.json"),
+        os.path.join(cwd, "test_bundle_5.json"),
+    ]
 
 
 @pytest.fixture()
 def set_stixshifter_stix_bundles():
     cfg = '{"auth": {"username": "","password": ""}}'
-    connector = 'stix_bundle'
-    stixshifter_data_url = 'https://raw.githubusercontent.com/opencybersecurityalliance/stix-shifter/develop/data/cybox'
+    connector = "stix_bundle"
+    stixshifter_data_url = "https://raw.githubusercontent.com/opencybersecurityalliance/stix-shifter/develop/data/cybox"
     host1 = f"{stixshifter_data_url}/carbon_black/cb_observed_156.json"
     host2 = f"{stixshifter_data_url}/qradar/qradar_custom_process_observable.json"
 
-    os.environ['STIXSHIFTER_HOST1_CONNECTION'] = json.dumps({"host": host1})
-    os.environ['STIXSHIFTER_HOST1_CONNECTOR'] = connector
-    os.environ['STIXSHIFTER_HOST1_CONFIG'] = cfg
-    os.environ['STIXSHIFTER_HOST2_CONNECTION'] = json.dumps({"host": host2})
-    os.environ['STIXSHIFTER_HOST2_CONNECTOR'] = connector
-    os.environ['STIXSHIFTER_HOST2_CONFIG'] = cfg
+    os.environ["STIXSHIFTER_HOST1_CONNECTION"] = json.dumps({"host": host1})
+    os.environ["STIXSHIFTER_HOST1_CONNECTOR"] = connector
+    os.environ["STIXSHIFTER_HOST1_CONFIG"] = cfg
+    os.environ["STIXSHIFTER_HOST2_CONNECTION"] = json.dumps({"host": host2})
+    os.environ["STIXSHIFTER_HOST2_CONNECTOR"] = connector
+    os.environ["STIXSHIFTER_HOST2_CONFIG"] = cfg
 
 
 def test_get_single_file(file_stix_bundles):
@@ -42,7 +44,7 @@ def test_get_single_file(file_stix_bundles):
 
 def test_get_multiple_file_stix_bundles(file_stix_bundles):
     with Session() as s:
-        file_bundles = ','.join(file_stix_bundles)
+        file_bundles = ",".join(file_stix_bundles)
         stmt = f"var = GET process FROM file://{file_bundles} WHERE [process:name='compattelrunner.exe']"
 
         s.execute(stmt)
@@ -68,7 +70,9 @@ def test_get_single_stixshifter_stix_bundle(set_stixshifter_stix_bundles):
 def test_get_multiple_stixshifter_stix_bundles(set_stixshifter_stix_bundles):
     with Session() as s:
         # default data source schema is stixshifter
-        stmt = "var = GET process FROM HOST1,HOST2 WHERE [ipv4-addr:value = '127.0.0.1']"
+        stmt = (
+            "var = GET process FROM HOST1,HOST2 WHERE [ipv4-addr:value = '127.0.0.1']"
+        )
 
         s.execute(stmt)
         v = s.get_variable("var")
@@ -76,7 +80,16 @@ def test_get_multiple_stixshifter_stix_bundles(set_stixshifter_stix_bundles):
         for i in range(len(v)):
             assert v[i]["type"] == "process"
             assert v[i]["name"] in [
-                "powershell.exe", "(unknown)", "explorer.exe", "firefox.exe", "ntoskrnl.exe",
-                "teamviewer_service.exe", "teamviewer.exe", "vmware.exe", "dashost.exe",
-                "applemobiledeviceservice.exe", "svctest.exe", "vmware-hostd.exe"]
-
+                "powershell.exe",
+                "(unknown)",
+                "explorer.exe",
+                "firefox.exe",
+                "ntoskrnl.exe",
+                "teamviewer_service.exe",
+                "teamviewer.exe",
+                "vmware.exe",
+                "dashost.exe",
+                "applemobiledeviceservice.exe",
+                "svctest.exe",
+                "vmware-hostd.exe",
+            ]
diff --git a/tests/test_command_group.py b/tests/test_command_group.py
index 9df9f982..b5881661 100644
--- a/tests/test_command_group.py
+++ b/tests/test_command_group.py
@@ -38,9 +38,13 @@ def test_group_src_dst(fake_bundle_file):
             where [network-traffic:dst_port > 0]""",
         )
 
-        session.execute(("grps = group conns by "
-                         "network-traffic:src_ref.value,"
-                         "network-traffic:dst_ref.value"))
+        session.execute(
+            (
+                "grps = group conns by "
+                "network-traffic:src_ref.value,"
+                "network-traffic:dst_ref.value"
+            )
+        )
         assert "grps" in session.get_variable_names()
         grps = session.get_variable("grps")
         assert grps is not None
@@ -53,7 +57,7 @@ def test_group_src_dst(fake_bundle_file):
         ("max", "dst_ref.value", "max_dst_ref.value"),
         ("count", "dst_ref.value", "count_dst_ref.value"),
         ("nunique", "dst_ref.value", "nunique_dst_ref.value"),
-    ]
+    ],
 )
 def test_group_srcref_agg(fake_bundle_file, agg_func, attr, expected):
     with Session(debug_mode=True) as session:
@@ -63,8 +67,12 @@ def test_group_srcref_agg(fake_bundle_file, agg_func, attr, expected):
             where [network-traffic:dst_port > 0]""",
         )
 
-        session.execute(("src_grps = group conns by network-traffic:src_ref.value"
-                         f" with {agg_func}({attr})"))
+        session.execute(
+            (
+                "src_grps = group conns by network-traffic:src_ref.value"
+                f" with {agg_func}({attr})"
+            )
+        )
         assert "src_grps" in session.get_variable_names()
         src_grps = session.get_variable("src_grps")
         assert src_grps is not None
@@ -77,7 +85,7 @@ def test_group_srcref_agg(fake_bundle_file, agg_func, attr, expected):
         ("max", "dst_ref.value", "rand_value"),
         ("count", "dst_ref.value", "whatever"),
         ("nunique", "dst_ref.value", "unique_dests"),
-    ]
+    ],
 )
 def test_group_srcref_agg_alias(fake_bundle_file, agg_func, attr, alias):
     with Session(debug_mode=True) as session:
@@ -87,8 +95,12 @@ def test_group_srcref_agg_alias(fake_bundle_file, agg_func, attr, alias):
             where [network-traffic:dst_port > 0]""",
         )
 
-        session.execute(("src_grps = group conns by network-traffic:src_ref.value"
-                         f" with {agg_func}({attr}) as {alias}"))
+        session.execute(
+            (
+                "src_grps = group conns by network-traffic:src_ref.value"
+                f" with {agg_func}({attr}) as {alias}"
+            )
+        )
         assert "src_grps" in session.get_variable_names()
         src_grps = session.get_variable("src_grps")
         assert src_grps is not None
diff --git a/tests/test_parser.py b/tests/test_parser.py
index aa800df8..8b84f2f4 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -109,11 +109,7 @@ def test_grouping_1():
     assert result["input"] == "x"
     assert result["paths"] == ["foo"]
     assert result["aggregations"] == [
-        {
-            'attr': 'baz',
-            'func': 'sum',
-            'alias': 'sum_baz'
-        },
+        {"attr": "baz", "func": "sum", "alias": "sum_baz"},
     ]
 
 
@@ -125,40 +121,22 @@ def test_grouping_2():
     assert result["input"] == "x"
     assert result["paths"] == ["foo", "bar"]
     assert result["aggregations"] == [
-        {
-            'attr': 'baz',
-            'func': 'max',
-            'alias': 'biggest'
-        },
-        {
-            'attr': 'blah',
-            'func': 'min',
-            'alias': 'min_blah'
-        },
+        {"attr": "baz", "func": "max", "alias": "biggest"},
+        {"attr": "blah", "func": "min", "alias": "min_blah"},
     ]
 
 
 def test_grouping_3():
-    results = parse("y = group x by foo with avg(bar), count(baz), max(blah) as whatever")
+    results = parse(
+        "y = group x by foo with avg(bar), count(baz), max(blah) as whatever"
+    )
     result = results[0]
     print(result)
     assert result["command"] == "group"
     assert result["input"] == "x"
     assert result["paths"] == ["foo"]
     assert result["aggregations"] == [
-        {
-            'attr': 'bar',
-            'func': 'avg',
-            'alias': 'avg_bar'
-        },
-        {
-            'attr': 'baz',
-            'func': 'count',
-            'alias': 'count_baz'
-        },
-        {
-            'attr': 'blah',
-            'func': 'max',
-            'alias': 'whatever'
-        },
+        {"attr": "bar", "func": "avg", "alias": "avg_bar"},
+        {"attr": "baz", "func": "count", "alias": "count_baz"},
+        {"attr": "blah", "func": "max", "alias": "whatever"},
     ]
diff --git a/tests/test_timestamped.py b/tests/test_timestamped.py
index f66d7ad9..25d31793 100644
--- a/tests/test_timestamped.py
+++ b/tests/test_timestamped.py
@@ -21,27 +21,31 @@ def test_timestamped_disp(fake_bundle_file):
 """
         s.execute(stmt)
         out = s.execute("DISP conns")
-        data = out[0].to_dict()['data']
+        data = out[0].to_dict()["data"]
         assert len(data) == 29
         assert "first_observed" not in data[0]
         out = s.execute("DISP TIMESTAMPED(conns)")
-        data = out[0].to_dict()['data']
+        data = out[0].to_dict()["data"]
         assert len(data) == 29
         assert "first_observed" in data[0]
         out = s.execute("DISP TIMESTAMPED(conns) LIMIT 5")
-        data = out[0].to_dict()['data']
+        data = out[0].to_dict()["data"]
         assert len(data) == 5
         assert "first_observed" in data[0]
-        out = s.execute("DISP TIMESTAMPED(conns) ATTR first_observed, src_ref.value, src_port")
-        data = out[0].to_dict()['data']
+        out = s.execute(
+            "DISP TIMESTAMPED(conns) ATTR first_observed, src_ref.value, src_port"
+        )
+        data = out[0].to_dict()["data"]
         assert len(data) == 29
         assert "first_observed" in data[0]
         assert "src_ref.value" in data[0]
         assert "src_port" in data[0]
         assert "dst_ref.value" not in data[0]
         assert "dst_port" not in data[0]
-        out = s.execute("DISP TIMESTAMPED(conns) ATTR first_observed, src_ref.value, src_port LIMIT 5")
-        data = out[0].to_dict()['data']
+        out = s.execute(
+            "DISP TIMESTAMPED(conns) ATTR first_observed, src_ref.value, src_port LIMIT 5"
+        )
+        data = out[0].to_dict()["data"]
         assert len(data) == 5
         assert "first_observed" in data[0]
         assert "src_ref.value" in data[0]

From e85b72d7ee415a3c862f70cf731d1c8edaa15eef Mon Sep 17 00:00:00 2001
From: Xiaokui Shu <subbyte@gmail.com>
Date: Mon, 18 Apr 2022 23:48:21 -0400
Subject: [PATCH 05/35] minor comment fix

---
 src/kestrel/codegen/queries.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/kestrel/codegen/queries.py b/src/kestrel/codegen/queries.py
index 9e12e015..044cdc14 100644
--- a/src/kestrel/codegen/queries.py
+++ b/src/kestrel/codegen/queries.py
@@ -65,7 +65,7 @@ def compile_specific_relation_to_query(
         #       a = FIND autonomous-system OWNED ip
         #
         # It is just aligned that (not is_reversed) == whether input_var is
-        # - EntityX in stix_2_0_ref_mapping
+        # - EntityY in stix_2_0_ref_mapping
         # - the source_ref in the __reflist table of firepit v2.0
         var_is_source = not is_reversed
 

From a282680022b49bb1acf413b1d9c3f0da0eca1f0a Mon Sep 17 00:00:00 2001
From: Xiaokui Shu <subbyte@gmail.com>
Date: Tue, 19 Apr 2022 00:01:17 -0400
Subject: [PATCH 06/35] minor update: better var name

---
 src/kestrel/codegen/queries.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/kestrel/codegen/queries.py b/src/kestrel/codegen/queries.py
index 044cdc14..783713c3 100644
--- a/src/kestrel/codegen/queries.py
+++ b/src/kestrel/codegen/queries.py
@@ -145,17 +145,17 @@ def _generate_ref_query(input_var_name, input_type, var_attr, ret_type, ret_attr
     )
 
 
-def _generate_reflist_query(input_var_name, var_is_source, ref_name, entity_y):
-    var_ref_pos, y_ref_pos = (
+def _generate_reflist_query(input_var_name, var_is_source, ref_name, ret_type):
+    var_ref_pos, ret_ref_pos = (
         ("source_ref", "target_ref") if var_is_source else ("target_ref", "source_ref")
     )
     return Query(
         [
             Table(input_var_name),
             Join("__reflist", "id", "=", var_ref_pos),
-            Join(entity_y, y_ref_pos, "=", "id"),
+            Join(ret_type, ret_ref_pos, "=", "id"),
             Filter([Predicate("ref_name", "=", ref_name)]),
-            Projection([Column("*", entity_y)]),  # All columns from entity_y
+            Projection([Column("*", ret_type)]),  # All columns from ret_type
             Unique(),
         ]
     )

From 5dfcc8410e3420fd74cbca90af36c5b49576ed95 Mon Sep 17 00:00:00 2001
From: Paul Coccoli <pcoccoli@us.ibm.com>
Date: Tue, 19 Apr 2022 07:51:03 -0400
Subject: [PATCH 07/35] Don't try to deref a path if there's already a column
 by that name

---
 src/kestrel/codegen/commands.py |  2 +-
 tests/test_session.py           | 13 +++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/kestrel/codegen/commands.py b/src/kestrel/codegen/commands.py
index 5128181f..9153aaf3 100644
--- a/src/kestrel/codegen/commands.py
+++ b/src/kestrel/codegen/commands.py
@@ -616,7 +616,7 @@ def _set_projection(store, entity_table, query, paths):
     for path in paths:
         if path == "*":
             return
-        if "_ref" in path:  # This seems like a hack
+        if "_ref" in path and path not in cols:  # This seems like a hack
             joins, table, column = store.path_joins(entity_table, None, path)
             if table not in joined:
                 query.extend(joins)
diff --git a/tests/test_session.py b/tests/test_session.py
index eb754f36..2a34c9ea 100644
--- a/tests/test_session.py
+++ b/tests/test_session.py
@@ -299,3 +299,16 @@ def test_sha256_attr_name(cbcloud_powershell_bundle):
             df["binary_ref.hashes.'SHA-256'"][0]
             == "de96a6e69944335375dc1ac238336066889d9ffc7d73628ef4fe1b1b160ab32c"
         )
+
+
+def test_disp_after_group(fake_bundle_file):
+    with Session(debug_mode=True) as session:
+        session.execute(
+            f"""
+conns = get network-traffic from file://{fake_bundle_file}
+    where [network-traffic:dst_port < 10000]
+grouped = group conns by src_ref.value, dst_ref.value with count(src_ref.value) as count
+""")
+        out = session.execute("DISP grouped ATTR src_ref.value, dst_ref.value, count")
+        df = out[0].dataframe
+        assert list(df.columns) == ["src_ref.value", "dst_ref.value", "count"]

From a12a9d64b0af60f5e823c9687e8caa5e54b7566f Mon Sep 17 00:00:00 2001
From: Paul Coccoli <pcoccoli@us.ibm.com>
Date: Tue, 19 Apr 2022 16:38:15 -0400
Subject: [PATCH 08/35] Change FUNCNAME from a terminal to an inlined rule

---
 src/kestrel/syntax/kestrel.lark | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/kestrel/syntax/kestrel.lark b/src/kestrel/syntax/kestrel.lark
index 1c6fc68d..8a9071fd 100644
--- a/src/kestrel/syntax/kestrel.lark
+++ b/src/kestrel/syntax/kestrel.lark
@@ -107,7 +107,7 @@ path_list: STIXPATH ("," STIXPATH)* -> valuelist
 
 agg_list: agg ("," agg)*
 
-agg: FUNCNAME "(" STIXPATH ")" ("AS"i alias)?
+agg: funcname "(" STIXPATH ")" ("AS"i alias)?
 
 ?alias: CNAME
 
@@ -128,7 +128,7 @@ REVERSED: "by"i
 COMMENT: /#.*/
 URI: PATH
 
-FUNCNAME: (MIN|MAX|SUM|AVG|COUNT|NUNIQUE)
+?funcname: (MIN|MAX|SUM|AVG|COUNT|NUNIQUE)
 MIN: "min"i
 MAX: "max"i
 SUM: "sum"i

From 9ec8baa781cef158d3bfde6aa225b230f44d5c71 Mon Sep 17 00:00:00 2001
From: Xiaokui Shu <subbyte@gmail.com>
Date: Tue, 19 Apr 2022 17:15:22 -0400
Subject: [PATCH 09/35] fix two issues in FIND command completion: (1) the
 terminal "by"i is now differentiated between FIND and SORT/GROUP; (2)
 variables such as `conns` are no longer suggested for 'FIND process c'

---
 src/kestrel/session.py          | 20 ++++++++++----------
 src/kestrel/syntax/kestrel.lark |  9 +++++----
 tests/test_completion.py        |  3 ++-
 3 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/src/kestrel/session.py b/src/kestrel/session.py
index 5a05962b..0886f4dc 100644
--- a/src/kestrel/session.py
+++ b/src/kestrel/session.py
@@ -370,6 +370,7 @@ def do_complete(self, code, cursor_pos):
         prefix = code[:cursor_pos]
         words = prefix.split(" ")
         last_word = words[-1]
+        last_char = prefix[-1:]  # "" instead of IndexError on empty prefix
         _logger.debug('code="%s" prefix="%s" last_word="%s"', code, prefix, last_word)
 
         if "START" in prefix or "STOP" in prefix:
@@ -439,17 +440,16 @@ def do_complete(self, code, cursor_pos):
                             tmp.extend(get_entity_types())
                         else:
                             tmp.extend(all_relations)
-                    elif token == "REVERSED":
+                    elif token == "BY":
                         tmp.append("BY")
-                        prev_word = words[-2] if len(words) >= 2 else ""
-                        _logger.debug("prev_word = %s", prev_word)
-                        if prev_word in all_relations:
-                            pass
-                        elif prev_word in varnames:
-                            pass
-                        elif last_word not in varnames:
-                            # Must be FIND and not GROUP
-                            tmp.extend(all_relations)
+                    elif token == "REVERSED":
+                        if last_char == " ":
+                            tmp.append("BY")
+                        else:
+                            # "procs = FIND process l" will expect ['REVERSED', 'VARIABLE']
+                            # override results from the case of VARIABLE
+                            tmp = all_relations
+                            break
                     elif token == "FUNCNAME":
                         tmp.extend(AGG_FUNCS)
                     elif token == "TRANSFORM":
diff --git a/src/kestrel/syntax/kestrel.lark b/src/kestrel/syntax/kestrel.lark
index 1c6fc68d..34b78008 100644
--- a/src/kestrel/syntax/kestrel.lark
+++ b/src/kestrel/syntax/kestrel.lark
@@ -44,11 +44,11 @@ find: "find"i ENTITY_TYPE RELATION (REVERSED)? VARIABLE (starttime endtime)?
 
 apply: "apply"i ANALYTICS "on"i variables ("with"i anaparams)?
 
-join: "join"i VARIABLE "," VARIABLE ("by"i STIXPATH "," STIXPATH)?
+join: "join"i VARIABLE "," VARIABLE (BY STIXPATH "," STIXPATH)?
 
-sort: "sort"i VARIABLE "by"i STIXPATH (ASC|DESC)?
+sort: "sort"i VARIABLE BY STIXPATH (ASC|DESC)?
 
-group: "group"i VARIABLE "by"i path_list ("with"i agg_list)?
+group: "group"i VARIABLE BY path_list ("with"i agg_list)?
 
 load: "load"i DUMPPATH ("as"i ENTITY_TYPE)?
 
@@ -63,7 +63,7 @@ expression: transform where_clause? attr_clause? sort_clause? limit_clause? offs
 
 where_clause: "where"i condition
 attr_clause: "attr"i STIXPATHS
-sort_clause: "sort"i "by"i STIXPATH (ASC|DESC)?
+sort_clause: "sort"i BY STIXPATH (ASC|DESC)?
 limit_clause: "limit"i INT
 offset_clause: "offset"i INT
 
@@ -125,6 +125,7 @@ DUMPPATH: PATH
 ASC: "asc"i
 DESC: "desc"i
 REVERSED: "by"i
+BY: "by"i
 COMMENT: /#.*/
 URI: PATH
 
diff --git a/tests/test_completion.py b/tests/test_completion.py
index 2d51ceb7..0d1268be 100644
--- a/tests/test_completion.py
+++ b/tests/test_completion.py
@@ -51,9 +51,10 @@ def a_session():
         #("procs = FIND process ", {"created", "loaded", "linked"}),
         ("procs = FIND process ", all_relations),
         ("procs = FIND process l", {"oaded", "inked"}),
-        ("procs = FIND process c", {"reated", "ontained", "onns"}),  # FIXME: shouldn't suggest var here
+        ("procs = FIND process c", {"reated", "ontained"}),
         ("procs = FIND process created ", {"conns", "_", "BY"}),
         ("procs = FIND process created BY ", {"conns", "_"}),
+        ("procs2 = SORT procs ", {"BY"}),
         ("grps = GR", {"OUP"}),
         ("grps = GROUP ", {"conns", "_"}),
         ("grps = GROUP conns ", {"BY"}),

From 07dc9ced79cce018f891d6beac606ea84e4b535a Mon Sep 17 00:00:00 2001
From: Xiaokui Shu <subbyte@gmail.com>
Date: Tue, 19 Apr 2022 17:22:44 -0400
Subject: [PATCH 10/35] add logo to readthedocs

---
 docs/_static/css/logo.css  |  8 ++++++++
 docs/conf.py               |  9 +++++++++
 logo/logo_w_text_white.svg | 20 ++++++++++++++++++++
 logo/logo_white.svg        | 18 ++++++++++++++++++
 4 files changed, 55 insertions(+)
 create mode 100644 docs/_static/css/logo.css
 create mode 100644 logo/logo_w_text_white.svg
 create mode 100644 logo/logo_white.svg

diff --git a/docs/_static/css/logo.css b/docs/_static/css/logo.css
new file mode 100644
index 00000000..9db57eb2
--- /dev/null
+++ b/docs/_static/css/logo.css
@@ -0,0 +1,8 @@
+/*
+`width:auto` was rendering 0px wide for .svg files
+https://stackoverflow.com/questions/59215996/how-to-add-a-logo-to-my-readthedocs-logo-rendering-at-0px-wide
+*/
+.wy-side-nav-search .wy-dropdown > a img.logo, .wy-side-nav-search > a img.logo {
+    width: 241px;
+    margin-top: 15px;
+}
diff --git a/docs/conf.py b/docs/conf.py
index 14ce02f1..dde78eb3 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -35,3 +35,12 @@ def get_version():
 html_title = project
 html_theme = "sphinx_rtd_theme"
 highlight_language = "none"
+html_logo = "../logo/logo_w_text_white.svg"
+html_theme_options = {
+    'logo_only': True,
+    'display_version': False,
+}
+html_static_path = ['_static']
+html_css_files = [
+    'css/logo.css',
+]
diff --git a/logo/logo_w_text_white.svg b/logo/logo_w_text_white.svg
new file mode 100644
index 00000000..79894964
--- /dev/null
+++ b/logo/logo_w_text_white.svg
@@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<svg viewBox="0 0 460 160" xmlns="http://www.w3.org/2000/svg">
+  <style>
+    .top    {fill:#FFFFFF; transform: skewX(35deg)}
+    .bottom {fill:#FFFFFF; transform: skewX(-35deg)}
+    .name   {fill:#FFFFFF; font: 80px sans-serif}
+  </style>
+  <defs>
+    <rect class="top"    width="26" height="50" id="huntstepT" />
+    <rect class="bottom" width="26" height="50" id="huntstepB" />
+  </defs>
+  <!--
+  <rect x="0" y="0" width="460" height="160" fill="#eeeeee" />
+  -->
+  <use href="#huntstepT" x="15" y="30" />
+  <use href="#huntstepB" x="85" y="80" />
+  <use href="#huntstepT" x="85" y="30" />
+  <use href="#huntstepB" x="120" y="80" />
+  <text x="170" y="108" class="name">Kestrel</text>
+</svg>
diff --git a/logo/logo_white.svg b/logo/logo_white.svg
new file mode 100644
index 00000000..4c543f50
--- /dev/null
+++ b/logo/logo_white.svg
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<svg viewBox="0 0 160 160" xmlns="http://www.w3.org/2000/svg">
+  <style>
+    .top    {fill:#FFFFFF; transform: skewX(35deg)}
+    .bottom {fill:#FFFFFF; transform: skewX(-35deg)}
+  </style>
+  <defs>
+    <rect class="top"    width="26" height="50" id="huntstepT" />
+    <rect class="bottom" width="26" height="50" id="huntstepB" />
+  </defs>
+  <!--
+  <rect x="0" y="0" width="160" height="160" fill="#eeeeee" />
+  -->
+  <use href="#huntstepT" x="15" y="30" />
+  <use href="#huntstepB" x="85" y="80" />
+  <use href="#huntstepT" x="85" y="30" />
+  <use href="#huntstepB" x="120" y="80" />
+</svg>

From 9650425e33b2badea9cfbad9b79b8b4fa370ebb6 Mon Sep 17 00:00:00 2001
From: Xiaokui Shu <subbyte@gmail.com>
Date: Tue, 19 Apr 2022 17:43:49 -0400
Subject: [PATCH 11/35] hotfix: broken index in parser after using BY

---
 src/kestrel/syntax/parser.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/kestrel/syntax/parser.py b/src/kestrel/syntax/parser.py
index f776fdac..aca44896 100644
--- a/src/kestrel/syntax/parser.py
+++ b/src/kestrel/syntax/parser.py
@@ -119,21 +119,21 @@ def join(self, args):
                 "command": "join",
                 "input": _first(args),
                 "input_2": _second(args),
-                "path": _third(args),
-                "path_2": _fourth(args),
+                "path": _fourth(args),
+                "path_2": _fifth(args),
             }
         else:
             return {"command": "join", "input": _first(args), "input_2": _second(args)}
 
     def group(self, args):
         # args[1] was already transformed by path_list/valuelist
-        cols = _normalize_paths(args[1])
+        cols = _normalize_paths(args[2])
         result = {
             "command": "group",
             "paths": cols,
             "input": _extract_var(args, self.default_variable),
         }
-        aggregations = args[2] if len(args) > 2 else None
+        aggregations = args[3] if len(args) > 3 else None
         if aggregations:
             result["aggregations"] = aggregations
         return result
@@ -306,6 +306,8 @@ def _third(args):
 def _fourth(args):
     return args[3].value
 
+def _fifth(args):
+    return args[3].value
 
 def _last(args):
     return args[-1].value

From 0005abb639d727b3cb73912b61ceaffef259c086 Mon Sep 17 00:00:00 2001
From: Xiaokui Shu <subbyte@gmail.com>
Date: Tue, 19 Apr 2022 17:52:12 -0400
Subject: [PATCH 12/35] style check

---
 src/kestrel/syntax/parser.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/kestrel/syntax/parser.py b/src/kestrel/syntax/parser.py
index aca44896..85b569d4 100644
--- a/src/kestrel/syntax/parser.py
+++ b/src/kestrel/syntax/parser.py
@@ -306,9 +306,11 @@ def _third(args):
 def _fourth(args):
     return args[3].value
 
+
 def _fifth(args):
     return args[3].value
 
+
 def _last(args):
     return args[-1].value
 

From cfcae3bb9baa96957c44def9955da025065bc7a3 Mon Sep 17 00:00:00 2001
From: Paul Coccoli <pcoccoli@us.ibm.com>
Date: Wed, 20 Apr 2022 07:27:43 -0400
Subject: [PATCH 13/35] Make auto-completion case-insensitive

---
 src/kestrel/session.py   | 7 +++++++
 tests/test_completion.py | 2 ++
 2 files changed, 9 insertions(+)

diff --git a/src/kestrel/session.py b/src/kestrel/session.py
index 5a05962b..fe64c3aa 100644
--- a/src/kestrel/session.py
+++ b/src/kestrel/session.py
@@ -66,6 +66,7 @@
 from kestrel.syntax.parser import parse
 from kestrel.syntax.utils import (
     get_entity_types,
+    get_keywords,
     all_relations,
     LITERALS,
     AGG_FUNCS,
@@ -415,8 +416,11 @@ def do_complete(self, code, cursor_pos):
             except KestrelSyntaxError as e:
                 _logger.debug("exception: %s", e)
                 varnames = self.get_variable_names()
+                keywords = set(get_keywords())
+                _logger.debug("keywords: %s", keywords)
                 tmp = []
                 for token in e.expected:
+                    _logger.debug("token: %s", token)
                     if token == "VARIABLE":
                         tmp.extend(varnames)
                     elif token == "DATASRC":
@@ -460,6 +464,9 @@ def do_complete(self, code, cursor_pos):
                         continue
                     elif token == "EQUAL":
                         tmp.append("=")
+                    elif token in keywords and last_word.islower():
+                        # keywords has both upper and lower case
+                        tmp.append(token.lower())
                     else:
                         tmp.append(token)
                 allnames = sorted(tmp)
diff --git a/tests/test_completion.py b/tests/test_completion.py
index 2d51ceb7..14d75d11 100644
--- a/tests/test_completion.py
+++ b/tests/test_completion.py
@@ -58,6 +58,8 @@ def a_session():
         ("grps = GROUP ", {"conns", "_"}),
         ("grps = GROUP conns ", {"BY"}),
         ("grps = GROUP conns by ", []),  # TODO: we don't suggest attrs yet
+        ("urls = g", ["et", "roup"]),
+        ("urls = ge", ["t"]),
         ("urls = get ", KNOWN_ETYPES),
         ("urls = get url ", ["FROM", "WHERE"]),
         ("urls = get url from ", ["_", "conns", "file://", "http://", "https://", "stixshifter://"]),

From 5b306094126cb56245b8f5c9981f84f32bc45cfa Mon Sep 17 00:00:00 2001
From: Xiaokui Shu <subbyte@gmail.com>
Date: Thu, 21 Apr 2022 11:57:49 -0400
Subject: [PATCH 14/35] add testing coverage report in GitHub Action

---
 .github/workflows/unit-testing.yml | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/unit-testing.yml b/.github/workflows/unit-testing.yml
index 5dcd27b7..ad453df0 100644
--- a/.github/workflows/unit-testing.yml
+++ b/.github/workflows/unit-testing.yml
@@ -29,12 +29,18 @@ jobs:
         uses: actions/setup-python@v3
         with:
           python-version: ${{ matrix.python-version }}
-      - name: Install Kestrel package
+      - name: Install Kestrel
         run: |
           python -m pip install --upgrade pip
           python -m pip install --upgrade setuptools
-          python -m pip install pytest
           python -m pip install .
-          python -m pip install stix-shifter-modules-stix_bundle
-      - name: Unit testing
-        run: pytest -vv
+      - name: Unit testing with coverage report
+        run: |
+          python -m pip install pytest
+          python -m pip install pytest-cov
+          python -m pytest -vv --cov=./ --cov-report=xml
+      - name: Upload coverage to Codecov
+        uses: codecov/codecov-action@v2
+        with:
+          fail_ci_if_error: true
+          verbose: true

From b8950c056625b4607d59fe6afc50dc970adf63c6 Mon Sep 17 00:00:00 2001
From: Xiaokui Shu <subbyte@gmail.com>
Date: Thu, 21 Apr 2022 14:29:17 -0400
Subject: [PATCH 15/35] update codecov action version

---
 .github/workflows/unit-testing.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/unit-testing.yml b/.github/workflows/unit-testing.yml
index ad453df0..2c72e887 100644
--- a/.github/workflows/unit-testing.yml
+++ b/.github/workflows/unit-testing.yml
@@ -40,7 +40,7 @@ jobs:
           python -m pip install pytest-cov
           python -m pytest -vv --cov=./ --cov-report=xml
       - name: Upload coverage to Codecov
-        uses: codecov/codecov-action@v2
+        uses: codecov/codecov-action@v3
         with:
           fail_ci_if_error: true
           verbose: true

From 62387f290ce660ca0651aca3700d35e4820d9201 Mon Sep 17 00:00:00 2001
From: Paul Coccoli <pcoccoli@us.ibm.com>
Date: Thu, 21 Apr 2022 15:59:35 -0400
Subject: [PATCH 16/35] Fix JOIN; add unit test w/CSV data

---
 src/kestrel/syntax/parser.py  |  4 ++--
 tests/test_command_join.py    | 21 +++++++++++++++++++++
 tests/test_input_data_ips.csv |  3 +++
 3 files changed, 26 insertions(+), 2 deletions(-)
 create mode 100644 tests/test_command_join.py
 create mode 100644 tests/test_input_data_ips.csv

diff --git a/src/kestrel/syntax/parser.py b/src/kestrel/syntax/parser.py
index 85b569d4..ed426437 100644
--- a/src/kestrel/syntax/parser.py
+++ b/src/kestrel/syntax/parser.py
@@ -114,7 +114,7 @@ def find(self, args):
         return packet
 
     def join(self, args):
-        if len(args) == 4:
+        if len(args) == 5:
             return {
                 "command": "join",
                 "input": _first(args),
@@ -308,7 +308,7 @@ def _fourth(args):
 
 
 def _fifth(args):
-    return args[3].value
+    return args[4].value
 
 
 def _last(args):
diff --git a/tests/test_command_join.py b/tests/test_command_join.py
new file mode 100644
index 00000000..be10d624
--- /dev/null
+++ b/tests/test_command_join.py
@@ -0,0 +1,21 @@
+import os
+
+from kestrel.session import Session
+
+
+def test_join_csv_data():
+    data_file_path = os.path.join(
+        os.path.dirname(__file__), "test_input_data_ips.csv"
+    )
+    with Session() as s:
+        s.execute(f"assets = LOAD {data_file_path} AS ipv4-addr")
+        s.execute("""
+ips = NEW [{"type": "ipv4-addr", "value": "192.168.1.2"},
+           {"type": "ipv4-addr", "value": "192.168.1.3"}]
+""")
+        s.execute("risk_ips = JOIN ips, assets by value, value")
+        v = s.get_variable("risk_ips")
+        assert len(v) == 1
+        assert v[0]["type"] == "ipv4-addr"
+        assert v[0]["value"] == "192.168.1.2"
+        assert v[0]["risk"] == 2
diff --git a/tests/test_input_data_ips.csv b/tests/test_input_data_ips.csv
new file mode 100644
index 00000000..cf9974f4
--- /dev/null
+++ b/tests/test_input_data_ips.csv
@@ -0,0 +1,3 @@
+"value","risk"
+"192.168.1.1",1
+"192.168.1.2",2
\ No newline at end of file

From 17ad885fc854c1de1c5dd9498f3262a2a1c17e2f Mon Sep 17 00:00:00 2001
From: Xiaokui Shu <subbyte@gmail.com>
Date: Thu, 21 Apr 2022 16:08:07 -0400
Subject: [PATCH 17/35] use tmp dir for testing SAVE command

---
 tests/test_command_save.py | 32 +++++++++++++++++++-------------
 tests/test_session.py      |  3 ++-
 2 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/tests/test_command_save.py b/tests/test_command_save.py
index 3fbb0648..69b7923a 100644
--- a/tests/test_command_save.py
+++ b/tests/test_command_save.py
@@ -21,11 +21,13 @@ def test_save_parquet_gz(tmp_path):
     data_file_path = os.path.join(
         os.path.dirname(__file__), "test_input_data_procs.parquet.gz"
     )
+    stmt_save = f"newvar = LOAD {data_file_path} SAVE newvar TO {save_path}"
+    stmt_load = f"newload = LOAD {save_path}"
+
     with Session() as s:
-        stmt_save = f"newvar = LOAD {data_file_path} SAVE newvar TO {save_path}"
         s.execute(stmt_save)
-        assert save_path.exists()
-        stmt_load = f"newload = LOAD {save_path}"
+    assert save_path.exists()
+
     with Session() as s:
         s.execute(stmt_load)
         v = s.get_variable("newload")
@@ -34,21 +36,25 @@ def test_save_parquet_gz(tmp_path):
         assert v[0]["name"] == "reg.exe"
 
 
-def test_save_network_traffic_v4(fake_bundle_file):
+def test_save_network_traffic_v4(tmp_path, fake_bundle_file):
+    save_path = tmp_path / "conns.csv"
     with Session(debug_mode=True) as session:
         session.execute(
-            f"""conns = get network-traffic
-            from file://{fake_bundle_file}
-            where [network-traffic:dst_port > 0]""",
+            f"""conns = GET network-traffic
+                        FROM file://{fake_bundle_file}
+                        WHERE [network-traffic:dst_port > 0]""",
         )
-        session.execute("save conns to conns.csv")
+        session.execute(f"SAVE conns TO {save_path}")
+    assert save_path.exists()
 
 
-def test_save_network_traffic_v4_v6(proc_bundle_file):
+def test_save_network_traffic_v4_v6(tmp_path, proc_bundle_file):
+    save_path = tmp_path / "conns.csv"
     with Session(debug_mode=True) as session:
         session.execute(
-            f"""conns = get network-traffic
-            from file://{proc_bundle_file}
-            where [network-traffic:dst_port > 0]""",
+            f"""conns = GET network-traffic
+                        FROM file://{proc_bundle_file}
+                        WHERE [network-traffic:dst_port > 0]""",
         )
-        session.execute("save conns to conns.csv")
+        session.execute(f"SAVE conns TO {save_path}")
+    assert save_path.exists()
diff --git a/tests/test_session.py b/tests/test_session.py
index 2a34c9ea..c8fdfedd 100644
--- a/tests/test_session.py
+++ b/tests/test_session.py
@@ -308,7 +308,8 @@ def test_disp_after_group(fake_bundle_file):
 conns = get network-traffic from file://{fake_bundle_file}
     where [network-traffic:dst_port < 10000]
 grouped = group conns by src_ref.value, dst_ref.value with count(src_ref.value) as count
-""")
+"""
+        )
         out = session.execute("DISP grouped ATTR src_ref.value, dst_ref.value, count")
         df = out[0].dataframe
         assert list(df.columns) == ["src_ref.value", "dst_ref.value", "count"]

From 5a655c9c04fd52ec208ec9b700c4aab993b846f7 Mon Sep 17 00:00:00 2001
From: Paul Coccoli <pcoccoli@us.ibm.com>
Date: Thu, 21 Apr 2022 16:43:35 -0400
Subject: [PATCH 18/35] Fix for auto-dereferencing with mixed IPv4/IPv6

---
 src/kestrel/codegen/commands.py | 44 +++++++++++++++------------------
 src/kestrel/syntax/parser.py    | 14 +++++------
 tests/test_command_disp.py      | 28 +++++++++++++++++++++
 3 files changed, 55 insertions(+), 31 deletions(-)

diff --git a/src/kestrel/codegen/commands.py b/src/kestrel/codegen/commands.py
index 3a46df12..acb0a25d 100644
--- a/src/kestrel/codegen/commands.py
+++ b/src/kestrel/codegen/commands.py
@@ -23,7 +23,8 @@
 import itertools
 from collections import OrderedDict
 
-from firepit.query import Column, Limit, Offset, Order, Projection, Query
+from firepit.deref import auto_deref
+from firepit.query import Limit, Offset, Order, Projection, Query
 from firepit.stix20 import summarize_pattern
 
 from kestrel.utils import remove_empty_dicts, dedup_ordered_dicts
@@ -613,7 +614,7 @@ def _filter_prefetched_process(
     id_pattern = build_pattern_from_ids(return_type, entity_ids)
     if id_pattern:
         session.store.extract(prefetch_filtered_var_name, return_type, None, id_pattern)
-        _logger.debug(f"filter successful.")
+        _logger.debug("filter successful.")
         return prefetch_filtered_var_name
     else:
         _logger.info("no prefetched process found after filtering.")
@@ -621,35 +622,30 @@ def _filter_prefetched_process(
 
 
 def _set_projection(store, entity_table, query, paths):
-    proj = []
-    cols = store.columns(entity_table)
-    joined = set()
-    for path in paths:
-        if path == "*":
-            return
-        if "_ref" in path and path not in cols:  # This seems like a hack
-            joins, table, column = store.path_joins(entity_table, None, path)
-            if table not in joined:
-                query.extend(joins)
-                joined.add(table)
-            proj.append(Column(column, table, path))
-        elif path in cols:
-            # Prevent any ambiguity
-            proj.append(Column(path, entity_table))
-        else:
-            # Not sure where it came from
-            proj.append(path)
-    query.append(Projection(proj))
+    joins, proj = auto_deref(store, entity_table, paths=paths)
+    query.joins.extend(joins)
+    if query.proj:
+        # Need to merge projections?  More-specific overrides less-specific ("*")
+        new_cols = []
+        for p in query.proj.cols:
+            if not (hasattr(p, "table") and p.table == entity_table and p.name == "*"):
+                new_cols.append(p)
+        for p in proj.cols:
+            if not (hasattr(p, "table") and p.table == entity_table and p.name == "*"):
+                new_cols.append(p)
+        query.proj = Projection(new_cols)
+    else:
+        query.proj = proj
 
 
 def _build_query(store, entity_table, qry, stmt):
     where = stmt.get("where")
     if where:
+        where.set_table(entity_table)
         qry.append(where)
     attrs = stmt.get("attrs", "*")
-    if attrs != "*":
-        cols = attrs.split(",")
-        _set_projection(store, entity_table, qry, cols)
+    cols = attrs.split(",")
+    _set_projection(store, entity_table, qry, cols)
     sort_by = stmt.get("path")
     if sort_by:
         direction = "ASC" if stmt["ascending"] else "DESC"
diff --git a/src/kestrel/syntax/parser.py b/src/kestrel/syntax/parser.py
index ed426437..046b24cc 100644
--- a/src/kestrel/syntax/parser.py
+++ b/src/kestrel/syntax/parser.py
@@ -2,7 +2,7 @@
 from pkgutil import get_data
 
 from firepit.query import Filter, Predicate
-from lark import Lark, Transformer, Tree
+from lark import Lark, Token, Transformer, Tree
 
 
 def parse(stmts, default_variable="_", default_sort_order="desc"):
@@ -253,19 +253,19 @@ def agg(self, args):
         return {"func": func, "attr": args[1].value, "alias": alias}
 
     def disj(self, args):
-        lhs = str(args[0])
-        rhs = str(args[2])
+        lhs = str(args[0]) if isinstance(args, Token) else args[0]
+        rhs = str(args[1]) if isinstance(args, Token) else args[1]
         return Predicate(lhs, "OR", rhs)
 
     def conj(self, args):
-        lhs = str(args[0])
-        rhs = str(args[2])
+        lhs = str(args[0]) if isinstance(args, Token) else args[0]
+        rhs = str(args[1]) if isinstance(args, Token) else args[1]
         return Predicate(lhs, "AND", rhs)
 
     def comp(self, args):
-        lhs = str(args[0])
+        lhs = str(args[0]) if isinstance(args, Token) else args[0]
         op = str(args[1])
-        rhs = str(args[2])
+        rhs = str(args[2]) if isinstance(args, Token) else args[2]
         return Predicate(lhs, op, rhs)
 
     def null_comp(self, args):
diff --git a/tests/test_command_disp.py b/tests/test_command_disp.py
index 69d6f307..b881c217 100644
--- a/tests/test_command_disp.py
+++ b/tests/test_command_disp.py
@@ -1,9 +1,17 @@
+import os
+import pandas as pd
 import pytest
 
 from kestrel.exceptions import VariableNotExist
 from kestrel.session import Session
 
 
+@pytest.fixture
+def proc_bundle_file():
+    cwd = os.path.dirname(os.path.abspath(__file__))
+    return os.path.join(cwd, "doctored-1k.json")
+
+
 def test_disp():
     with Session() as s:
         stmt = """
@@ -50,3 +58,23 @@ def test_disp_grouped_conns():
         out = s.execute("DISP grpvar")
         data = out[0].to_dict()["data"]
         assert len(data) == 2
+
+
+def test_disp_mixed_v4_v6(proc_bundle_file):
+    with Session() as s:
+        stmt = f"""
+conns = GET network-traffic
+        FROM file://{proc_bundle_file}
+        WHERE [network-traffic:dst_port > 0]
+"""
+        s.execute(stmt)
+
+        out = s.execute("DISP conns ATTR src_ref.value, src_port")
+        data = out[0].to_dict()["data"]
+        df = pd.DataFrame.from_records(data)
+        assert df.columns.tolist() == ["src_ref.value", "src_port"]
+
+        out = s.execute("DISP TIMESTAMPED(conns) ATTR src_ref.value, src_port")
+        data = out[0].to_dict()["data"]
+        df = pd.DataFrame.from_records(data)
+        assert df.columns.tolist() == ["first_observed", "src_ref.value", "src_port"]

From 67dfac1ba6cb6f9ad764b7ac4f4f052ee4a204d5 Mon Sep 17 00:00:00 2001
From: Paul Coccoli <pcoccoli@us.ibm.com>
Date: Thu, 21 Apr 2022 16:53:33 -0400
Subject: [PATCH 19/35] Add some unit tests I forgot to include

---
 tests/test_command_assign.py | 57 +++++++++++++++++++++
 tests/test_expressions.py    | 96 ++++++++++++++++++++++++++++++++++++
 2 files changed, 153 insertions(+)
 create mode 100644 tests/test_command_assign.py
 create mode 100644 tests/test_expressions.py

diff --git a/tests/test_command_assign.py b/tests/test_command_assign.py
new file mode 100644
index 00000000..db966ea0
--- /dev/null
+++ b/tests/test_command_assign.py
@@ -0,0 +1,57 @@
+import os
+import pytest
+
+from kestrel.session import Session
+
+
+NEW_PROCS = """
+p = NEW [
+          {"type": "process", "name": "cmd.exe", "command_line": "cmd -c dir"},
+          {"type": "process", "name": "explorer.exe", "pid": "99"}
+        ]
+"""
+
+@pytest.fixture
+def proc_bundle_file():
+    cwd = os.path.dirname(os.path.abspath(__file__))
+    return os.path.join(cwd, "doctored-1k.json")
+
+
+@pytest.mark.parametrize(
+    "stmt, expected",
+    [
+        ("x = p", 2),
+        ("x = p WHERE pid = 99", 1),
+        ("x = p WHERE command_line IS NULL", 1),
+        ("x = p WHERE command_line IS NOT NULL", 1),
+        ("x = p WHERE command_line LIKE '%cmd%'", 1),
+    ],
+)
+def test_assign_after_new(stmt, expected):
+    with Session() as s:
+        s.execute(NEW_PROCS)
+        s.execute(stmt)
+        x = s.get_variable("x")
+        assert len(x) == expected, f"ASSIGN error: f{stmt}"
+
+
+@pytest.mark.parametrize(
+    "stmt, expected",
+    [
+        ("x = p", 2000),
+        ("x = p WHERE pid = 1380", 106 * 2),  #FIXME: doubled due to prefetch
+        ("x = p WHERE command_line IS NULL", 948 * 2),
+        ("x = p WHERE command_line IS NOT NULL", 104),
+        ("x = p WHERE command_line LIKE '%/node%'", 1 * 2),
+        ("x = p WHERE pid = 5960 OR name = 'taskeng.exe'", 4),
+        ("x = p WHERE (pid = 5960 OR name = 'taskeng.exe') AND command_line IS NULL", 0),
+    ],
+)
+def test_assign_after_get(proc_bundle_file, stmt, expected):
+    with Session() as s:
+        s.execute(("p = GET process"
+                   f" FROM file://{proc_bundle_file}"
+                   "  WHERE [process:pid > 0]"))
+        s.execute(stmt)
+        x = s.get_variable("x")
+        assert len(x) == expected, f"ASSIGN error: {stmt}"
diff --git a/tests/test_expressions.py b/tests/test_expressions.py
new file mode 100644
index 00000000..3330f005
--- /dev/null
+++ b/tests/test_expressions.py
@@ -0,0 +1,96 @@
+import json
+
+import pytest
+
+from kestrel.session import Session
+
+
+NEW_PROCS = """
+procs = NEW [ {"type": "process", "name": "cmd.exe", "pid": 123, "x_foo": "bar"}
+            , {"type": "process", "name": "explorer.exe", "pid": 99}]
+"""
+
+
+@pytest.mark.parametrize(
+    "attrs, unexpected", [
+        ("pid", {"name"}),
+        ("name", {"pid"}),
+        ("pid,name", set()),
+    ]
+)
+def test_expr_attr(attrs, unexpected):
+    with Session() as s:
+        s.execute(NEW_PROCS)
+        out = s.execute(f"DISP procs ATTR {attrs}")
+        data = out[0].to_dict()["data"]
+        print(json.dumps(data, indent=4))
+        actual = set(data[0].keys())
+        expected = set(attrs.split(","))
+        assert expected == actual
+        assert len(unexpected & actual) == 0
+
+
+@pytest.mark.parametrize(
+    "prop, direction, expected", [
+        ("pid", "asc", [99, 123]),
+        ("pid", "desc", [123, 99]),
+        ("name", "asc", ["cmd.exe", "explorer.exe"]),
+        ("name", "desc", ["explorer.exe", "cmd.exe"]),
+    ]
+)
+def test_expr_sort(prop, direction, expected):
+    with Session() as s:
+        s.execute(NEW_PROCS)
+        out = s.execute(f"DISP procs sort by {prop} {direction}")
+        data = out[0].to_dict()["data"]
+        print(json.dumps(data, indent=4))
+        actual = [p[prop] for p in data]
+        assert actual == expected
+
+
+@pytest.mark.parametrize(
+    "limit, offset, expected", [
+        (5, 0, [99, 123]),
+        (1, 0, [99]),
+        (2, 1, [123]),
+        (1, 1, [123]),
+    ]
+)
+def test_expr_limit_offset(limit, offset, expected):
+    with Session() as s:
+        s.execute(NEW_PROCS)
+        out = s.execute(f"DISP procs SORT BY pid ASC LIMIT {limit} OFFSET {offset}")
+        data = out[0].to_dict()["data"]
+        print(json.dumps(data, indent=4))
+        actual = [p["pid"] for p in data]
+        assert actual == expected
+
+
+@pytest.mark.parametrize(
+    "col, op, val, expected", [
+        ("pid", "=", 99, [99]),
+        ("pid", "<", 100, [99]),
+        ("pid", ">=", 100, [123]),
+        ("x_foo", "IS NULL", "", [99]),
+        ("x_foo", "IS NOT NULL", "", [123]),
+        ("x_foo", "=", "'bar'", [123]),
+    ]
+)
+def test_expr_where(col, op, val, expected):
+    with Session() as s:
+        s.execute(NEW_PROCS)
+        out = s.execute(f"DISP procs WHERE {col} {op} {val}")
+        data = out[0].to_dict()["data"]
+        print(json.dumps(data, indent=4))
+        actual = [p["pid"] for p in data]
+        assert actual == expected
+
+
+def test_expr_assign_where():
+    with Session() as s:
+        s.execute(NEW_PROCS)
+        out = s.execute("x = procs WHERE pid > 100")
+        data = out[0].to_dict()["data"]
+        print(json.dumps(data, indent=4))
+        vars_updated = data["variables updated"]
+        assert vars_updated[0]["#(ENTITIES)"] == 1

From e1bdf7809b9e2fa9fb98ae1993fb68a59ee33c55 Mon Sep 17 00:00:00 2001
From: Paul Coccoli <pcoccoli@us.ibm.com>
Date: Thu, 21 Apr 2022 17:08:19 -0400
Subject: [PATCH 20/35] Require firepit>=2.0.1

---
 setup.cfg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.cfg b/setup.cfg
index ec960fed..a9d50c78 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -37,7 +37,7 @@ install_requires =
     docker>=5.0.0
     stix-shifter>=3.6.0
     stix-shifter-utils>=3.6.0
-    firepit>=2.0.0
+    firepit>=2.0.1
 tests_require =
     pytest
 

From 2b6dc569ace237a12fceb64bf29c6655880c2392 Mon Sep 17 00:00:00 2001
From: Xiaokui Shu <subbyte@gmail.com>
Date: Thu, 21 Apr 2022 17:45:04 -0400
Subject: [PATCH 21/35] hotfix on undefined-variables

---
 src/kestrel/absinterface/manager.py       | 4 ++--
 src/kestrel/exceptions.py                 | 2 +-
 src/kestrel/semantics.py                  | 7 ++++++-
 src/kestrel/syntax/parser.py              | 3 ---
 src/kestrel_analytics_python/interface.py | 2 +-
 5 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/kestrel/absinterface/manager.py b/src/kestrel/absinterface/manager.py
index f07c979a..6dacd5f4 100644
--- a/src/kestrel/absinterface/manager.py
+++ b/src/kestrel/absinterface/manager.py
@@ -54,9 +54,9 @@ def _parse_and_complete_uri(self, uri):
     def _get_interface_with_config(self, scheme):
         scheme = scheme.lower()
         if scheme not in self.scheme_to_interface:
-            raise nonexist_interface_exception(scheme)
+            raise self.nonexist_interface_exception(scheme)
         if scheme not in self.scheme_to_interface_name:
-            raise nonexist_interface_exception(scheme)
+            raise self.nonexist_interface_exception(scheme)
         interface_name = self.scheme_to_interface_name[scheme]
         interface_config = self.config[self.config_root_key][interface_name]
         interface = self.scheme_to_interface[scheme]
diff --git a/src/kestrel/exceptions.py b/src/kestrel/exceptions.py
index d0924082..1d1d2ec4 100644
--- a/src/kestrel/exceptions.py
+++ b/src/kestrel/exceptions.py
@@ -258,7 +258,7 @@ def __init__(self, type_received, types_expected):
 
 
 class InvalidAnalyticsOutput(KestrelException):
-    def __init__(self, analytcs_name, return_type):
+    def __init__(self, analytics_name, return_type):
         super().__init__(
             f"unsupported return type {return_type} from analytics: {analytics_name}"
         )
diff --git a/src/kestrel/semantics.py b/src/kestrel/semantics.py
index e456640e..4af69c3e 100644
--- a/src/kestrel/semantics.py
+++ b/src/kestrel/semantics.py
@@ -1,7 +1,12 @@
 import logging
 import re
 
-from kestrel.exceptions import InvalidAttribute, VariableNotExist, UnsupportedRelation
+from kestrel.exceptions import (
+    InvalidAttribute,
+    VariableNotExist,
+    UnsupportedRelation,
+    KestrelInternalError,
+)
 from kestrel.codegen.relations import stix_2_0_ref_mapping, generic_relations
 
 _logger = logging.getLogger(__name__)
diff --git a/src/kestrel/syntax/parser.py b/src/kestrel/syntax/parser.py
index ed426437..c51bcfa1 100644
--- a/src/kestrel/syntax/parser.py
+++ b/src/kestrel/syntax/parser.py
@@ -228,9 +228,6 @@ def endtime(self, args):
     def variables(self, args):
         return {"variables": _extract_vars(args, self.default_variable)}
 
-    def variables(self, args):
-        return {"variables": _extract_vars(args, self.default_variable)}
-
     def localargs(self, args):
         return {args[0].value: args[1]}
 
diff --git a/src/kestrel_analytics_python/interface.py b/src/kestrel_analytics_python/interface.py
index fd15c78f..c866b9fc 100644
--- a/src/kestrel_analytics_python/interface.py
+++ b/src/kestrel_analytics_python/interface.py
@@ -230,7 +230,7 @@ def _execute(self, arg_variables):
         input_dataframes = [DataFrame(v.get_entities()) for v in arg_variables]
         if len(input_dataframes) != self._get_var_count():
             raise InvalidAnalyticsArgumentCount(
-                profile, len(input_dataframes), self._get_var_count()
+                self.name, len(input_dataframes), self._get_var_count()
             )
         else:
             try:

From 193e4eeb1a9d90515a3fd9941cee3d051c515127 Mon Sep 17 00:00:00 2001
From: Xiaokui Shu <subbyte@gmail.com>
Date: Thu, 21 Apr 2022 22:21:09 -0400
Subject: [PATCH 22/35] focus code-cov on real source code

---
 .github/workflows/unit-testing.yml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/unit-testing.yml b/.github/workflows/unit-testing.yml
index 2c72e887..cfb6a5e6 100644
--- a/.github/workflows/unit-testing.yml
+++ b/.github/workflows/unit-testing.yml
@@ -38,7 +38,9 @@ jobs:
         run: |
           python -m pip install pytest
           python -m pip install pytest-cov
-          python -m pytest -vv --cov=./ --cov-report=xml
+          # pytest-cov does not support automatic sub-package recognition in src/
+          # pass the sub-package names in through the --cov argument
+          python -m pytest -vv --cov-report=xml $(ls --ignore='*.egg-info' src | xargs | sed 's/^\| / --cov=/g')
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v3
         with:

From e9d36b9456cbbdc650bb29c0dbeb95b8fa78fcbe Mon Sep 17 00:00:00 2001
From: Xiaokui Shu <subbyte@gmail.com>
Date: Thu, 21 Apr 2022 22:30:48 -0400
Subject: [PATCH 23/35] fix github workflow for macOS

---
 .github/workflows/unit-testing.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/unit-testing.yml b/.github/workflows/unit-testing.yml
index cfb6a5e6..0944a6da 100644
--- a/.github/workflows/unit-testing.yml
+++ b/.github/workflows/unit-testing.yml
@@ -40,7 +40,7 @@ jobs:
           python -m pip install pytest-cov
           # pytest-cov does not support automatic sub-package recognition in src/
           # pass the sub-package names in through the --cov argument
-          python -m pytest -vv --cov-report=xml $(ls --ignore='*.egg-info' src | xargs | sed 's/^\| / --cov=/g')
+          python -m pytest -vv --cov-report=xml $(ls src | grep -v '.egg-info' | xargs | sed 's/^\| / --cov=/g')
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v3
         with:

From 94f2b84dcb51726e47a9407a0e2d8ebb2a5700e0 Mon Sep 17 00:00:00 2001
From: Xiaokui Shu <subbyte@gmail.com>
Date: Thu, 21 Apr 2022 22:36:51 -0400
Subject: [PATCH 24/35] avoid pytest-cov misfire for testing code

---
 .../{kestrel_python_analytics.py => python_analytics_mockup.py} | 0
 tests/test_python_analytics.py                                  | 2 +-
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename tests/{kestrel_python_analytics.py => python_analytics_mockup.py} (100%)

diff --git a/tests/kestrel_python_analytics.py b/tests/python_analytics_mockup.py
similarity index 100%
rename from tests/kestrel_python_analytics.py
rename to tests/python_analytics_mockup.py
diff --git a/tests/test_python_analytics.py b/tests/test_python_analytics.py
index df8dcda5..df9caf14 100644
--- a/tests/test_python_analytics.py
+++ b/tests/test_python_analytics.py
@@ -22,7 +22,7 @@ def fake_bundle_4():
 def env_setup(tmp_path):
 
     analytics_module_path = str(
-        pathlib.Path(__file__).resolve().parent / "kestrel_python_analytics.py"
+        pathlib.Path(__file__).resolve().parent / "python_analytics_mockup.py"
     )
 
     profiles = f"""profiles:

From 571e98f8ca9f493b8c2857d1bf15b715b2063cf4 Mon Sep 17 00:00:00 2001
From: Xiaokui Shu <subbyte@gmail.com>
Date: Thu, 21 Apr 2022 22:43:47 -0400
Subject: [PATCH 25/35] try to fix pytest-cov on macOS

---
 .github/workflows/unit-testing.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/unit-testing.yml b/.github/workflows/unit-testing.yml
index 0944a6da..2f7b222e 100644
--- a/.github/workflows/unit-testing.yml
+++ b/.github/workflows/unit-testing.yml
@@ -40,7 +40,7 @@ jobs:
           python -m pip install pytest-cov
           # pytest-cov does not support automatic sub-package recognition in src/
           # pass the sub-package names in through the --cov argument
-          python -m pytest -vv --cov-report=xml $(ls src | grep -v '.egg-info' | xargs | sed 's/^\| / --cov=/g')
+          python -m pytest -vv --cov-report=xml $(ls src | grep -v '.egg-info' | xargs | sed 's/^\| / --cov=/g') tests/
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v3
         with:

From 045215b5e4b7fba951fc1952b188d51a9e1cf4b1 Mon Sep 17 00:00:00 2001
From: Xiaokui Shu <subbyte@gmail.com>
Date: Thu, 21 Apr 2022 22:51:53 -0400
Subject: [PATCH 26/35] try to fix pytest-cov on mac OS

---
 .github/workflows/unit-testing.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/unit-testing.yml b/.github/workflows/unit-testing.yml
index 2f7b222e..b249fd1b 100644
--- a/.github/workflows/unit-testing.yml
+++ b/.github/workflows/unit-testing.yml
@@ -40,7 +40,7 @@ jobs:
           python -m pip install pytest-cov
           # pytest-cov does not support automatic sub-package recognition in src/
           # pass the sub-package names in through the --cov argument
-          python -m pytest -vv --cov-report=xml $(ls src | grep -v '.egg-info' | xargs | sed 's/^\| / --cov=/g') tests/
+          pytest -vv $(ls src | grep -v '.egg-info' | xargs | sed 's/^\| / --cov=/g') --cov-report=xml
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v3
         with:

From 763aab2cbea4e27a914b248138991d4a47ef543c Mon Sep 17 00:00:00 2001
From: Xiaokui Shu <subbyte@gmail.com>
Date: Thu, 21 Apr 2022 22:54:12 -0400
Subject: [PATCH 27/35] try fix pytest-cov on macOS

---
 .github/workflows/unit-testing.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/unit-testing.yml b/.github/workflows/unit-testing.yml
index b249fd1b..e1b8b0c9 100644
--- a/.github/workflows/unit-testing.yml
+++ b/.github/workflows/unit-testing.yml
@@ -40,7 +40,7 @@ jobs:
           python -m pip install pytest-cov
           # pytest-cov does not support automatic sub-package recognition in src/
           # pass the sub-package names in through the --cov argument
-          pytest -vv $(ls src | grep -v '.egg-info' | xargs | sed 's/^\| / --cov=/g') --cov-report=xml
+          python -m pytest -vv $(ls src | grep -v '.egg-info' | xargs | sed 's/^\| / --cov=/g') --cov-report=xml tests/test_*.py
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v3
         with:

From 12383b9b44cea62639385dd46b1009b7971fc8ce Mon Sep 17 00:00:00 2001
From: Xiaokui Shu <subbyte@gmail.com>
Date: Thu, 21 Apr 2022 22:57:39 -0400
Subject: [PATCH 28/35] debug pytest-cov on macOS

---
 .github/workflows/unit-testing.yml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/unit-testing.yml b/.github/workflows/unit-testing.yml
index e1b8b0c9..cd611659 100644
--- a/.github/workflows/unit-testing.yml
+++ b/.github/workflows/unit-testing.yml
@@ -40,7 +40,9 @@ jobs:
           python -m pip install pytest-cov
           # pytest-cov does not support automatic sub-package recognition in src/
           # pass the sub-package names in through the --cov argument
-          python -m pytest -vv $(ls src | grep -v '.egg-info' | xargs | sed 's/^\| / --cov=/g') --cov-report=xml tests/test_*.py
+          echo $(ls src | grep -v '.egg-info' | xargs | sed 's/^\| / --cov=/g')
+          echo $(ls tests)
+          python -m pytest -vv $(ls src | grep -v '.egg-info' | xargs | sed 's/^\| / --cov=/g') --cov-report=xml
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v3
         with:

From eb85671bdc86493970b5b9191a5c745763d7173f Mon Sep 17 00:00:00 2001
From: Xiaokui Shu <subbyte@gmail.com>
Date: Thu, 21 Apr 2022 23:09:47 -0400
Subject: [PATCH 29/35] fix sed bug on macOS in pytest

---
 .github/workflows/unit-testing.yml | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/unit-testing.yml b/.github/workflows/unit-testing.yml
index cd611659..b117b9f8 100644
--- a/.github/workflows/unit-testing.yml
+++ b/.github/workflows/unit-testing.yml
@@ -40,9 +40,8 @@ jobs:
           python -m pip install pytest-cov
           # pytest-cov does not support automatic sub-package recognition in src/
           # pass the sub-package names in through the --cov argument
-          echo $(ls src | grep -v '.egg-info' | xargs | sed 's/^\| / --cov=/g')
-          echo $(ls tests)
-          python -m pytest -vv $(ls src | grep -v '.egg-info' | xargs | sed 's/^\| / --cov=/g') --cov-report=xml
+          echo $(ls src | grep -v '.egg-info' | xargs | sed -r 's/^| / --cov=/g')
+          python -m pytest -vv --cov-report=xml $(ls src | grep -v '.egg-info' | xargs | sed -r 's/^| / --cov=/g')
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v3
         with:

From 2590774ba11a3d12b34ef536ee75c65841715ba8 Mon Sep 17 00:00:00 2001
From: Xiaokui Shu <subbyte@gmail.com>
Date: Thu, 21 Apr 2022 23:19:37 -0400
Subject: [PATCH 30/35] finally fix pytest-cov on macOS

---
 .github/workflows/code-style.yml                      | 2 +-
 .github/workflows/stixshifter-module-verification.yml | 2 +-
 .github/workflows/unit-testing.yml                    | 4 ++--
 .github/workflows/unused-import.yml                   | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/code-style.yml b/.github/workflows/code-style.yml
index 31886830..16e14a3f 100644
--- a/.github/workflows/code-style.yml
+++ b/.github/workflows/code-style.yml
@@ -32,4 +32,4 @@ jobs:
           python -m pip install black
           python -m pip install .
       - name: Code style check (please black your code)
-        run: black --check src/
+        run: python -m black --check src/
diff --git a/.github/workflows/stixshifter-module-verification.yml b/.github/workflows/stixshifter-module-verification.yml
index 39b68c44..df40f291 100644
--- a/.github/workflows/stixshifter-module-verification.yml
+++ b/.github/workflows/stixshifter-module-verification.yml
@@ -23,4 +23,4 @@ jobs:
           python -m pip install pytest
           python -m pip install .
       - name: Sample STIX-shifter Connector Package Verification on PyPI
-        run: pytest -vv tests/test_stixshifter.py -k test_verify_package_origin
+        run: python -m pytest -vv tests/test_stixshifter.py -k test_verify_package_origin
diff --git a/.github/workflows/unit-testing.yml b/.github/workflows/unit-testing.yml
index b117b9f8..0526983d 100644
--- a/.github/workflows/unit-testing.yml
+++ b/.github/workflows/unit-testing.yml
@@ -39,8 +39,8 @@ jobs:
           python -m pip install pytest
           python -m pip install pytest-cov
           # pytest-cov does not support automatic sub-package recognition in src/
-          # pass the sub-package names in through the --cov argument
-          echo $(ls src | grep -v '.egg-info' | xargs | sed -r 's/^| / --cov=/g')
+          # pass in all sub-package names through multiple --cov arguments
+          # use ls, xargs, and sed carefully: their options differ between macOS/BSD and Linux
           python -m pytest -vv --cov-report=xml $(ls src | grep -v '.egg-info' | xargs | sed -r 's/^| / --cov=/g')
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v3
diff --git a/.github/workflows/unused-import.yml b/.github/workflows/unused-import.yml
index 0e10bff6..dd723fe5 100644
--- a/.github/workflows/unused-import.yml
+++ b/.github/workflows/unused-import.yml
@@ -32,4 +32,4 @@ jobs:
           python -m pip install unimport
           python -m pip install .
       - name: Check
-        run: unimport --check --exclude __init__.py src/
+        run: python -m unimport --check --exclude __init__.py src/

From 6d5ff7ff299e53054c6c0b032d430861b76308f1 Mon Sep 17 00:00:00 2001
From: Xiaokui Shu <subbyte@gmail.com>
Date: Thu, 21 Apr 2022 23:27:35 -0400
Subject: [PATCH 31/35] ignore code in tests/ for codecov

---
 codecov.yml | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 codecov.yml

diff --git a/codecov.yml b/codecov.yml
new file mode 100644
index 00000000..e6b99e6b
--- /dev/null
+++ b/codecov.yml
@@ -0,0 +1,2 @@
+ignore:
+  - "tests"

From b0b4746af6bde3c90ca44954bfb9ccd89925ed23 Mon Sep 17 00:00:00 2001
From: Xiaokui Shu <subbyte@gmail.com>
Date: Thu, 21 Apr 2022 23:33:43 -0400
Subject: [PATCH 32/35] add codecov badge to README

---
 README.rst | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/README.rst b/README.rst
index 1cf01f24..bc57af30 100644
--- a/README.rst
+++ b/README.rst
@@ -10,6 +10,10 @@
         :target: https://github.com/psf/black
         :alt: Code Style: Black
 
+.. image:: https://codecov.io/gh/opencybersecurityalliance/kestrel-lang/branch/develop/graph/badge.svg?token=HM4ax10IW3
+        :target: https://codecov.io/gh/opencybersecurityalliance/kestrel-lang
+        :alt: Code Coverage
+
 .. image:: https://img.shields.io/pypi/v/kestrel-lang
         :target: https://pypi.python.org/pypi/kestrel-lang
         :alt: Latest Version

From 8f69a33aa04b50c0d7353f50dd25a0f7d6a86d86 Mon Sep 17 00:00:00 2001
From: Xiaokui Shu <subbyte@gmail.com>
Date: Thu, 21 Apr 2022 23:34:34 -0400
Subject: [PATCH 33/35] minor README update

---
 README.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.rst b/README.rst
index bc57af30..44c7b9af 100644
--- a/README.rst
+++ b/README.rst
@@ -118,7 +118,7 @@ Kestrel Hunting Blogs
 Talks And Demos
 ===============
 
-- 2022/04 `SC eSummit on Threat Hunting & Offense Security`_ (register to watch for free)
+- 2022/04 `SC eSummit on Threat Hunting & Offense Security`_ (free to register/playback)
 - 2021/12 `Infosec Jupyterthon 2021`_ [`IJ'21 live hunt recording`_]
 - 2021/11 `BlackHat Europe 2021`_
 - 2021/10 `SANS Threat Hunting Summit 2021`_: [`SANS'21 session recording`_]

From d8f15b7a20f0123430bf819d590f6af53eebc94e Mon Sep 17 00:00:00 2001
From: Xiaokui Shu <subbyte@gmail.com>
Date: Thu, 21 Apr 2022 23:37:09 -0400
Subject: [PATCH 34/35] black tests to test codecov

---
 tests/test_completion.py | 52 +++++++++++++++++++++++++++++-----------
 1 file changed, 38 insertions(+), 14 deletions(-)

diff --git a/tests/test_completion.py b/tests/test_completion.py
index 010b96c2..35218b97 100644
--- a/tests/test_completion.py
+++ b/tests/test_completion.py
@@ -10,11 +10,26 @@
 
 
 KNOWN_ETYPES = {
-    'artifact', 'autonomous-system', 'directory', 'domain-name',
-    'email-addr', 'email-message', 'file', 'ipv4-addr', 'ipv6-addr',
-    'mac-addr', 'mutex', 'network-traffic', 'process', 'software',
-    'url', 'user-account', 'windows-registry-key', 'x-ibm-finding',
-    'x-oca-asset', 'x-oca-event'
+    "artifact",
+    "autonomous-system",
+    "directory",
+    "domain-name",
+    "email-addr",
+    "email-message",
+    "file",
+    "ipv4-addr",
+    "ipv6-addr",
+    "mac-addr",
+    "mutex",
+    "network-traffic",
+    "process",
+    "software",
+    "url",
+    "user-account",
+    "windows-registry-key",
+    "x-ibm-finding",
+    "x-oca-asset",
+    "x-oca-event",
 }
 
 
@@ -23,9 +38,11 @@ def a_session():
     cwd = os.path.dirname(os.path.abspath(__file__))
     bundle = os.path.join(cwd, "test_bundle.json")
     session = Session(debug_mode=True)
-    stmt = ("conns = get network-traffic"
-            f" from file://{bundle}"
-            " where [network-traffic:dst_port < 10000]")
+    stmt = (
+        "conns = get network-traffic"
+        f" from file://{bundle}"
+        " where [network-traffic:dst_port < 10000]"
+    )
     session.execute(stmt)
     return session
 
@@ -36,10 +53,14 @@ def a_session():
         ("x", []),  # No suggestions
         ("x ", {"=", "+"}),
         ("c", {"onns"}),
-        ("conns", ['']),  # Empty string means word is complete
+        ("conns", [""]),  # Empty string means word is complete
         ("conns ", {"=", "+"}),
         ("disp ", {"conns", "_"} | TRANSFORMS),
-        ("procs = ", {"GET", "FIND", "JOIN", "SORT", "GROUP", "LOAD", "NEW", "conns", "_"} | TRANSFORMS),
+        (
+            "procs = ",
+            {"GET", "FIND", "JOIN", "SORT", "GROUP", "LOAD", "NEW", "conns", "_"}
+            | TRANSFORMS,
+        ),
         ("procs = G", {"ET", "ROUP"}),
         ("procs = F", {"IND"}),
         ("procs = FI", {"ND"}),
@@ -47,8 +68,8 @@ def a_session():
         ("procs = FIND", []),
         ("procs = FIND ", KNOWN_ETYPES),
         ("procs = FIND p", ["rocess"]),
-        ("procs = FIND process", ['']),
-        #("procs = FIND process ", {"created", "loaded", "linked"}),
+        ("procs = FIND process", [""]),
+        # ("procs = FIND process ", {"created", "loaded", "linked"}),
         ("procs = FIND process ", all_relations),
         ("procs = FIND process l", {"oaded", "inked"}),
         ("procs = FIND process c", {"reated", "ontained"}),
@@ -63,9 +84,12 @@ def a_session():
         ("urls = ge", ["t"]),
         ("urls = get ", KNOWN_ETYPES),
         ("urls = get url ", ["FROM", "WHERE"]),
-        ("urls = get url from ", ["_", "conns", "file://", "http://", "https://", "stixshifter://"]),
+        (
+            "urls = get url from ",
+            ["_", "conns", "file://", "http://", "https://", "stixshifter://"],
+        ),
         ("urls = get url where ", []),
-   ]
+    ],
 )
 def test_do_complete_after_get(a_session, code, expected):
     result = a_session.do_complete(code, len(code))

From fc6e1e17d88525babffcbbc692956af2de84ba07 Mon Sep 17 00:00:00 2001
From: Xiaokui Shu <subbyte@gmail.com>
Date: Fri, 22 Apr 2022 12:25:22 -0400
Subject: [PATCH 35/35] v1.3.2

---
 CHANGELOG.rst | 35 +++++++++++++++++++++++++++++++++++
 setup.cfg     |  2 +-
 2 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 60337f81..a2227df9 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -6,6 +6,41 @@ All notable changes to this project will be documented in this file.
 
 The format is based on `Keep a Changelog`_.
 
+1.3.2 (2022-04-22)
+==================
+
+Added
+-----
+
+- runtime warning generation for invalid entity type #200
+- auto-complete relation in FIND
+- auto-complete BY and variable in FIND
+- add logo to readthedocs
+- upgrade auto-complete keywords to be case sensitive #213
+- add testing coverage into github workflows
+- add codecov badge to README
+- 31 unit tests for auto-completion
+- the first unit test for JOIN
+- two unit tests for ASSIGN
+- five unit tests for EXPRESSION
+- use tmp dir for generated testing data
+- auto-deref with mixed ipv4/ipv6 in network-traffic
+
+Fixed
+-----
+
+- missing ``_refs`` handling for 2 cases out of 4 #205
+- incorrectly dereferencing attributes after GROUP BY
+- incorrectly yielding variable when auto-completing relation in FIND
+- pylint errors about undefined-variables
+
+Changed
+-------
+
+- update grammar to separate commands yielding (or not) a variable
+- change FUNCNAME from a terminal to an inlined rule
+- differentiate the terminal "by"i between FIND and SORT/GROUP
+
 1.3.1 (2022-04-16)
 ==================
 
diff --git a/setup.cfg b/setup.cfg
index a9d50c78..1b0c3d7f 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = kestrel-lang
-version = 1.3.1
+version = 1.3.2
 description = Kestrel Threat Hunting Language
 long_description = file:README.rst
 long_description_content_type = text/x-rst