Merge pull request #7 from Veridise/yanju/dev

Added more practical grammar, executable and README
Veridise · Mar 5, 2024 · 97e60cd · 97e60cd
2 parents 27f517b + 445d745
commit 97e60cd
Show file tree

Hide file tree

Showing 15 changed files with 1,039 additions and 750 deletions.
diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml
@@ -2,9 +2,9 @@ name: Dev CI
 
 on:
     push:
-        branches: [ "main", "dev" ]
+        branches: [ "main", "yanju/dev" ]
     pull_request:
-        branches: [ "main", "dev" ]
+        branches: [ "main", "yanju/dev" ]
     workflow_dispatch:
 
 jobs:
@@ -17,11 +17,7 @@ jobs:
                 python-version: '3.12'
             - name: install dependencies
               run: |
-                pip install networkx[default]
-                pip install beautifulsoup4
-                pip install pandas
-                pip install antlr4-tools
-                pip install antlr4-python3-runtime==4.13.1
+                pip install .
                 antlr4 -v 4.13.1
             - name: test hello.py
               run: python tests/scripts/hello.py
@@ -37,11 +33,7 @@ jobs:
                 python-version: '3.12'
             - name: install dependencies
               run: |
-                pip install networkx[default]
-                pip install beautifulsoup4
-                pip install pandas
-                pip install antlr4-tools
-                pip install antlr4-python3-runtime==4.13.1
+                pip install .
                 antlr4 -v 4.13.1
             - name: test parsing.py
               run: PYTHONPATH="./" python ./tests/scripts/parsing.py
@@ -55,11 +47,7 @@ jobs:
                 python-version: '3.12'
             - name: install dependencies
               run: |
-                pip install networkx[default]
-                pip install beautifulsoup4
-                pip install pandas
-                pip install antlr4-tools
-                pip install antlr4-python3-runtime==4.13.1
+                pip install .
                 antlr4 -v 4.13.1
             - name: test divz
               run: PYTHONPATH="./" python ./tests/scripts/test-divz.py
@@ -73,11 +61,7 @@ jobs:
                 python-version: '3.12'
             - name: install dependencies
               run: |
-                pip install networkx[default]
-                pip install beautifulsoup4
-                pip install pandas
-                pip install antlr4-tools
-                pip install antlr4-python3-runtime==4.13.1
+                pip install .
                 antlr4 -v 4.13.1
             - name: test infoleak
               run: PYTHONPATH="./" python ./tests/scripts/test-infoleak.py
@@ -91,11 +75,7 @@ jobs:
                 python-version: '3.12'
             - name: install dependencies
               run: |
-                pip install networkx[default]
-                pip install beautifulsoup4
-                pip install pandas
-                pip install antlr4-tools
-                pip install antlr4-python3-runtime==4.13.1
+                pip install .
                 antlr4 -v 4.13.1
             - name: test rtcnst
               run: PYTHONPATH="./" python ./tests/scripts/test-rtcnst.py
@@ -109,11 +89,7 @@ jobs:
                 python-version: '3.12'
             - name: install dependencies
               run: |
-                pip install networkx[default]
-                pip install beautifulsoup4
-                pip install pandas
-                pip install antlr4-tools
-                pip install antlr4-python3-runtime==4.13.1
+                pip install .
                 antlr4 -v 4.13.1
             - name: test unused
               run: PYTHONPATH="./" python ./tests/scripts/test-unused.py
diff --git a/README.md b/README.md
@@ -2,6 +2,19 @@
 
 This repo hosts an open-source Python branch of the static analysis tool Vanguard developed by Veridise. This version is optimized for analyzing Leo/Aleo programs.
 
+## Table of Contents
+
+- [Prerequisites](#prerequisites)
+- [Usage](#usage)
+  - [Commandline Executable](#commandline-executable)
+    - [Example Commands](#example-commands)
+  - [Calling from Source](#calling-from-source)
+  - [Calling as Library](#calling-as-library)
+- [Detectors Available](#detectors-available)
+- [Example Leo/Aleo Vulnerabilities](#example-leoaleo-vulnerabilities)
+- [Parser/Lexer Generation](#parserlexer-generation)
+- [Test Suite and Static Analysis APIs](#test-suite-and-static-analysis-apis)
+
 ## Prerequisites
 
 The following libraries are required for running (different components of) the tool:
@@ -13,12 +26,81 @@ The following libraries are required for running (different components of) the t
     - `pip install antlr4-tools`
   - [Beautiful Soup](https://www.crummy.com/software/BeautifulSoup/) (4.12.2+) for crawling benchmarks from public explorers in test suite
   - [pandas](https://pandas.pydata.org/)  (2.1.4+) for data analysis in test suite
+  - [tabulate](https://github.com/astanin/python-tabulate) (0.9.0+) for result table rendering
 - <u>Leo (**7ac50d8**) for compiling and running all benchmarks enclosed</u>
   - The tool is tested under this version, but newer versions of Leo may also work.
 
-## Vanguard for Aleo
+## Usage
+
+The library of Vanguard for Aleo provides common vulnerability detectors and basic utilities for writing detectors based on static analysis. There are three ways to use and integrate the tool into your workflow, namely: commandline executable, calling from source and calling as library.
+
+### Commandline Executable
+
+The analyzer can be installed with `pip` by running the following from the repository root:
+
+```bash
+pip install .
+```
+
+and if you want to remove it:
+
+```bash
+pip uninstall vanguard
+```
+
+After installation, you can directly use the commandline executable `vanguard-aleo` provided:
+
+```bash
+usage: vanguard-aleo [-h] [-b BUILD] [-p PID] [-f FIDS] [-d {divz,infoleak,rtcnst,unused}] [-v]
+
+options:
+  -h, --help            show this help message and exit
+  -b BUILD, --build BUILD
+                        project build path, default: ./
+  -p PID, --pid PID     program id, default: <project main entrance>
+  -f FIDS, --fids FIDS  function ids (separated by comma, no space), default: <all functions of project>
+  -d {divz,infoleak,rtcnst,unused}, --detector {divz,infoleak,rtcnst,unused}
+                        detector to use, default: infoleak
+  -v, --verbose         whether or not to return extra info, default: False
+```
+
+#### Example Commands
+
+- Test detector `infoleak` on all functions of the main program of a project:
+
+  ```bash
+  vanguard-aleo -b ./tests/public/infoleak0/build/ -d infoleak
+  ```
+
+- Test detector `infoleak` on function `ex0` of the main program of a project:
+
+  ```bash
+  vanguard-aleo -b ./tests/public/infoleak0/build/ -f ex0 -d infoleak
+  ```
+
+- Test detector `infoleak` on multiple functions `ex0`, `ex1` and `ex2` of the program `infoleak0.aleo`:
+
+  ```bash
+  vanguard-aleo -b ./tests/public/infoleak0/build/ -f ex0,ex1,ex2 -p infoleak0.aleo -d infoleak
+  ```
+
+- Test detector `infoleak` on multiple functions `ex0`, `ex1` and `ex2` of the program `infoleak0.aleo`, and print out extra information about the findings:
+
+  ```bash
+  vanguard-aleo -b ./tests/public/infoleak0/build/ -f ex0,ex1,ex2 -p infoleak0.aleo -d infoleak -v
+  ```
+
+  This will produce the following output:
+
+  ```
+  |   id | program        | function   | detector   | result   | info           |
+  |------|----------------|------------|------------|----------|----------------|
+  |    0 | infoleak0.aleo | ex0        | infoleak   | unsafe   | [('r0', 'r0')] |
+  |    1 | infoleak0.aleo | ex1        | infoleak   | safe     | []             |
+  |    2 | infoleak0.aleo | ex2        | infoleak   | unsafe   | [('r0', 'r1')] |
+  ```
 
-The library of Vanguard for Aleo provides common vulnerability detectors and basic utilities for writing detectors based on static analysis. To use the tool, you can call it directly from the repo or install it as a library.
+  where the info column provides more information about the detected vulnerability. For example, in function `ex0` information leaks from variable `r0` to `r0` (the input is returned directly), and in `ex2` it leaks from `r0` to `r1`.
 
 ### Calling from Source
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "vanguard"
-version = "0.0.2"
+version = "0.0.3"
 authors = [
   { name="Yanju Chen", email="yanju@veridise.com" },
 ]
@@ -18,8 +18,13 @@ dependencies = [
     "beautifulsoup4>=4.12.2",
     "antlr4-python3-runtime==4.13.1",
     "pandas>=2.1.4",
+    "tabulate>=0.9.0",
     "antlr4-tools",
 ]
 
-[project.urls]
-"Homepage" = "https://veridise.com/"
+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
+
+[project.scripts]
+vanguard-aleo = "vanguard.aleo.run:run"
diff --git a/tests/scripts/dep.py b/tests/scripts/dep.py
@@ -2,6 +2,7 @@
 import bs4
 import pandas
 import antlr4
+import tabulate
 
 if __name__ == "__main__":
     print("Hello World!")
diff --git a/tests/test4.ipynb b/tests/test4.ipynb
@@ -15,7 +15,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 3,
    "id": "b34815c6-9610-4ef1-a7d2-4ee62cd30ae7",
    "metadata": {},
    "outputs": [],
@@ -96,48 +96,50 @@
      "output_type": "stream",
      "text": [
       "# [debug] deploy: main.aleo\n",
-      "# [✓][test] pid: infoleak0.aleo, fid: ex0, expected: True, actual: True\n",
-      "# [✓][test] pid: infoleak0.aleo, fid: ex1, expected: False, actual: False\n",
-      "# [✓][test] pid: infoleak0.aleo, fid: ex2, expected: True, actual: True\n",
-      "# [✓][test] pid: infoleak0.aleo, fid: ex3, expected: True, actual: True\n",
-      "# [✓][test] pid: infoleak0.aleo, fid: ex4, expected: True, actual: True\n",
-      "# [✗][test] pid: infoleak0.aleo, fid: ex5, expected: True, actual: False\n",
-      "# [✗][test] pid: infoleak0.aleo, fid: ex6, expected: True, actual: False\n",
-      "# [✓][test] pid: infoleak0.aleo, fid: ex7, expected: False, actual: False\n",
-      "# [✗][test] pid: infoleak0.aleo, fid: ex8, expected: True, actual: False\n",
-      "# [✓][test] pid: infoleak0.aleo, fid: ex9, expected: True, actual: True\n",
-      "# [✗][test] pid: infoleak0.aleo, fid: ex10, expected: True, actual: False\n",
-      "# [✗][test] pid: infoleak0.aleo, fid: ex11, expected: False, actual: True\n",
-      "# [✗][test] pid: infoleak0.aleo, fid: ex12, expected: True, actual: False\n",
-      "# [✗][test] pid: infoleak0.aleo, fid: ex13, expected: True, actual: False\n",
-      "# [✓][test] pid: infoleak0.aleo, fid: ex14, expected: True, actual: True\n",
-      "# [✓][test] pid: infoleak0.aleo, fid: ex15, expected: True, actual: True\n",
-      "# [✓][test] pid: infoleak0.aleo, fid: ex16, expected: True, actual: True\n",
-      "# [✗][test] pid: infoleak0.aleo, fid: ex17, expected: True, actual: False\n",
-      "# [✓][test] pid: infoleak0.aleo, fid: ex18, expected: True, actual: True\n",
-      "# [✗][test] pid: infoleak0.aleo, fid: ex19, expected: True, actual: False\n",
-      "# [✗][test] pid: infoleak0.aleo, fid: ex20, expected: True, actual: False\n",
-      "# [✗][test] pid: infoleak0.aleo, fid: ex21, expected: True, actual: False\n",
-      "# [✗][test] pid: infoleak0.aleo, fid: ex22, expected: True, actual: False\n",
-      "# [✗][test] pid: infoleak0.aleo, fid: ex23, expected: True, actual: False\n",
-      "# [✗][test] pid: infoleak0.aleo, fid: ex24, expected: True, actual: False\n",
-      "# [test] accuracy: 11/25 (0.4400)\n",
+      "# [debug] deploy: helpers.aleo\n",
+      "# [✓][test] pid: divz0.aleo, fid: ex0, expected: True, actual: True\n",
+      "# [✓][test] pid: divz0.aleo, fid: ex1, expected: True, actual: True\n",
+      "# [✓][test] pid: divz0.aleo, fid: ex2, expected: True, actual: True\n",
+      "# [✓][test] pid: divz0.aleo, fid: ex3, expected: True, actual: True\n",
+      "# [✗][test] pid: divz0.aleo, fid: ex4, expected: False, actual: True\n",
+      "# [✓][test] pid: divz0.aleo, fid: ex5, expected: True, actual: True\n",
+      "# [✓][test] pid: divz0.aleo, fid: ex6, expected: True, actual: True\n",
+      "# [✗][test] pid: divz0.aleo, fid: ex7, expected: False, actual: True\n",
+      "# [✓][test] pid: divz0.aleo, fid: ex8, expected: True, actual: True\n",
+      "# [✓][test] pid: divz0.aleo, fid: ex9, expected: False, actual: False\n",
+      "# [✓][test] pid: divz0.aleo, fid: ex10, expected: False, actual: False\n",
+      "# [✓][test] pid: divz0.aleo, fid: ex11, expected: True, actual: True\n",
+      "# [✓][test] pid: divz0.aleo, fid: ex12, expected: True, actual: True\n",
+      "# [✓][test] pid: divz0.aleo, fid: ex13, expected: True, actual: True\n",
+      "# [✓][test] pid: divz0.aleo, fid: ex14, expected: True, actual: True\n",
+      "# [✓][test] pid: divz0.aleo, fid: ex15, expected: True, actual: True\n",
+      "# [✓][test] pid: divz0.aleo, fid: ex16, expected: True, actual: True\n",
+      "# [✓][test] pid: divz0.aleo, fid: ex17, expected: True, actual: True\n",
+      "# [✓][test] pid: divz0.aleo, fid: ex18, expected: True, actual: True\n",
+      "# [✓][test] pid: divz0.aleo, fid: ex19, expected: True, actual: True\n",
+      "# [✗][test] pid: divz0.aleo, fid: ex20, expected: True, actual: False\n",
+      "# [✓][test] pid: divz0.aleo, fid: ex21, expected: True, actual: True\n",
+      "# [✓][test] pid: divz0.aleo, fid: ex22, expected: True, actual: True\n",
+      "# [✓][test] pid: divz0.aleo, fid: ex23, expected: True, actual: True\n",
+      "# [✓][test] pid: divz0.aleo, fid: ex24, expected: True, actual: True\n",
+      "# [✓][test] pid: divz0.aleo, fid: ex25, expected: True, actual: True\n",
+      "# [test] accuracy: 23/26 (0.8846)\n",
       "# [test] confusion matrix:\n",
       "  actual    False  True \n",
       "expected              \n",
-      "False         2      1\n",
-      "True         13      9\n",
+      "False         2      2\n",
+      "True          1     21\n",
       "# [test] normalized confusion matrix:\n",
       "  actual       False     True \n",
       "expected                    \n",
-      "False     0.666667  0.333333\n",
-      "True      0.590909  0.409091\n"
+      "False     0.500000  0.500000\n",
+      "True      0.045455  0.954545\n"
      ]
     }
    ],
    "source": [
-    "# r = run_test_suite(\"./tests/public/divz0/build/\", detector_divz, verbose=True)\n",
-    "r = run_test_suite(\"./tests/public/infoleak0/build/\", detector_infoleak, verbose=True)\n",
+    "r = run_test_suite(\"./tests/public/divz0/build/\", detector_divz, verbose=True)\n",
+    "# r = run_test_suite(\"./tests/public/infoleak0/build/\", detector_infoleak, verbose=True)\n",
     "# r = run_test_suite(\"./tests/public/rtcnst0/build/\", detector_rtcnst, verbose=True)\n",
     "# r = run_test_suite(\"./tests/public/unused0/build/\", detector_unused, verbose=True)"
    ]

diff --git a/vanguard/aleo/common.py b/vanguard/aleo/common.py
@@ -1,4 +1,3 @@
-from io import StringIO
 from pathlib import Path
 from typing import List, Union
 
@@ -33,3 +32,23 @@ def aleo2json(path: Union[str, Path]):
     parser = AleoParser(stream)
     tree = parser.start()
     return Trees.toJsonTree(tree, None, parser)
+
+def detect(build_path: Union[str, Path], pid: str=None, fids: List=None, detector: str=None, **kwargs):
+    # pid (default: main program of environment, i.e. env.main) - program id, otherwise looked up in env.programs
+    # fids (default: all functions of selected program) - list of function ids, each looked up in prog.functions
+    # detector (default: infoleak) - detector to use; the name is resolved to the attribute detector_<name> of .detectors
+
+    # NOTE: for clarity, only one detector can be used in each call; it is invoked once per selected function and any extra **kwargs are forwarded to it
+
+    from .grammar import AleoEnvironment
+    from . import detectors as dlib
+
+    env = AleoEnvironment(build_path)
+    _detector = getattr(dlib, "detector_infoleak") if detector is None else getattr(dlib, f"detector_{detector}")
+
+    prog = env.main if pid is None else env.programs[pid]
+    funcs = list(prog.functions.values()) if fids is None else [prog.functions[p] for p in fids]
+
+    # start detection: builds one tuple per function, (program id, function id) followed by the detector's return value, which is concatenated and so is expected to be a tuple
+    ret = [ (str(prog.id), str(fn.id)) + _detector(env, prog.id, fn.id, **kwargs) for fn in funcs ]
+    return ret
diff --git a/vanguard/aleo/detectors/divz.py b/vanguard/aleo/detectors/divz.py
@@ -149,6 +149,9 @@ def a(node):
             return node
         case AleoLiteral():
             return AleoAbstractLiteral.abs(node)
+        case [*_]:
+            # collection, directly return since each element should've been visited already
+            return node
         case _:
             raise NotImplementedError(f"Can't wrap a non-literal, got: {node}")