Skip to content

Commit

Permalink
Added JSON manifest with CI validation (#61)
Browse files Browse the repository at this point in the history
Add CI (Github Actions) validation.

#61
#57
  • Loading branch information
ahelwer authored Feb 15, 2023
1 parent 03a1e31 commit 4d227cd
Show file tree
Hide file tree
Showing 38 changed files with 4,544 additions and 151 deletions.
7 changes: 7 additions & 0 deletions .ciignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Every line starting with # is a comment line
# Every non-blank non-comment line lists a directory to ignore in the CI
# This will keep parse or model check failures from failing the CI workflow
# Note that directory names are case-sensitive
# Example:
specifications/doesnotexist

223 changes: 223 additions & 0 deletions .github/scripts/check_manifest_features.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,223 @@
"""
This script performs whatever validations are possible on the metadata in
the manifest.json file. Prominent checks include:
* .tla files containing pluscal or proofs are marked as such
* .tla files importing community modules have those modules listed
* .cfg files with certain features are marked as such
* Human-written fields are not empty
"""

import glob
import json
from os.path import basename, dirname, splitext
import tla_utils
from tree_sitter import Language, Parser

# Builds the tree-sitter-tlaplus grammar and constructs the parser
Language.build_library(
'build/tree-sitter-languages.so',
['tree-sitter-tlaplus']
)

TLAPLUS_LANGUAGE = Language('build/tree-sitter-languages.so', 'tlaplus')
parser = Parser()
parser.set_language(TLAPLUS_LANGUAGE)

def parse_module(path):
"""
Parses a .tla file; returns the parse tree along with whether a parse
error was detected.
"""
module_text = None
with open(path, 'rb') as module_file:
module_text = module_file.read()
tree = parser.parse(module_text)
return (tree, module_text, tree.root_node.has_error)

def get_module_names_in_dir(dir):
"""
Gets the module names of all .tla files in the given directory.
"""
return set([splitext(basename(path))[0] for path in glob.glob(f'{dir}/*.tla')])

# This query looks for pluscal and proof constructs
# It can be extended to look for other things if desired
feature_query = TLAPLUS_LANGUAGE.query(
'(pcal_algorithm_start) @pluscal'
+ '[(terminal_proof) (non_terminal_proof)] @proof'
)

def get_tree_features(tree):
"""
Returns any notable features in the parse tree, such as pluscal or proofs
"""
return set([name for _, name in feature_query.captures(tree.root_node)])

def get_module_features(path):
"""
Gets notable features for the .tla file at the given path
"""
tree, _, _ = parse_module(path)
return get_tree_features(tree)

# Keywords mapping to features for models
model_features = {
'PROPERTY': 'liveness',
'PROPERTIES': 'liveness',
'SYMMETRY': 'symmetry',
'ALIAS': 'alias',
'VIEW': 'view',
'CONSTRAINT': 'state constraint',
'CONSTRAINTS': 'state constraint',
}

def get_model_features(path):
"""
Finds features present in the given .cfg model file.
This will be a best-effort text search until a tree-sitter grammar is
created for .cfg files.
"""
features = []
model_text = None
with open(path, 'rt') as model_file:
model_text = model_file.read()
for line in model_text.split('\n'):
tokens = line.split()
if len(tokens) > 0 and tokens[0] in model_features:
features.append(model_features[tokens[0]])
return set(features)

# Query finding all module imports in a .tla file
import_query = TLAPLUS_LANGUAGE.query(
'(extends (identifier_ref) @extends)'
+ '(instance (identifier_ref) @instance)'
)

# Query finding all defined module names in a .tla file
# Especially useful for finding nested module names
module_name_query = TLAPLUS_LANGUAGE.query(
'(module name: (identifier) @module_name)'
)

# All the standard modules available when using TLC
tlc_modules = {
'Bags',
'FiniteSets',
'Integers',
'Json',
'Naturals',
'Randomization',
'RealTime',
'Reals',
'Sequences',
'TLC',
'TLCExt',
'Toolbox'
}

# All the standard modules available when using TLAPS
tlaps_modules = {
'BagsTheorems',
'FiniteSetTheorems',
'FunctionTheorems',
'NaturalsInduction',
'SequencesExtTheorems',
'SequenceTheorems',
'TLAPS',
'WellFoundedInduction'
}

# Modules overloaded by TLAPS; some of these are ordinarily imported as
# community modules.
tlaps_module_overloads = {
'Bags',
'FiniteSets',
'Functions',
'RealTime',
'SequencesExt'
}

def get_community_imports(tree, text, dir, has_proof):
"""
Gets all modules imported by a given .tla file that are not standard
modules or modules in the same file or directory. Community module
imports are what's left.
"""
imports = set(
[
text[node.start_byte:node.end_byte].decode('utf-8')
for node, _ in import_query.captures(tree.root_node)
]
)
modules_in_file = set(
[
text[node.start_byte:node.end_byte].decode('utf-8')
for node, _ in module_name_query.captures(tree.root_node)
]
)
imports = (
imports
- modules_in_file
- tlc_modules
- tlaps_modules
- get_module_names_in_dir(dir)
)
return imports - tlaps_module_overloads if has_proof else imports

def get_community_module_imports(path):
"""
Gets all community modules imported by the .tla file at the given path.
"""
tree, text, _ = parse_module(path)
dir = dirname(path)
has_proof = 'proof' in get_tree_features(tree)
return get_community_imports(tree, text, dir, has_proof)

if __name__ == '__main__':
manifest = tla_utils.load_manifest()

# Validates every field of the manifest that can be validated.
success = True
for spec in manifest['specifications']:
if spec['title'] == '':
success = False
print(f'Spec {spec["path"]} does not have a title')
if spec['description'] == '':
success = False
print(f'Spec {spec["path"]} does not have a description')
if not any(spec['authors']):
success = False
print(f'Spec {spec["path"]} does not have any listed authors')
for module in spec['modules']:
tree, text, parse_err = parse_module(module['path'])
if parse_err:
success = False
print(f'Module {module["path"]} contains syntax errors')
expected_features = get_tree_features(tree)
actual_features = set(module['features'])
if expected_features != actual_features:
success = False
print(
f'Module {module["path"]} has incorrect features in manifest; '
+ f'expected {list(expected_features)}, actual {list(actual_features)}'
)
expected_imports = get_community_imports(tree, text, dirname(module['path']), 'proof' in expected_features)
actual_imports = set(module['communityDependencies'])
if expected_imports != actual_imports:
success = False
print(
f'Module {module["path"]} has incorrect community dependencies in manifest; '
+ f'expected {list(expected_imports)}, actual {list(actual_imports)}'
)
for model in module['models']:
expected_features = get_model_features(model['path'])
actual_features = set(model['features'])
if expected_features != actual_features:
success = False
print(
f'Model {model["path"]} has incorrect features in manifest; '
+ f'expected {list(expected_features)}, actual {list(actual_features)}'
)

exit(0 if success else 1)

79 changes: 79 additions & 0 deletions .github/scripts/check_manifest_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
"""
Checks to ensure all files in manifest.json exist, that all .tla and .cfg
files in repo are recorded in manifest.json (except for those in .ciignore),
and that no files are present twice in the manifest. Also checks to ensure
no files in .ciignore are in the manifest.json.
"""

from collections import Counter
import json
from os.path import normpath
import glob
import tla_utils

manifest = tla_utils.load_manifest()

module_lists = [spec["modules"] for spec in manifest["specifications"]]
modules = [module for module_list in module_lists for module in module_list]
model_lists = [module["models"] for module in modules]

# Get all .tla and .cfg paths from the manifest
tla_mf_paths_cnt = Counter([normpath(module["path"]) for module in modules])
tla_mf_paths = set(tla_mf_paths_cnt)
cfg_mf_paths_cnt = Counter([normpath(model["path"]) for model_list in model_lists for model in model_list])
cfg_mf_paths = set(cfg_mf_paths_cnt)

# Get ignored directories
ignored_dirs = tla_utils.get_ignored_dirs()
ignore = lambda path : tla_utils.ignore(ignored_dirs, path)

# Get paths of all non-ignored .tla and .cfg files in the specifications dir
tla_fs_paths = set([
normpath(path) for path in glob.glob(f"./specifications/**/*.tla", recursive=True)
if '_TTrace_' not in path and not ignore(path)
])
cfg_fs_paths = set([
normpath(path) for path in glob.glob(f"./specifications/**/*.cfg", recursive=True)
if not ignore(path)
])

success = True

# Check whether manifest includes any ignored paths
ignored_tla_in_manifest = set(filter(ignore, tla_mf_paths))
ignored_cfg_in_manifest = set(filter(ignore, cfg_mf_paths))
if any(ignored_tla_in_manifest):
success = False
print('Ignored .tla paths present in manifest:\n' + '\n'.join(ignored_tla_in_manifest))
tla_mf_paths = tla_mf_paths - ignored_tla_in_manifest
if any(ignored_cfg_in_manifest):
success = False
print('Ignored .cfg paths present in manifest:\n' + '\n'.join(ignored_cfg_in_manifest))
cfg_mf_paths = cfg_mf_paths - ignored_cfg_in_manifest

# Check for duplicate paths in manifest
duplicate_tla_paths = [k for k, v in tla_mf_paths_cnt.items() if v > 1]
duplicate_cfg_paths = [k for k, v in cfg_mf_paths_cnt.items() if v > 1]
if any(duplicate_tla_paths):
success = False
print('.tla file paths duplicated in manifest:\n' + '\n'.join(duplicate_tla_paths))
if any(duplicate_cfg_paths):
success = False
print('.cfg file paths duplicated in manifest:\n' + '\n'.join(duplicate_cfg_paths))

# Check paths in manifest match paths on filesystem
if tla_mf_paths < tla_fs_paths:
success = False
print('.tla files not recorded in manifest:\n' + '\n'.join(tla_fs_paths - tla_mf_paths))
if tla_fs_paths < tla_mf_paths:
success = False
print('Manifest .tla files not found in specifications dir:\n' + '\n'.join(tla_mf_paths - tla_fs_paths))
if cfg_mf_paths < cfg_fs_paths:
success = False
print('.cfg files not recorded in manifest:\n' + '\n'.join(cfg_fs_paths - cfg_mf_paths))
if cfg_fs_paths < cfg_mf_paths:
success = False
print('Manifest .cfg files not found in specifications dir:\n' + '\n'.join(cfg_mf_paths - cfg_fs_paths))

exit(0 if success else 1)

21 changes: 21 additions & 0 deletions .github/scripts/check_manifest_schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""
Checks to ensure manifest.json is valid according to schema; this can also
be done manually at https://www.jsonschemavalidator.net/
Learn about the JSON schema format at
https://json-schema.org/understanding-json-schema/
"""

import json
from jsonschema import validate
import tla_utils

schema = tla_utils.load_schema()
manifest = tla_utils.load_manifest()

result = validate(instance=manifest, schema=schema)
if None == result:
exit(0)
else:
print(result)
exit(1)

Loading

0 comments on commit 4d227cd

Please sign in to comment.