Skip to content

Commit

Permalink
new scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
gt22 committed Dec 19, 2024
1 parent 64ff3b7 commit 854a5b4
Show file tree
Hide file tree
Showing 6 changed files with 127 additions and 1 deletion.
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
This answer got Dafny verification error:
{error}
Please try again by taking the Dafny feedback.

If an error appears in one of method ending with `_valid`, that means annotations of an existing function were altered.
6 changes: 6 additions & 0 deletions prompts/dafny_eval_comment_without_impls_textd/rewrite.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Given a Dafny program with function signature, preconditions and postconditions, but with some implementations and annotations missing.
Please return a complete Dafny program with all functions implemented, and annotated using the strongest possible annotations (loop invariants, assert statements, etc.).
Use extensive comments in the code, but don't explain anything outside of it.
Please use exactly the same function signature, preconditions, and postconditions. Do not ever modify the given lines.
Below is the program:
{program}
4 changes: 4 additions & 0 deletions prompts/dafny_eval_comment_without_impls_textd/sys.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
You are an expert in Dafny.
You will be given tasks dealing with Dafny programs including precise annotations.
You respond only with code blocks.
You will write a comment explaining each significant point of the program on a line before.
62 changes: 62 additions & 0 deletions scripts/generate_heatmap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# %%
import matplotlib.pyplot as plt
import json
from itertools import accumulate
import pathlib
import seaborn as sns
import pandas as pd

# %%


def read_result(file):
with open(file) as f:
return {name[:3]: t if t >= 0 else 99 for name, t in json.load(f).items()}


# %%


def plot_results_heatmap(results_dict, width=35):
df = pd.DataFrame.from_dict({
source: entry_dict
for source, entry_dict in results_dict.items()
}, orient='index')

# Create the heatmap
plt.figure(figsize=(width, 4))
sns.heatmap(
df,
cmap='YlOrRd', # Color scheme - you can change this
annot=True, # Show numbers in cells
fmt='d', # Format as integers
cbar_kws={'label': 'Result'}
)

plt.title('Results Heatmap')
plt.xlabel('Entries')
plt.ylabel('Sources')
plt.tight_layout()


# %%
data = pathlib.Path('heatmap_data')
results = {str(x.name).removesuffix(".json"): read_result(x) for x in sorted(data.glob('*.json'))}
# %%
plot_results_heatmap(results, width=33)
plt.show()

# %%
all_tasks = list(next(iter(results.values())).keys())
solve_count = {t: sum(r[t] <= 10 for r in results.values()) for t in all_tasks}
# %%
nontrivial_tasks = [t for t in all_tasks if 0 < solve_count[t] < len(results)]
nontrivial_results = {s: {t: r[t] for t in nontrivial_tasks} for s, r in results.items()}
variant_tasks = [t for t in all_tasks if 2 <= solve_count[t] <= len(results) - 2]
variant_results = {s: {t: r[t] for t in variant_tasks} for s, r in results.items()}
# %%
plot_results_heatmap(nontrivial_results, width=19)
plt.show()
# %%
plot_results_heatmap(variant_results, width=10)
plt.show()
2 changes: 1 addition & 1 deletion scripts/generate_result_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pathlib

# %%
path = "../results/tries_HumanEval-Dafny.json"
path = "../results/tries_HumanEval-Dafny_comments.json"
bench = pathlib.Path("../benches/HumanEval-Dafny")
file_cnt = len(list(bench.glob("*.dfy")))
with open(path) as f:
Expand Down
49 changes: 49 additions & 0 deletions scripts/generate_result_graph_dafny_nagini.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# %%
import matplotlib.pyplot as plt
import json
from itertools import accumulate
import pathlib

# %%


def read_bench(d, ext):
bench = pathlib.Path(d)
return [x.name.removesuffix(f'.{ext}') for x in bench.glob(f'*.{ext}')]


# %%

intersect_bench = True

path = "results/dafny_4_comments.json"
dafny_bench = pathlib.Path("benches/HumanEval-Dafny")
nagini_bench = pathlib.Path("benches/HumanEval-Nagini/Bench")
dafny_files = read_bench("benches/HumanEval-Dafny", "dfy")
nagini_files = read_bench("benches/HumanEval-Nagini/Bench", "py")
shared_files = [x for x in dafny_files if x in nagini_files]
files = shared_files if intersect_bench else dafny_files
# %%
file_cnt = len(files)
with open(path) as f:
data = json.load(f)

tries = [value for file, value in data.items() if file.removesuffix('.dfy') in files and value != -1]
max_tries = max(tries)
cnt = [0] * (max_tries + 1)
for t in tries:
cnt[t] += 1
cnt = list(accumulate(cnt))
perc = [100 * (c / file_cnt) for c in cnt]

fig, ax = plt.subplots() # type: ignore
plt.title("Percentage of files verified") # type: ignore
plt.xlabel("Number of tries") # type: ignore
plt.ylabel("Percentage of files") # type: ignore
ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f"{x}%")) # type: ignore
ax.plot(perc) # type: ignore
# %%
plt.show()
# %%
print(f"{cnt[-1]} / {file_cnt}")
print(f"{round(perc[-1],2)}% of the files were successfully verified")

0 comments on commit 854a5b4

Please sign in to comment.