differentiate 6th mode from others

JetBrains-Research · Jan 2, 2025 · 3496587 · 3496587
1 parent ff2a636
commit 3496587
Show file tree

Hide file tree

Showing 9 changed files with 67 additions and 221 deletions.
diff --git a/scripts/generate_avg_graph.py b/scripts/generate_avg_graph.py
@@ -0,0 +1,30 @@
+# %%
+import matplotlib.pyplot as plt
+import json
+from itertools import accumulate
+import pathlib
+
+# %%
+path = "verified_cogen/results/tries_Bench_mode6_avg.json"
+bench = pathlib.Path("benches/HumanEval-Nagini/Bench")
+file_cnt = len(list(bench.glob("*.py")))
+with open(path) as f:
+    data = json.load(f)
+
+cnt = list(data.values())
+for i in range(1, len(cnt)):
+    cnt[i] += cnt[i - 1]
+
+print(file_cnt)
+cnt = [100 * (c / file_cnt) for c in cnt]
+
+fig, ax = plt.subplots()  # type: ignore
+plt.title("Percentage of files verified")  # type: ignore
+plt.xlabel("Number of tries")  # type: ignore
+plt.ylabel("Percentage of files")  # type: ignore
+ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f"{x}%"))  # type: ignore
+ax.plot(cnt)  # type: ignore
+plt.show()
+
+# %%
+f"{cnt[-1]}% of the files were successfully verified"
diff --git a/tests/test_dafny.py b/tests/test_dafny.py
@@ -31,168 +31,6 @@ def test_dafny_generate():
         """
     )
 
-# def test_dafny_generate1():
-#     dafny_lang = LanguageDatabase().get("dafny")
-#     code = dedent(
-#         """\
-# function ParenthesesDepth(s: string, i: int, j: int): int
-#     decreases j - i
-#     requires 0 <= i <= j <= |s|
-# {
-#     if i == j then
-#         0
-#     else if s[i] == '(' then
-#         ParenthesesDepth(s, i+1, j) + 1
-#     else if s[i] == ')' then
-#         ParenthesesDepth(s, i+1, j) - 1
-#     else
-#         ParenthesesDepth(s, i+1, j)
-# }
-#
-# function InnerDepthsPositive(s: string): bool
-# {
-#     forall i :: 0 < i < |s| ==> ParenthesesDepth(s, 0, i) > 0
-# }
-#
-# function InnerDepthsNonnegative(s: string): bool
-# {
-#     forall i :: 0 < i < |s| ==> ParenthesesDepth(s, 0, i) >= 0
-# }
-#
-# lemma ParenthesesDepthSum(s: string, i: int, j: int, k: int)
-#     decreases j - i
-#     requires 0 <= i <= j <= k <= |s|
-#     ensures ParenthesesDepth(s, i, k) == ParenthesesDepth(s, i, j) + ParenthesesDepth(s, j, k)
-# {
-#     if i != j {
-#         ParenthesesDepthSum(s, i+1, j, k);
-#     }
-# }
-#
-# lemma ParenthesesSuffixEq(s: string, i: int, j: int)
-#     decreases j -i
-#     requires 0 <= i <= j <= |s|
-#     ensures ParenthesesDepth(s, i, j) == ParenthesesDepth(s[..j], i, j)
-# {
-#     if i != j {
-#         ParenthesesSuffixEq(s, i+1, j);
-#     }
-# }
-#
-# lemma ParenthesesPrefixEq(s: string, i: int, j: int)
-#     decreases j -i
-#     requires 0 <= i <= j <= |s|
-#     ensures ParenthesesDepth(s, i, j) == ParenthesesDepth(s[i..], 0, j-i)
-# { }
-#
-# lemma ParenthesesSubstring(s: string, i: int, j: int)
-#     decreases j - i
-#     requires 0 <= i <= j <= |s|
-#     ensures ParenthesesDepth(s, i, j) == ParenthesesDepth(s[i..j], 0, j-i)
-# {
-#     assert ParenthesesDepth(s, i, j) == ParenthesesDepth(s[..j], i, j)
-#         by { ParenthesesSuffixEq(s, i, j); }
-#     assert ParenthesesDepth(s[..j], i, j) == ParenthesesDepth(s[i..j], 0, j-i)
-#         by { ParenthesesPrefixEq(s[..j], i, j); }
-# }
-#
-# lemma ParenthesesCommonSegment(s: string, t: string, i: int, j: int)
-#     requires 0 <= i <= j <= |s|
-#     requires 0 <= i <= j <= |t|
-#     requires s[i..j] == t[i..j]
-#     ensures ParenthesesDepth(s, i, j) == ParenthesesDepth(t, i, j)
-# {
-#     ParenthesesSubstring(s, i, j);
-#     ParenthesesSubstring(t, i, j);
-# }
-#
-# lemma ParenthesesDepthAppend(s: string, c: char)
-#     ensures ParenthesesDepth(s + [c], 0, |s|+1) == ParenthesesDepth(s, 0, |s|) + ParenthesesDepth([c], 0, 1)
-# {
-#     ParenthesesSubstring(s + [c], 0, |s|);
-#     ParenthesesSubstring(s + [c], |s|, |s| + 1);
-#     ParenthesesDepthSum(s + [c], 0, |s|, |s| + 1);
-# }
-#
-# lemma InnerDepthsPositiveAppendDecompose(s: string, c: char)
-#     requires InnerDepthsPositive(s)
-#     requires ParenthesesDepth(s, 0, |s|) > 0
-#     ensures InnerDepthsPositive(s + [c])
-# {
-#     forall i: int | 0 < i < |s| + 1
-#         ensures ParenthesesDepth(s + [c], 0, i) > 0
-#     {
-#         if (i <= |s|) {
-#             ParenthesesCommonSegment(s, s + [c], 0, i);
-#         }
-#     }
-# }
-#
-# method separate_paren_groups(paren_string: string) returns (res : seq<string>)
-#     // pre-conditions-start
-#     requires ParenthesesDepth(paren_string, 0, |paren_string|) == 0
-#     requires InnerDepthsNonnegative(paren_string)
-#     // pre-conditions-end
-#     // post-conditions-start
-#     ensures forall k :: 0 <= k < |res| ==> ParenthesesDepth(res[k], 0, |res[k]|) == 0
-#     ensures forall k :: 0 <= k < |res| ==> InnerDepthsPositive(res[k])
-#     // post-conditions-end
-# {
-#     // impl-start
-#     res := [];
-#     var current_string: string := "";
-#     var current_depth: int := 0;
-#
-#     for i := 0 to |paren_string|
-#         // invariants-start
-#         invariant forall k :: 0 <= k < |res| ==> ParenthesesDepth(res[k], 0, |res[k]|) == 0
-#         invariant forall k :: 0 <= k < |res| ==> InnerDepthsPositive(res[k])
-#         invariant ParenthesesDepth(paren_string, 0, |paren_string|) == 0
-#         invariant InnerDepthsNonnegative(paren_string)
-#         invariant ParenthesesDepth(paren_string, i, |paren_string|) + current_depth == 0
-#         invariant ParenthesesDepth(paren_string, 0, i) == current_depth
-#         invariant ParenthesesDepth(current_string, 0, |current_string|) == current_depth
-#         invariant InnerDepthsPositive(current_string)
-#         invariant current_string == "" || current_depth > 0
-#         invariant current_depth >= 0
-#         // invariants-end
-#     {
-#         var c: char := paren_string[i];
-#
-#         // lemma-use-start
-#         ParenthesesDepthAppend(current_string, c);
-#         ParenthesesDepthSum(paren_string, 0, i, i+1);
-#         if (current_string != "") {
-#             InnerDepthsPositiveAppendDecompose(current_string, c);
-#         }
-#         // lemma-use-end
-#
-#         if (c == '(')
-#         {
-#             current_depth := current_depth + 1;
-#             current_string := current_string + [c];
-#         }
-#         else if (c == ')')
-#         {
-#             current_depth := current_depth - 1;
-#             current_string := current_string + [c];
-#
-#             if (current_depth == 0)
-#             {
-#                 res := res + [current_string];
-#                 current_string := "";
-#             }
-#         }
-#     }
-#     // impl-end
-# }
-# """
-#     )
-#     assert dafny_lang.generate_validators(code, True) == dedent(
-#         """
-#         """
-#     )
-
 
 def test_dafny_generate_with_helper():
     dafny_lang = LanguageDatabase().get("dafny")
@@ -210,10 +48,6 @@ def test_dafny_generate_with_helper():
     )
     assert dafny_lang.generate_validators(code, True) == dedent(
         """\
-        function abs_copy_pure(n: int): nat { 
-             if n > 0 then n else -n  
-        }
-        
         function abs_valid_pure(n: int): nat { abs(n) }
         
         method main_valid(value: int) returns (result: int)

diff --git a/tests/test_nagini.py b/tests/test_nagini.py
@@ -253,22 +253,12 @@ def flip__case(s : List[int]) -> List[int] :
     assert nagini_lang.generate_validators(code, True) == dedent(
         """\
         
-        @Pure
-        def lower_copy_pure(c : int) -> bool :
-                
-            return ((0) <= (c)) and ((c) <= (25))
-        
         @Pure
         def lower_valid_pure(c : int) -> bool :
         
         
             ret = lower(c)
             return ret
-            
-        @Pure
-        def upper_copy_pure(c : int) -> bool :
-            
-            return ((26) <= (c)) and ((c) <= (51))
         
         @Pure
         def upper_valid_pure(c : int) -> bool :
@@ -277,36 +267,18 @@ def upper_valid_pure(c : int) -> bool :
             ret = upper(c)
             return ret
             
-        @Pure
-        def alpha_copy_pure(c : int) -> bool :
-            
-            return (lower_copy_pure(c)) or (upper_copy_pure(c))
-        
         @Pure
         def alpha_valid_pure(c : int) -> bool :
         
         
             ret = alpha(c)
             return ret
             
-        @Pure
-        def flip__char_copy_pure(c : int) -> int :
-            # pre-conditions-start
-            Ensures(lower_copy_pure(c) == upper_copy_pure(Result()))
-            Ensures(upper_copy_pure(c) == lower_copy_pure(Result()))
-            # pre-conditions-end
-            if lower_copy_pure(c):
-                return ((c) - (0)) + (26)
-            elif upper_copy_pure(c):
-                return ((c) + (0)) - (26)
-            elif True:
-                return c
-            
         @Pure
         def flip__char_valid_pure(c : int) -> int :
             # pre-conditions-start
-            Ensures(lower_copy_pure(c) == upper_copy_pure(Result()))
-            Ensures(upper_copy_pure(c) == lower_copy_pure(Result()))
+            Ensures(lower(c) == upper(Result()))
+            Ensures(upper(c) == lower(Result()))
             # pre-conditions-end
             
             ret = flip__char(c)
@@ -320,8 +292,8 @@ def flip__case_valid(s : List[int]) -> List[int] :
             Ensures(Acc(list_pred(s)))
             Ensures(Acc(list_pred(Result())))
             Ensures((len(Result())) == (len(s)))
-            Ensures(Forall(int, lambda d_0_i_: (Implies(((0) <= (d_0_i_)) and ((d_0_i_) < (len(s))), lower_copy_pure((s)[d_0_i_]) == upper_copy_pure((Result())[d_0_i_])))))
-            Ensures(Forall(int, lambda d_0_i_: (Implies(((0) <= (d_0_i_)) and ((d_0_i_) < (len(s))), upper_copy_pure((s)[d_0_i_]) == lower_copy_pure((Result())[d_0_i_])))))
+            Ensures(Forall(int, lambda d_0_i_: (Implies(((0) <= (d_0_i_)) and ((d_0_i_) < (len(s))), lower((s)[d_0_i_]) == upper((Result())[d_0_i_])))))
+            Ensures(Forall(int, lambda d_0_i_: (Implies(((0) <= (d_0_i_)) and ((d_0_i_) < (len(s))), upper((s)[d_0_i_]) == lower((Result())[d_0_i_])))))
             # post-conditions-end
             
             ret = flip__case(s)

diff --git a/tests/test_verus.py b/tests/test_verus.py
@@ -61,11 +61,6 @@ def test_verus_generate():
     assert verus_lang.generate_validators(code, True) == dedent(
         """\
         verus!{
-        spec fn test_copy_pure(val: i32) -> (result: i32) 
-        {
-            val
-        }
-        
         spec fn test_valid_pure(val: i32) -> (result: i32) 
         { let ret = test(val); ret }
         

diff --git a/verified_cogen/llm/llm.py b/verified_cogen/llm/llm.py
@@ -136,9 +136,8 @@ def rewrite(
         result = prompts.rewrite_prompt(self.prompt_dir).replace("{program}", prg)
         if text_description is not None and "{text_description}" in result:
             result = result.replace("{text_description}", text_description)
+        result = result + "\n" + additional_prompt
         self.add_user_prompt(result)
-        if additional_prompt:
-            self.add_user_prompt(additional_prompt)
         return self.make_request()
 
     def ask_for_fixed(self, err: str) -> str:

diff --git a/verified_cogen/runners/languages/dafny.py b/verified_cogen/runners/languages/dafny.py
@@ -44,6 +44,7 @@ def __init__(self, remove_annotations: list[AnnotationType]):  # type: ignore
             DAFNY_VALIDATOR_TEMPLATE,
             DAFNY_VALIDATOR_TEMPLATE_PURE,
             DAFNY_VALIDATOR_TEMPLATE_PURE_COPY,
+            AnnotationType.PURE in remove_annotations,
             [
                 annotation_by_type[annotation_type]
                 for annotation_type in remove_annotations

diff --git a/verified_cogen/runners/languages/language.py b/verified_cogen/runners/languages/language.py
@@ -42,6 +42,7 @@ class GenericLanguage(Language):
     validator_template: str
     check_patterns: list[str]
     inline_assert_comment: str
+    remove_pure: bool
 
     def __init__(  # type: ignore
         self,
@@ -50,6 +51,7 @@ def __init__(  # type: ignore
         validator_template: str,
         validator_template_pure: str,
         validator_template_pure_copy: str,
+        remove_pure: bool,
         check_patterns: list[str],
         inline_assert_comment: str,
         simple_comment: str,
@@ -62,6 +64,7 @@ def __init__(  # type: ignore
         self.validator_template_pure_copy = validator_template_pure_copy
         self.check_patterns = check_patterns
         self.inline_assert_comment = inline_assert_comment
+        self.remove_pure = remove_pure
 
     def _validators_from(
         self,
@@ -149,25 +152,34 @@ def generate_validators(self, code: str, validate_helpers: bool) -> str:
             method_name = pure_match.group(1)
             pure_names.append(method_name)
 
-        for pure_match in pure_methods:
-            method_name, parameters, returns, specs, body = (
-                pure_match.group(1),
-                pure_match.group(2),
-                pure_match.group(3),
-                pure_match.group(4),
-                pure_match.group(5),
-            )
+        if self.remove_pure:
+            for pure_match in pure_methods:
+                method_name, parameters, returns, specs, body = (
+                    pure_match.group(1),
+                    pure_match.group(2),
+                    pure_match.group(3),
+                    pure_match.group(4),
+                    pure_match.group(5),
+                )
 
-            specs = self.replace_pure(specs, pure_names)
-            body = self.replace_pure(body, pure_names)
+                specs = self.replace_pure(specs, pure_names)
+                body = self.replace_pure(body, pure_names)
 
-            validators.append(
-                self._validators_from_pure_copy(
-                    method_name, parameters, returns, specs, body
+                validators.append(
+                    self._validators_from_pure_copy(
+                        method_name, parameters, returns, specs, body
+                    )
                 )
-            )
 
-            if validate_helpers:
+        if validate_helpers:
+            for pure_match in pure_methods:
+                method_name, parameters, returns, specs, body = (
+                    pure_match.group(1),
+                    pure_match.group(2),
+                    pure_match.group(3),
+                    pure_match.group(4),
+                    pure_match.group(5),
+                )
                 validators.append(
                     self._validators_from_pure(
                         method_name, parameters, returns, specs, body
@@ -185,7 +197,8 @@ def generate_validators(self, code: str, validate_helpers: bool) -> str:
             if method_name in pure_names:
                 continue
 
-            specs = self.replace_pure(specs, pure_names)
+            if self.remove_pure:
+                specs = self.replace_pure(specs, pure_names)
 
             validators.append(
                 self._validators_from(method_name, parameters, returns, specs)