Merge pull request #18 from JetBrains-Research/manual-rewriting

Manual rewriting
JetBrains-Research · Nov 7, 2024 · f5731d2 · f5731d2
2 parents 356df09 + d9b44c2
commit f5731d2
Show file tree

Hide file tree

Showing 14 changed files with 921 additions and 14 deletions.
diff --git a/prompts/humaneval-nagini-cot-instruct/ask_for_fixed.txt b/prompts/humaneval-nagini-cot-instruct/ask_for_fixed.txt
@@ -3,4 +3,5 @@ The following errors occurred during verification:
 
 Please fix the error by adding, removing or modifying the invariants and return the fixed program.
 Don't add any additional text comments, your response must contain only program with invariants.
+You SHOULD NOT modify anything except invariants and asserts.
 Do not provide ANY explanations. Don't include markdown backticks. Respond only in Python code, nothing else.
diff --git a/prompts/humaneval-nagini-cot-instruct/ask_for_fixed_had_errors.txt b/prompts/humaneval-nagini-cot-instruct/ask_for_fixed_had_errors.txt
@@ -3,4 +3,5 @@ There are still some errors:
 
 Could you please fix them?
 Don't add any additional text comments, your response must contain only program with invariants.
+You SHOULD NOT modify anything except invariants and asserts.
 Do not provide ANY explanations. Don't include markdown backticks. Respond only in Python code, nothing else.
diff --git a/prompts/humaneval-nagini-cot-instruct/timeout.txt b/prompts/humaneval-nagini-cot-instruct/timeout.txt
@@ -1,3 +1,4 @@
 The verifier timed out during the verification.
 This usually means that the provided invariants were too broad or were difficult to check.
+You SHOULD NOT modify anything except invariants and asserts.
 Could you please try to improve the invariants and try again?
diff --git a/scripts/plot_statistics.py b/scripts/plot_statistics.py
@@ -1,5 +1,5 @@
 # %%
-with open('log_tries/logs3.txt', 'r') as file:
+with open('log_tries/logs9.txt', 'r') as file:
     # Read the file and split the contents into lines
     lines = file.readlines()
 
@@ -16,6 +16,7 @@
     "Loop invariant might not be preserved." : 0,
     "Loop invariant might not hold on entry." : 0,
     "Assert might fail." : 0,
+    "Verification timed out" : 0,
 }
 
 explanations = {
@@ -27,24 +28,25 @@
     "Loop invariant might not be preserved." : "Loop invariant might not be preserved",
     "Loop invariant might not hold on entry." : "Loop invariant might not hold on entry",
     "Assert might fail." : "Assert might fail",
+    "Verification timed out" : "Verification timed out",
 }
 
 dict_erros_numbered = {}
 
 for (key, value) in dict_erros.items():
-    for j in range(1, 11):
+    for j in range(1, 6):
         dict_erros_numbered[(key, j)] = 0
 
 print(dict_erros_numbered)
 
 # The 'lines' variable will now be a list of lines from the file
 idx_line = 0
 for line in lines:
-    if "Verification failed:" in line:
+    if "Verification failed:" in line or "Verification timed out" in line:
         idx_line += 1
-    if "verified with" in line:
+    if "Verified" in line or "Failed to verify" in line:
         idx_line = 0
-    if idx_line == 11:
+    if idx_line == 6:
         idx_line = 1
     for (key, value) in dict_erros.items():
         if key in line:
@@ -80,7 +82,7 @@
 # Step 3: Customize the plot
 plt.xlabel('Try')
 
-tries_range = range(1, 11)  # Assuming the "tries" are from 1 to 5
+tries_range = range(1, 6)  # Assuming the "tries" are from 1 to 5
 plt.xticks(tries_range)
 
 plt.ylabel('Number of Occurrences')

diff --git a/tests/test_inequality_replacer.py b/tests/test_inequality_replacer.py
@@ -0,0 +1,262 @@
+from textwrap import dedent
+from verified_cogen.tools.inequality_replacer import replace_inequalities, contains_double_inequality
+
+def test_simple_contains1():
+    code = dedent(
+        """\
+        def test():
+            if a >= b >= c > d:
+                print('Chained')
+        """
+    )
+
+    assert contains_double_inequality(code)
+
+def test_simple_contains2():
+    code = dedent(
+        """\
+Implies(1 <= k < d_6_i_, xs[k - 1] <= xs[k])
+d_4_increasing_ == Forall(int, lambda k: Implies(1 <= k < d_6_i_, xs[k - 1] <= xs[k]))
+Invariant(d_4_increasing_ == Forall(int, lambda k: Implies(1 <= k < d_6_i_, xs[k - 1] <= xs[k])))
+        """
+    )
+
+    new_code = replace_inequalities(code)
+    compare_code = dedent(
+        """\
+Implies(1 <= k and k < d_6_i_, xs[k - 1] <= xs[k])
+d_4_increasing_ == Forall(int, lambda k: Implies(1 <= k and k < d_6_i_, xs[k - 1] <= xs[k]))
+Invariant(d_4_increasing_ == Forall(int, lambda k: Implies(1 <= k and k < d_6_i_, xs[k - 1] <= xs[k])))"""
+    )
+
+    assert contains_double_inequality(code)
+    assert new_code == compare_code
+
+def test_simple_contains3():
+    code = dedent(
+        """\
+        def test():
+            if a < b and b < c and c <= d:
+                print('Chained')
+        """
+    )
+
+    assert not contains_double_inequality(code)
+
+def test_simple():
+    code = dedent(
+        """\
+        def test():
+            if a < b < c < d:
+                print('Chained')
+        """
+    )
+
+    new_code = replace_inequalities(code)
+
+    compare_code = dedent(
+        """\
+        def test():
+            if a < b and b < c and (c < d):
+                print('Chained')"""
+    )
+
+    assert new_code == compare_code
+
+def test_simple1():
+    code = dedent(
+        """\
+        def test():
+            if a < b <= c < d:
+                print('Chained')
+        """
+    )
+
+    new_code = replace_inequalities(code)
+
+    compare_code = dedent(
+        """\
+        def test():
+            if a < b and b <= c and (c < d):
+                print('Chained')"""
+    )
+
+    assert new_code == compare_code
+
+def test_complicated():
+    code = dedent(
+        """\
+from typing import cast, List, Dict, Set, Optional, Union
+from nagini_contracts.contracts import *
+
+@Pure
+def factorial__spec(n : int) -> int :
+    Requires(n >= -1)
+    Ensures(Result() >= 0)
+    if n == -1:
+        return 1
+    else:
+        return (n + 1) * factorial__spec(n - 1)
+
+@Pure
+def sum__spec(n : int) -> int :
+    Requires(n >= -1)
+    Ensures(Result() >= 0)
+    if 0 > n:
+        return 0
+    else:
+        return n + 1 + sum__spec(n - 1)
+
+def f(n : int) -> List[int]:
+    Requires((n) >= (1))
+    Ensures(Acc(list_pred(Result())))
+    Ensures((len(Result())) == (n))
+    Ensures(Forall(int, lambda d_2_i_:
+        not ((((d_2_i_) >= (0)) and ((d_2_i_) < (len(Result())))) and (((d_2_i_ % 2)) == (0))) or (((Result())[d_2_i_]) == (factorial__spec(d_2_i_ - 1)))))
+    Ensures(Forall(int, lambda d_3_i_:
+        not ((((d_3_i_) >= (0)) and ((d_3_i_) < (len(Result())))) and (((d_3_i_ % 2)) != (0))) or (((Result())[d_3_i_]) == (sum__spec(d_3_i_ - 1)))))
+
+    result: List[int] = []
+    d_4_i_ = 0
+    while (d_4_i_) < (n):
+        Invariant(0 <= d_4_i_ <= n)
+        Invariant(len(result) == d_4_i_)
+        Invariant(Acc(list_pred(result)))
+        Invariant(Forall(int, lambda i:Implies( (0 <= i < d_4_i_ and i % 2 == 0) , result[i] == factorial__spec(i - 1))))
+        Invariant(Forall(int, lambda i:Implies( (0 <= i < d_4_i_ and i % 2 != 0) , result[i] == sum__spec(i - 1))))
+
+        if ((d_4_i_ % 2)) == (0):
+            d_7_x_ = 1
+            d_8_j_ = 0
+            while (d_8_j_) < (d_4_i_):
+                Invariant(0 <= d_8_j_ <= d_4_i_)
+                Invariant(d_7_x_ == factorial__spec(d_8_j_ - 1))
+                d_7_x_ = (d_7_x_) * (d_8_j_ + 1)
+                d_8_j_ = (d_8_j_) + (1)
+            result = (result) + [d_7_x_]
+        else:
+            d_9_x_ = 0
+            d_10_j_ = 0
+            while (d_10_j_) < (d_4_i_):
+                Invariant(0 <= d_10_j_ <= d_4_i_)
+                Invariant(d_9_x_ == sum__spec(d_10_j_ - 1))
+                d_9_x_ = (d_9_x_) + (d_10_j_ + 1)
+                d_10_j_ = (d_10_j_) + (1)
+            result = (result) + [d_9_x_]
+        d_4_i_ = (d_4_i_) + (1)
+    return result
+        """
+    )
+
+    new_code = replace_inequalities(code)
+
+    compare_code = dedent(
+        """\
+from typing import cast, List, Dict, Set, Optional, Union
+from nagini_contracts.contracts import *
+
+@Pure
+def factorial__spec(n: int) -> int:
+    Requires(n >= -1)
+    Ensures(Result() >= 0)
+    if n == -1:
+        return 1
+    else:
+        return (n + 1) * factorial__spec(n - 1)
+
+@Pure
+def sum__spec(n: int) -> int:
+    Requires(n >= -1)
+    Ensures(Result() >= 0)
+    if 0 > n:
+        return 0
+    else:
+        return n + 1 + sum__spec(n - 1)
+
+def f(n: int) -> List[int]:
+    Requires(n >= 1)
+    Ensures(Acc(list_pred(Result())))
+    Ensures(len(Result()) == n)
+    Ensures(Forall(int, lambda d_2_i_: not ((d_2_i_ >= 0 and d_2_i_ < len(Result())) and d_2_i_ % 2 == 0) or Result()[d_2_i_] == factorial__spec(d_2_i_ - 1)))
+    Ensures(Forall(int, lambda d_3_i_: not ((d_3_i_ >= 0 and d_3_i_ < len(Result())) and d_3_i_ % 2 != 0) or Result()[d_3_i_] == sum__spec(d_3_i_ - 1)))
+    result: List[int] = []
+    d_4_i_ = 0
+    while d_4_i_ < n:
+        Invariant(0 <= d_4_i_ and d_4_i_ <= n)
+        Invariant(len(result) == d_4_i_)
+        Invariant(Acc(list_pred(result)))
+        Invariant(Forall(int, lambda i: Implies((0 <= i and i < d_4_i_) and i % 2 == 0, result[i] == factorial__spec(i - 1))))
+        Invariant(Forall(int, lambda i: Implies((0 <= i and i < d_4_i_) and i % 2 != 0, result[i] == sum__spec(i - 1))))
+        if d_4_i_ % 2 == 0:
+            d_7_x_ = 1
+            d_8_j_ = 0
+            while d_8_j_ < d_4_i_:
+                Invariant(0 <= d_8_j_ and d_8_j_ <= d_4_i_)
+                Invariant(d_7_x_ == factorial__spec(d_8_j_ - 1))
+                d_7_x_ = d_7_x_ * (d_8_j_ + 1)
+                d_8_j_ = d_8_j_ + 1
+            result = result + [d_7_x_]
+        else:
+            d_9_x_ = 0
+            d_10_j_ = 0
+            while d_10_j_ < d_4_i_:
+                Invariant(0 <= d_10_j_ and d_10_j_ <= d_4_i_)
+                Invariant(d_9_x_ == sum__spec(d_10_j_ - 1))
+                d_9_x_ = d_9_x_ + (d_10_j_ + 1)
+                d_10_j_ = d_10_j_ + 1
+            result = result + [d_9_x_]
+        d_4_i_ = d_4_i_ + 1
+    return result"""
+    )
+
+    assert new_code == compare_code
+
+def test_complicated1():
+
+    code = dedent("""\
+def BubbleSort(a1 : List[int]) -> List[int]:
+    Requires(Acc(list_pred(a1), 1/2))
+    Requires(Forall(int, lambda i : Implies(i >= 0 and i < len(a1), a1[i] >= 0)))
+    Ensures(Acc(list_pred(a1), 1/2))
+    Ensures(Acc(list_pred(Result())))
+    Ensures((len(a1)) == (len(Result())))
+    Ensures(Forall(int, lambda i : Implies(i >= 0 and i < len(Result()), Result()[i] >= 0)))
+    Ensures(Forall(int, lambda d_0_i_:
+        Forall(int, lambda d_1_j_:
+            Implies((((0) <= (d_0_i_)) and ((d_0_i_) < (d_1_j_))) and ((d_1_j_) < (len((Result())))), popcount((Result())[d_0_i_]) <= popcount((Result())[d_1_j_])))))
+
+    a = list(a1)
+    d_2_i_ = int(0)
+    d_2_i_ = (len((a))) - (1)
+    while (d_2_i_) > (0):
+        Invariant(Acc(list_pred(a)))
+        Invariant(0 <= d_2_i_ < len(a))
+        Invariant(Forall(int, lambda k: Implies(d_2_i_ < k and k < len(a), Forall(int, lambda m: Implies(0 <= m and m < k, popcount(a[m]) <= popcount(a[k]))))))
+        Invariant(Forall(int, lambda i: Implies(0 <= i and i < len(a), a[i] >= 0)))
+        Invariant(len(a) == len(a1))
+        Invariant(Forall(int, lambda i: Implies(0 <= i and i < len(a), Exists(int, lambda j: (0 <= j and j < len(a1) and a[i] == a1[j])))))
+        Invariant(Forall(int, lambda i: Implies(0 <= i and i < len(a1), Exists(int, lambda j: (0 <= j and j < len(a) and a1[i] == a[j])))))
+
+        d_7_j_ = int(0)
+        d_7_j_ = 0
+        while (d_7_j_) < (d_2_i_):
+            Invariant(Acc(list_pred(a)))
+            Invariant(0 <= d_7_j_ <= d_2_i_ < len(a))
+            Invariant(Forall(int, lambda k: Implies(0 <= k and k < d_7_j_, popcount(a[k]) <= popcount(a[d_7_j_]))))
+            Invariant(Forall(int, lambda i: Implies(0 <= i and i < len(a), a[i] >= 0)))
+            Invariant(len(a) == len(a1))
+            Invariant(Forall(int, lambda i: Implies(0 <= i and i < len(a), Exists(int, lambda j: (0 <= j and j < len(a1) and a[i] == a1[j])))))
+            Invariant(Forall(int, lambda i: Implies(0 <= i and i < len(a1), Exists(int, lambda j: (0 <= j and j < len(a) and a1[i] == a[j])))))
+
+            if popcount((a)[d_7_j_]) > popcount((a)[(d_7_j_) + (1)]):
+                rhs0_ = (a)[(d_7_j_) + (1)]
+                (a)[(d_7_j_) + (1)] = (a)[d_7_j_]
+                (a)[d_7_j_] = rhs0_
+            d_7_j_ = (d_7_j_) + (1)
+        d_2_i_ = (d_2_i_) - (1)
+    return a
+    """)
+
+    new_code = replace_inequalities(code)
+
+    print(new_code)