From 60f88bf4b0eca5ab0afa4da3751370336191dfce Mon Sep 17 00:00:00 2001 From: AlexShefY Date: Fri, 29 Nov 2024 11:05:58 +0100 Subject: [PATCH] refactoring --- .../ask_for_fixed.txt | 6 ++ .../ask_for_fixed_had_errors.txt | 6 ++ .../helpers.txt | 3 + .../invalid_helpers.txt | 9 ++ .../rewrite.txt | 87 +++++++++++++++++++ .../sys.txt | 4 + .../timeout.txt | 3 + verified_cogen/args.py | 10 ++- verified_cogen/experiments/incremental_run.py | 8 +- verified_cogen/main.py | 69 ++++++++++++--- verified_cogen/runners/invariants.py | 35 +++++++- .../runners/invariants_with_rewriting.py | 44 ---------- 12 files changed, 224 insertions(+), 60 deletions(-) create mode 100644 prompts/humaneval-nagini-without-impls-few-shot-text-description/ask_for_fixed.txt create mode 100644 prompts/humaneval-nagini-without-impls-few-shot-text-description/ask_for_fixed_had_errors.txt create mode 100644 prompts/humaneval-nagini-without-impls-few-shot-text-description/helpers.txt create mode 100644 prompts/humaneval-nagini-without-impls-few-shot-text-description/invalid_helpers.txt create mode 100644 prompts/humaneval-nagini-without-impls-few-shot-text-description/rewrite.txt create mode 100644 prompts/humaneval-nagini-without-impls-few-shot-text-description/sys.txt create mode 100644 prompts/humaneval-nagini-without-impls-few-shot-text-description/timeout.txt delete mode 100644 verified_cogen/runners/invariants_with_rewriting.py diff --git a/prompts/humaneval-nagini-without-impls-few-shot-text-description/ask_for_fixed.txt b/prompts/humaneval-nagini-without-impls-few-shot-text-description/ask_for_fixed.txt new file mode 100644 index 0000000..1631341 --- /dev/null +++ b/prompts/humaneval-nagini-without-impls-few-shot-text-description/ask_for_fixed.txt @@ -0,0 +1,6 @@ +The following errors occurred during verification: +{error} + +Please fix the error by adding, removing or modifying the implementation, invariants or assertions and return the fixed program. +Don't add any additional text comments, your response must contain only program with invariants. +Do not provide ANY explanations. Don't include markdown backticks. Respond only in Python code, nothing else. \ No newline at end of file diff --git a/prompts/humaneval-nagini-without-impls-few-shot-text-description/ask_for_fixed_had_errors.txt b/prompts/humaneval-nagini-without-impls-few-shot-text-description/ask_for_fixed_had_errors.txt new file mode 100644 index 0000000..9e6f1ab --- /dev/null +++ b/prompts/humaneval-nagini-without-impls-few-shot-text-description/ask_for_fixed_had_errors.txt @@ -0,0 +1,6 @@ +There are still some errors: +{error} + +Could you please fix them? +Don't add any additional text comments, your response must contain only program with invariants. +Do not provide ANY explanations. Don't include markdown backticks. Respond only in Python code, nothing else. \ No newline at end of file diff --git a/prompts/humaneval-nagini-without-impls-few-shot-text-description/helpers.txt b/prompts/humaneval-nagini-without-impls-few-shot-text-description/helpers.txt new file mode 100644 index 0000000..f090878 --- /dev/null +++ b/prompts/humaneval-nagini-without-impls-few-shot-text-description/helpers.txt @@ -0,0 +1,3 @@ +Generally, you should use helper functions (marked with @Pure annotation) only in invariants, asserts and conditions (in `if` or `while` conditions), not in the plain code. +But, the following helper functions you can use anywhere: {helpers}. +Do not change helper functions. \ No newline at end of file diff --git a/prompts/humaneval-nagini-without-impls-few-shot-text-description/invalid_helpers.txt b/prompts/humaneval-nagini-without-impls-few-shot-text-description/invalid_helpers.txt new file mode 100644 index 0000000..2a397ac --- /dev/null +++ b/prompts/humaneval-nagini-without-impls-few-shot-text-description/invalid_helpers.txt @@ -0,0 +1,9 @@ +We detected an improper usage of helper functions. Here is the list of helper functions used in a wrong way: +{invalid_helpers} +You should use helper functions only in invariants, asserts and conditions (in `if` or `while` conditions), not in the plain code. +The following helper functions you can use anywhere: {helpers}. +We replaced all improper usages with `invalid_call()` and got the following program: +{program} +You should rewrite this program without changing pre/postconditions and helper functions (denoted with @Pure). +After rewriting your code should verify. +Your code should not contain any `invalid_call()` invocations. \ No newline at end of file diff --git a/prompts/humaneval-nagini-without-impls-few-shot-text-description/rewrite.txt b/prompts/humaneval-nagini-without-impls-few-shot-text-description/rewrite.txt new file mode 100644 index 0000000..f3fa4b5 --- /dev/null +++ b/prompts/humaneval-nagini-without-impls-few-shot-text-description/rewrite.txt @@ -0,0 +1,87 @@ +Rewrite the following Nagini code with implementations of some functions missing. While rewriting it, ensure that it verifies. Include invariants and assertions. Don't remove any helper functions (they are marked with @Pure annotation), they are there to help you. Prefer loops to recursion. +Use helper functions only in invariants, asserts and conditions (in `if` or `while` conditions). Don't use helpers in the plain code. +Do not change helper functions. +Add code and invariants to other functions. +Ensure that the invariants are as comprehensive as they can be. +Even if you think some invariant is not totally necessary, better add it than not. +Don't add any additional text comments, your response must contain only program with invariants. +Do not provide ANY explanations. Don't include markdown backticks. Respond only in Python code, nothing else. + + +You remember the following aspects of Nagini syntax: + +1. Nagini DOES NOT SUPPORT some Python features as list comprehensions (k + 1 for k in range(5)), as double inequalities (a <= b <= c). +Instead of double inequalities it's customary to use two separate inequalities (a <= b and b <= c). + +2. In Nagini method preconditions (Requires) and postconditions (Ensures) placed right after method signature, like here: +" +def Sum(a : List[int], s : int, t : int) -> int : + Requires(Acc(list_pred(a))) + Requires(((0) <= (s)) and ((s) <= (t)) and ((t) <= (len(a)))) + Ensures(Acc(list_pred(a))) + ... +" + +3. Invariant are placed right after `while` statement and before the code of `while` body: +" + while i < len(numbers): + Invariant(Acc(list_pred(numbers))) + Invariant(0 <= i and i <= len(numbers)) + s = s + numbers[i] +" +Invariants CANNOT be placed in any other position. +You remember that each invariant (and each expression) should contain equal number of opening and closing brackets, so that it is valid. +You should sustain balanced parentheses. + +4. Nagini requires special annotations for working with lists `Acc(list_pred(..))`. You can use these constructs only inside `Invariant`, +anywhere else you should not use `Acc()` or `list_pred()`: +" + while i < len(numbers): + Invariant(Acc(list_pred(numbers))) +" + +5. Nagini contains `Forall` and `Exists` constructs that can be used in invariants. First argument of Forall/Exists is typically a type (i.e `int`), +second argument is a lambda. `Forall(type, lambda x : a)` denotes that assertion `a` is true for every element `x` of type `type`. + +6. In Nagini `Implies(e1, a2)` plays role of implication. `Implies(e1, a2)` denotes that assertion a2 holds if boolean expression e1 is true. +You can use it inside invariants and asserts. + +You might need to work with accumulating functions, such as sum, so here's an example of how to do that: +``` +from typing import cast, List, Dict, Set, Optional, Union +from nagini_contracts.contracts import * + +@Pure +def Sum(a : List[int], s : int, t : int) -> int : + Requires(Acc(list_pred(a))) + Requires(((0) <= (s)) and ((s) <= (t)) and ((t) <= (len(a)))) + + if s == t: + return 0 + else: + return (a)[t - 1] + (Sum(a, s, t - 1)) + +def sum_loop(numbers: List[int]) -> int: + Requires(Acc(list_pred(numbers))) + Ensures(Acc(list_pred(numbers))) + Ensures(Result() == Sum(numbers, 0, len(numbers))) + s = int(0) + i = int(0) + while (i) < (len(numbers)): + Invariant(Acc(list_pred(numbers))) + Invariant(0 <= i and i <= len(numbers)) + Invariant(Forall(int, lambda d_1_p_: + (Implies(0 <= d_1_p_ and d_1_p_ < len(numbers), Sum(numbers, 0, d_1_p_ + 1) == Sum(numbers, 0, d_1_p_) + numbers[d_1_p_]), [[Sum(numbers, 0, d_1_p_ + 1)]]))) + Invariant(s == Sum(numbers, 0, i)) + Assert(Sum(numbers, 0, i + 1) == Sum(numbers, 0, i) + numbers[i]) + s = s + (numbers)[i] + i = i + 1 + return s +``` + +To help you, here's a text description given to a person who wrote this program: + +{text_description} + +The program: +{program} \ No newline at end of file diff --git a/prompts/humaneval-nagini-without-impls-few-shot-text-description/sys.txt b/prompts/humaneval-nagini-without-impls-few-shot-text-description/sys.txt new file mode 100644 index 0000000..b462d86 --- /dev/null +++ b/prompts/humaneval-nagini-without-impls-few-shot-text-description/sys.txt @@ -0,0 +1,4 @@ +You are an expert in a Python verification framework Nagini. +You will be given tasks dealing with Python programs including precise annotations. +Do not provide ANY explanations. Don't include markdown backticks. Respond only in Python code, nothing else. +You respond only with code blocks. \ No newline at end of file diff --git a/prompts/humaneval-nagini-without-impls-few-shot-text-description/timeout.txt b/prompts/humaneval-nagini-without-impls-few-shot-text-description/timeout.txt new file mode 100644 index 0000000..f69fd69 --- /dev/null +++ b/prompts/humaneval-nagini-without-impls-few-shot-text-description/timeout.txt @@ -0,0 +1,3 @@ +The verifier timed out during the verification. +This usually means that the provided invariants were too broad or were difficult to check. +Could you please try to improve the invariants and try again? diff --git a/verified_cogen/args.py b/verified_cogen/args.py index 730aaeb..5cc6c04 100644 --- a/verified_cogen/args.py +++ b/verified_cogen/args.py @@ -1,6 +1,6 @@ import argparse import os -from typing import Optional, no_type_check +from typing import Optional, no_type_check, List from verified_cogen.tools.modes import VALID_MODES @@ -26,6 +26,7 @@ class ProgramArgs: remove_conditions: bool remove_implementations: bool include_text_descriptions: bool + manual_rewriters: List[str] @no_type_check def __init__(self, args): @@ -49,6 +50,7 @@ def __init__(self, args): self.remove_conditions = args.remove_conditions self.remove_implementations = args.remove_implementations self.include_text_descriptions = args.include_text_descriptions + self.manual_rewriters = args.manual_rewriters def get_default_parser(): @@ -129,6 +131,12 @@ def get_default_parser(): default=False, action="store_true", ) + parser.add_argument( + "--manual-rewriters", + help="Manual rewriters for additional program modifications", + default=[], + nargs="+", + ) return parser diff --git a/verified_cogen/experiments/incremental_run.py b/verified_cogen/experiments/incremental_run.py index 0b1926a..1f9cc7e 100644 --- a/verified_cogen/experiments/incremental_run.py +++ b/verified_cogen/experiments/incremental_run.py @@ -5,7 +5,7 @@ from verified_cogen.args import ProgramArgs, get_default_parser from verified_cogen.llm.llm import LLM -from verified_cogen.main import make_runner_cls +from verified_cogen.main import make_runner_cls, construct_rewriter from verified_cogen.runners import RunnerConfig from verified_cogen.runners.languages import AnnotationType, register_basic_languages from verified_cogen.tools import ( @@ -36,6 +36,7 @@ def main(): "--ignore-failed", help="Ignore failed files", action="store_true" ) args = IncrementalRunArgs(parser.parse_args()) + print(args.manual_rewriters) all_removed = [AnnotationType.INVARIANTS, AnnotationType.ASSERTS] if args.remove_conditions: @@ -85,9 +86,12 @@ def main(): args.prompts_directory, args.temperature, ) + rewriter = construct_rewriter( + extension_from_file_list([file]), args.manual_rewriters + ) runner = make_runner_cls( args.bench_type, extension_from_file_list([file]), config - )(llm, logger, verifier) + )(llm, logger, verifier, rewriter) display_name = rename_file(file) marker_name = str(file.relative_to(directory)) if ( diff --git a/verified_cogen/main.py b/verified_cogen/main.py index b8a4c9e..d995f99 100644 --- a/verified_cogen/main.py +++ b/verified_cogen/main.py @@ -2,18 +2,24 @@ import pathlib from logging import Logger from pathlib import Path -from typing import Callable +from typing import Callable, List, Optional from verified_cogen.args import ProgramArgs, get_args from verified_cogen.llm import LLM from verified_cogen.runners import Runner, RunnerConfig +from verified_cogen.runners.flush import FlushRunner from verified_cogen.runners.generate import GenerateRunner from verified_cogen.runners.generic import GenericRunner from verified_cogen.runners.invariants import InvariantRunner from verified_cogen.runners.languages import register_basic_languages from verified_cogen.runners.languages.language import AnnotationType, LanguageDatabase +from verified_cogen.runners.rewriters import Rewriter +from verified_cogen.runners.rewriters.nagini_rewriter import NaginiRewriter +from verified_cogen.runners.rewriters.nagini_rewriter_fixing import NaginiRewriterFixing +from verified_cogen.runners.rewriters.nagini_rewriter_fixing_ast import ( + NaginiRewriterFixingAST, +) from verified_cogen.runners.step_by_step import StepByStepRunner -from verified_cogen.runners.step_by_step_flush import StepByStepFlushRunner from verified_cogen.runners.validating import ValidatingRunner from verified_cogen.tools import ( ext_glob, @@ -33,9 +39,10 @@ def run_once( files: list[Path], args: ProgramArgs, - runner_cls: Callable[[LLM, Logger, Verifier], Runner], + runner_cls: Callable[[LLM, Logger, Verifier, Optional[Rewriter]], Runner], verifier: Verifier, mode: Mode, + rewriter: Optional[Rewriter], is_once: bool, ) -> tuple[int, int, int, dict[str, int]]: _init: tuple[list[str], list[str], list[str]] = ([], [], []) @@ -51,7 +58,7 @@ def run_once( args.temperature, ) - runner = runner_cls(llm, logger, verifier) + runner = runner_cls(llm, logger, verifier, rewriter) retries = args.retries + 1 tries = None @@ -96,10 +103,36 @@ def run_once( return len(success_zero_tries), len(success), len(failed), cnt +def construct_nagini_rewriter(runner_types: List[str]) -> Optional[Runner]: + runner = None + for runner_type in runner_types: + if runner_type == "NaginiRewriter": + runner = NaginiRewriter() + elif runner_type == "NaginiRewriterFixing": + runner = NaginiRewriterFixing(runner) + elif runner_type == "NaginiRewriterFixingAST": + runner = NaginiRewriterFixingAST(runner) + else: + raise ValueError(f"Unexpected nagini rewriter type: {runner_type}") + return runner + + +def construct_rewriter(extension: str, runner_types: List[str]) -> Optional[Runner]: + if extension == "py": + return construct_nagini_rewriter(runner_types) + if runner_types: + raise ValueError( + f"Not implemented rewriters for language: {LanguageDatabase().regularise[extension]}" + ) + return None + + def make_runner_cls( bench_type: str, extension: str, config: RunnerConfig -) -> Callable[[LLM, Logger, Verifier], Runner]: - def runner_cls(llm: LLM, logger: Logger, verifier: Verifier): +) -> Callable[[LLM, Logger, Verifier, Optional[Rewriter]], Runner]: + def runner_cls( + llm: LLM, logger: Logger, verifier: Verifier, rewriter: Optional[Rewriter] + ): if bench_type == "invariants": return InvariantRunner(llm, logger, verifier, config) elif bench_type == "generic": @@ -108,17 +141,23 @@ def runner_cls(llm: LLM, logger: Logger, verifier: Verifier): return GenerateRunner(llm, logger, verifier, config) elif bench_type == "validating": return ValidatingRunner( - InvariantRunner(llm, logger, verifier, config), + InvariantRunner(llm, logger, verifier, config, rewriter), LanguageDatabase().get(extension), ) elif bench_type == "step-by-step": return ValidatingRunner( - StepByStepRunner(InvariantRunner(llm, logger, verifier, config)), + StepByStepRunner( + InvariantRunner(llm, logger, verifier, config, rewriter) + ), LanguageDatabase().get(extension), ) elif bench_type == "step-by-step-flush": return ValidatingRunner( - StepByStepFlushRunner(InvariantRunner(llm, logger, verifier, config)), + FlushRunner( + StepByStepRunner( + InvariantRunner(llm, logger, verifier, config, rewriter) + ) + ), LanguageDatabase().get(extension), ) else: @@ -152,13 +191,18 @@ def main(): verifier = Verifier(args.verifier_command, args.verifier_timeout) config = RunnerConfig( - log_tries=log_tries, include_text_descriptions=args.include_text_descriptions + log_tries=log_tries, + include_text_descriptions=args.include_text_descriptions, + remove_implementations=args.remove_implementations, ) if args.dir is not None: files = sorted(list(pathlib.Path(args.dir).glob(ext_glob(args.filter_by_ext)))) runner_cls = make_runner_cls( args.bench_type, extension_from_file_list(files), config ) + rewriter = construct_rewriter( + extension_from_file_list(files), args.manual_rewriters + ) runner = runner_cls( LLM( args.grazie_token, @@ -168,6 +212,7 @@ def main(): ), logger, verifier, + rewriter, ) for file in files: with open(file) as f: @@ -175,14 +220,14 @@ def main(): if args.runs == 1: _, _, _, total_cnt = run_once( - files, args, runner_cls, verifier, mode, is_once=True + files, args, runner_cls, verifier, mode, rewriter, is_once=True ) else: success_zero_tries, success, failed = 0, 0, 0 total_cnt = {rename_file(f): 0 for f in files} for _ in range(args.runs): s0, s, f, cnt = run_once( - files, args, runner_cls, verifier, mode, is_once=False + files, args, runner_cls, verifier, mode, rewriter, is_once=False ) success_zero_tries += s0 success += s diff --git a/verified_cogen/runners/invariants.py b/verified_cogen/runners/invariants.py index 19bcb4c..a2d9e59 100644 --- a/verified_cogen/runners/invariants.py +++ b/verified_cogen/runners/invariants.py @@ -5,8 +5,10 @@ from typing_extensions import Optional from verified_cogen.llm import LLM -from verified_cogen.runners import Runner +from verified_cogen.runners import Runner, RunnerConfig +from verified_cogen.runners.rewriters import Rewriter from verified_cogen.tools.modes import Mode +from verified_cogen.tools.verifier import Verifier logger = logging.getLogger(__name__) @@ -44,6 +46,20 @@ def insert_invariants(llm: LLM, prg: str, inv: str, mode: Mode): class InvariantRunner(Runner): + rewriter: Optional[Rewriter] + previous_prg: Optional[str] = None + + def __init__( + self, + llm: LLM, + logger: logging.Logger, + verifier: Verifier, + config: RunnerConfig, + rewriter: Optional[Rewriter] = None, + ): + super().__init__(llm, logger, verifier, config) + self.rewriter = rewriter + def rewrite( self, prg: str, @@ -67,3 +83,20 @@ def precheck(self, prg: str, mode: Mode): raise ValueError( "Multiple loops in program, not supported in regex mode" ) + + def postprocess(self, inv_prg: str) -> str: + if self.rewriter is not None: + prg, prompt = self.rewriter.rewrite(inv_prg) + + if prompt != "": + self.logger.info("Manually rewrite:") + self.logger.info(inv_prg) + self.logger.info("Manual rewriting results:") + self.logger.info(prompt) + self.llm.add_user_prompt(prompt) + else: + prg = super().postprocess(inv_prg) + + self.previous_prg = prg + + return prg diff --git a/verified_cogen/runners/invariants_with_rewriting.py b/verified_cogen/runners/invariants_with_rewriting.py deleted file mode 100644 index cd6a533..0000000 --- a/verified_cogen/runners/invariants_with_rewriting.py +++ /dev/null @@ -1,44 +0,0 @@ -from logging import Logger -from typing import Optional - -from verified_cogen.llm import LLM -from verified_cogen.runners import RunnerConfig -from verified_cogen.runners.invariants import InvariantRunner -from verified_cogen.runners.rewriters import Rewriter -from verified_cogen.tools import rewrite_error -from verified_cogen.tools.verifier import Verifier - - -class InvariantsWithRewriting(InvariantRunner): - rewriter: Rewriter - previous_prg: Optional[str] = None - - def __init__( - self, - llm: LLM, - logger: Logger, - verifier: Verifier, - config: RunnerConfig, - rewriter: Rewriter, - ): - super().__init__(llm, logger, verifier, config) - self.rewriter = rewriter - - def postprocess(self, inv_prg: str) -> str: - prg, prompt = self.rewriter.rewrite(inv_prg) - - if prompt != "": - self.logger.info("Manually rewrite:") - self.logger.info(inv_prg) - self.logger.info("Manual rewriting results:") - self.logger.info(prompt) - self.llm.add_user_prompt(prompt) - - self.previous_prg = prg - - return prg - - def ask_for_fixed(self, err: str) -> str: - assert self.previous_prg is not None - modified_err = rewrite_error(self.previous_prg, err) - return super().ask_for_fixed(modified_err)