From 72d65dbba8b9d3bd1f9994d8b0e954eb74d64f50 Mon Sep 17 00:00:00 2001 From: Yiannis Charalambous Date: Sat, 13 Jul 2024 22:00:28 +0100 Subject: [PATCH 01/18] Moved chat classes into chats directory --- esbmc_ai/__main__.py | 2 +- esbmc_ai/chats/__init__.py | 13 +++++++ esbmc_ai/{ => chats}/base_chat_interface.py | 4 +- .../latest_state_solution_generator.py | 2 +- esbmc_ai/{ => chats}/solution_generator.py | 2 +- esbmc_ai/{ => chats}/user_chat.py | 2 +- esbmc_ai/commands/fix_code_command.py | 7 +--- esbmc_ai/optimize_code.py | 37 ------------------- esbmc_ai/reverse_order_solution_generator.py | 2 +- 9 files changed, 22 insertions(+), 49 deletions(-) create mode 100644 esbmc_ai/chats/__init__.py rename esbmc_ai/{ => chats}/base_chat_interface.py (97%) rename esbmc_ai/{ => chats}/latest_state_solution_generator.py (93%) rename esbmc_ai/{ => chats}/solution_generator.py (99%) rename esbmc_ai/{ => chats}/user_chat.py (98%) delete mode 100644 esbmc_ai/optimize_code.py diff --git a/esbmc_ai/__main__.py b/esbmc_ai/__main__.py index 8c528d3..a89c4a8 100755 --- a/esbmc_ai/__main__.py +++ b/esbmc_ai/__main__.py @@ -30,7 +30,7 @@ ) from esbmc_ai.loading_widget import LoadingWidget, create_loading_widget -from esbmc_ai.user_chat import UserChat +from esbmc_ai.chats import UserChat from esbmc_ai.logging import print_horizontal_line, printv, printvv from esbmc_ai.esbmc_util import esbmc from esbmc_ai.chat_response import FinishReason, ChatResponse diff --git a/esbmc_ai/chats/__init__.py b/esbmc_ai/chats/__init__.py new file mode 100644 index 0000000..e2b4c2a --- /dev/null +++ b/esbmc_ai/chats/__init__.py @@ -0,0 +1,13 @@ +# Author: Yiannis Charalambous + +from .base_chat_interface import BaseChatInterface +from .latest_state_solution_generator import LatestStateSolutionGenerator +from .solution_generator import SolutionGenerator +from .user_chat import UserChat + +__all__ = [ + "BaseChatInterface", + "LatestStateSolutionGenerator", + "SolutionGenerator", + "UserChat", +] diff --git a/esbmc_ai/base_chat_interface.py b/esbmc_ai/chats/base_chat_interface.py similarity index 97% rename from esbmc_ai/base_chat_interface.py rename to esbmc_ai/chats/base_chat_interface.py index e6fc41f..6344734 100644 --- a/esbmc_ai/base_chat_interface.py +++ b/esbmc_ai/chats/base_chat_interface.py @@ -13,8 +13,8 @@ ) from esbmc_ai.config import ChatPromptSettings -from .chat_response import ChatResponse, FinishReason -from .ai_models import AIModel +from esbmc_ai.chat_response import ChatResponse, FinishReason +from esbmc_ai.ai_models import AIModel class BaseChatInterface(object): diff --git a/esbmc_ai/latest_state_solution_generator.py b/esbmc_ai/chats/latest_state_solution_generator.py similarity index 93% rename from esbmc_ai/latest_state_solution_generator.py rename to esbmc_ai/chats/latest_state_solution_generator.py index 81d15a5..0bda6f8 100644 --- a/esbmc_ai/latest_state_solution_generator.py +++ b/esbmc_ai/chats/latest_state_solution_generator.py @@ -2,7 +2,7 @@ from typing_extensions import override from langchain_core.messages import BaseMessage -from esbmc_ai.solution_generator import SolutionGenerator +from esbmc_ai.chats.solution_generator import SolutionGenerator from esbmc_ai.chat_response import FinishReason # TODO Test me diff --git a/esbmc_ai/solution_generator.py b/esbmc_ai/chats/solution_generator.py similarity index 99% rename from esbmc_ai/solution_generator.py rename to esbmc_ai/chats/solution_generator.py index 95b8eec..c5351fc 100644 --- a/esbmc_ai/solution_generator.py +++ b/esbmc_ai/chats/solution_generator.py @@ -10,7 +10,7 @@ from esbmc_ai.config import ChatPromptSettings, DynamicAIModelAgent from esbmc_ai.frontend.solution import apply_line_patch -from .ai_models import AIModel +from esbmc_ai.ai_models import AIModel from .base_chat_interface import BaseChatInterface from esbmc_ai.esbmc_util import ( esbmc_get_counter_example, diff --git a/esbmc_ai/user_chat.py b/esbmc_ai/chats/user_chat.py similarity index 98% rename from esbmc_ai/user_chat.py rename to esbmc_ai/chats/user_chat.py index 0e1bbfd..3c1cdd6 100644 --- a/esbmc_ai/user_chat.py +++ b/esbmc_ai/chats/user_chat.py @@ -9,8 +9,8 @@ from langchain.schema import BaseMessage, SystemMessage from esbmc_ai.config import AIAgentConversation, ChatPromptSettings +from esbmc_ai.ai_models import AIModel -from .ai_models import AIModel from .base_chat_interface import BaseChatInterface diff --git a/esbmc_ai/commands/fix_code_command.py b/esbmc_ai/commands/fix_code_command.py index 97abcab..e0b6a5d 100644 --- a/esbmc_ai/commands/fix_code_command.py +++ b/esbmc_ai/commands/fix_code_command.py @@ -5,7 +5,8 @@ from typing_extensions import override from esbmc_ai.chat_response import FinishReason -from esbmc_ai.latest_state_solution_generator import LatestStateSolutionGenerator +from esbmc_ai.chats import LatestStateSolutionGenerator, SolutionGenerator +from esbmc_ai.chats.solution_generator import ESBMCTimedOutException from esbmc_ai.reverse_order_solution_generator import ReverseOrderSolutionGenerator from .chat_command import ChatCommand @@ -16,10 +17,6 @@ esbmc_get_error_type, esbmc_load_source_code, ) -from ..solution_generator import ( - ESBMCTimedOutException, - SolutionGenerator, -) from ..logging import print_horizontal_line, printv, printvv diff --git a/esbmc_ai/optimize_code.py b/esbmc_ai/optimize_code.py deleted file mode 100644 index 092c2ce..0000000 --- a/esbmc_ai/optimize_code.py +++ /dev/null @@ -1,37 +0,0 @@ -# Author: Yiannis Charalambous - -from langchain.base_language import BaseLanguageModel -from langchain.schema import AIMessage, BaseMessage, HumanMessage - -from esbmc_ai.config import ChatPromptSettings -from .base_chat_interface import BaseChatInterface, ChatResponse -from .ai_models import AIModel - - -class OptimizeCode(BaseChatInterface): - initial_message: str - - def __init__( - self, - ai_model_agent: ChatPromptSettings, - initial_message: str, - ai_model: AIModel, - llm: BaseLanguageModel, - ) -> None: - super().__init__(ai_model_agent=ai_model_agent, ai_model=ai_model, llm=llm) - - self.initial_message = initial_message - - def optimize_function(self, source_code: str, function_name: str) -> ChatResponse: - self.messages = [] - self.push_to_message_stack( - HumanMessage( - content=f"Reply OK if you understand the following is the source code to optimize:\n\n{source_code}" - ) - ) - self.push_to_message_stack(AIMessage(content="OK.")) - - expanded_initial_message: str = self.initial_message.replace( - "%s", function_name - ) - return self.send_message(expanded_initial_message) diff --git a/esbmc_ai/reverse_order_solution_generator.py b/esbmc_ai/reverse_order_solution_generator.py index bd13d46..203b2ea 100644 --- a/esbmc_ai/reverse_order_solution_generator.py +++ b/esbmc_ai/reverse_order_solution_generator.py @@ -2,7 +2,7 @@ from langchain.schema import BaseMessage from typing_extensions import override, Optional -from esbmc_ai.solution_generator import SolutionGenerator +from esbmc_ai.chats.solution_generator import SolutionGenerator from esbmc_ai.chat_response import ChatResponse # TODO Test me From f7926b2b4020ddeb7ba8cb7c0753bb70156d96b3 Mon Sep 17 00:00:00 2001 From: Yiannis Charalambous Date: Tue, 23 Jul 2024 13:25:37 +0100 Subject: [PATCH 02/18] Solution has been overhauled: SourceFile and Solution classes to represent solution --- esbmc_ai/frontend/solution.py | 61 -------------- esbmc_ai/solution.py | 149 ++++++++++++++++++++++++++++++++++ 2 files changed, 149 insertions(+), 61 deletions(-) delete mode 100644 esbmc_ai/frontend/solution.py create mode 100644 esbmc_ai/solution.py diff --git a/esbmc_ai/frontend/solution.py b/esbmc_ai/frontend/solution.py deleted file mode 100644 index 3437ddf..0000000 --- a/esbmc_ai/frontend/solution.py +++ /dev/null @@ -1,61 +0,0 @@ -# Author: Yiannis Charalambous - -"""# Solution -Keeps track of all the source files that ESBMC-AI is targeting.""" - -from typing import NamedTuple - - -class SourceFile(NamedTuple): - file_path: str - content: str - - -_main_source_file: SourceFile = SourceFile("", "") -_source_files: set[SourceFile] = set() - - -def add_source_file(source_file: SourceFile) -> None: - global _source_files - _source_files.add(source_file) - - -def set_main_source_file(source_file: SourceFile) -> None: - add_source_file(source_file) - global _main_source_file - _main_source_file = source_file - - -def get_main_source_file_path() -> str: - global _main_source_file - return _main_source_file.file_path - - -def get_main_source_file() -> SourceFile: - global _main_source_file - return _main_source_file - - -def get_source_files() -> list[SourceFile]: - global _source_files - return list(_source_files) - - -def apply_line_patch(source_code: str, patch: str, start: int, end: int) -> str: - """Applies a patch to the source code. - - To replace a single line, start and end are equal. - - Args: - * source_code - The source code to apply the patch to. - * patch - Can be a line or multiple lines but will replace the start and - end region defined. - * start - Line index to mark start of replacement. - * end - Marks the end of the region where the patch will be applied to. - End is non-inclusive.""" - assert ( - start <= end - ), f"start ({start}) needs to be less than or equal to end ({end})" - lines: list[str] = source_code.splitlines() - lines = lines[:start] + [patch] + lines[end + 1 :] - return "\n".join(lines) diff --git a/esbmc_ai/solution.py b/esbmc_ai/solution.py new file mode 100644 index 0000000..963d6af --- /dev/null +++ b/esbmc_ai/solution.py @@ -0,0 +1,149 @@ +# Author: Yiannis Charalambous + +"""# Solution +Keeps track of all the source files that ESBMC-AI is targeting. """ + +from typing import Optional +from pathlib import Path + + +class SourceFile(object): + def __init__(self, file_path: Optional[Path], content: str) -> None: + self._file_path: Optional[Path] = file_path + # Content file shows the file throughout the repair process. Index 0 is + # the orignial. + self._content: list[str] = [content] + # Map _content (each iteration) to esbmc output + self._esbmc_output: dict[int, str] = {} + + @property + def file_path(self) -> Optional[Path]: + return self._file_path + + @property + def initial_content(self) -> str: + return self._content[0] + + @property + def latest_content(self) -> str: + return self._content[-1] + + @property + def content(self) -> tuple[str, ...]: + return tuple(self._content) + + @property + def initial_verifier_output(self) -> str: + assert 0 in self._esbmc_output, "Error: No initial verifier output assigned." + return self._esbmc_output[0] + + @property + def latest_verifier_output(self) -> str: + return self._esbmc_output[len(self._content) - 1] + + @property + def verifier_output(self) -> dict[int, str]: + return self._esbmc_output + + def get_patch(self, index_a: int, index_b: int) -> str: + """Return diff between `index_a` and `index_b` which are indicies + referencing the content list.""" + raise NotImplementedError() + + def push_line_path(self, patch: str, start: int, end: int) -> str: + """Calls `apply_line_patch` using the latest source code and then pushes + the new patch to the content.""" + new_source: str = SourceFile.apply_line_patch( + source_code=self._content[-1], + patch=patch, + start=start, + end=end, + ) + self._content.append(new_source) + return new_source + + def update_content(self, content: str, reset_changes: bool = False) -> None: + if reset_changes: + self._content = [content] + else: + self._content.append(content) + + def assign_verifier_output(self, verifier_output: str, index: int = -1) -> None: + if index < 0: + index = len(self._content) - 1 + self._esbmc_output[index] = verifier_output + + @classmethod + def apply_line_patch( + cls, source_code: str, patch: str, start: int, end: int + ) -> str: + """Applies a patch to the source code. + + To replace a single line, start and end are equal. + + Args: + * source_code - The source code to apply the patch to. + * patch - Can be a line or multiple lines but will replace the start and + end region defined. + * start - Line index to mark start of replacement. + * end - Marks the end of the region where the patch will be applied to. + End is non-inclusive.""" + assert ( + start <= end + ), f"start ({start}) needs to be less than or equal to end ({end})" + lines: list[str] = source_code.splitlines() + lines = lines[:start] + [patch] + lines[end + 1 :] + return "\n".join(lines) + + +class Solution(object): + def __init__(self, files: list[Path] = []) -> None: + self._files: list[SourceFile] = [] + for file_path in files: + with open(file_path, "r") as file: + self._files.append(SourceFile(file_path, file.read())) + + @property + def files(self) -> tuple[SourceFile, ...]: + return tuple(self._files) + + @property + def files_mapped(self) -> dict[Path, SourceFile]: + """Will return the files mapped to their directory.""" + return { + source_file.file_path: source_file + for source_file in self._files + if source_file.file_path + } + + def add_source_file( + self, file_path: Optional[Path], content: Optional[str] + ) -> None: + if file_path: + if content: + self._files.append(SourceFile(file_path, content)) + else: + with open(file_path, "r") as file: + self._files.append(SourceFile(file_path, file.read())) + return + + if content: + self._files.append(SourceFile(file_path, content)) + return + + raise RuntimeError("file_path and content cannot be both invalid!") + + +# Define a global solution (is not required to be used) + +_solution: Solution = Solution() + + +def init_solution(solution: Solution) -> Solution: + global _solution + _solution = solution + return _solution + + +def get_solution() -> Solution: + return _solution From 74f164cc1449f4097ef2c027c007e5fd1893a62a Mon Sep 17 00:00:00 2001 From: Yiannis Charalambous Date: Tue, 23 Jul 2024 13:26:19 +0100 Subject: [PATCH 03/18] Convert internal path arguments to use Path class from pathlib --- esbmc_ai/esbmc_util.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/esbmc_ai/esbmc_util.py b/esbmc_ai/esbmc_util.py index 4a12013..0ee5b68 100644 --- a/esbmc_ai/esbmc_util.py +++ b/esbmc_ai/esbmc_util.py @@ -91,13 +91,13 @@ def get_clang_err_line_index(clang_output: str) -> Optional[int]: return None -def esbmc(path: str, esbmc_params: list, timeout: Optional[float] = None): +def esbmc(path: Path, esbmc_params: list, timeout: Optional[float] = None): """Exit code will be 0 if verification successful, 1 if verification failed. And any other number for compilation error/general errors.""" # Build parameters esbmc_cmd = [config.esbmc_path] esbmc_cmd.extend(esbmc_params) - esbmc_cmd.append(path) + esbmc_cmd.append(str(path)) if "--timeout" in esbmc_cmd: print( @@ -123,7 +123,7 @@ def esbmc(path: str, esbmc_params: list, timeout: Optional[float] = None): def esbmc_load_source_code( - file_path: str, + file_path: Path, source_code: str, esbmc_params: list = config.esbmc_params, auto_clean: bool = config.temp_auto_clean, @@ -137,7 +137,7 @@ def esbmc_load_source_code( os.mkdir(config.temp_file_dir) delete_path = True - temp_file_path = f"{config.temp_file_dir}{os.sep}{source_code_path.name}" + temp_file_path = Path(config.temp_file_dir) / source_code_path.name # Create temp file. with open(temp_file_path, "w") as file: @@ -146,7 +146,7 @@ def esbmc_load_source_code( file.flush() # Call ESBMC to temporary folder. - results = esbmc(file.name, esbmc_params, timeout=timeout) + results = esbmc(temp_file_path, esbmc_params, timeout=timeout) # Delete temp files and path if auto_clean: From 52d03b01ea3a36981d6ef6638281490af076de7e Mon Sep 17 00:00:00 2001 From: Yiannis Charalambous Date: Tue, 23 Jul 2024 13:26:54 +0100 Subject: [PATCH 04/18] Implemented SourceFile and Solution in the rest of the program --- esbmc_ai/__main__.py | 173 ++++++++++++++++---------- esbmc_ai/chats/solution_generator.py | 6 +- esbmc_ai/commands/fix_code_command.py | 33 ++--- tests/test_solution.py | 9 +- 4 files changed, 133 insertions(+), 88 deletions(-) diff --git a/esbmc_ai/__main__.py b/esbmc_ai/__main__.py index a89c4a8..fe9b97c 100755 --- a/esbmc_ai/__main__.py +++ b/esbmc_ai/__main__.py @@ -3,11 +3,15 @@ # Author: Yiannis Charalambous 2023 import os +from pathlib import Path import re import sys # Enables arrow key functionality for input(). Do not remove import. import readline +from typing import Optional + +_ = readline import argparse from langchain.base_language import BaseLanguageModel @@ -15,12 +19,7 @@ import esbmc_ai.config as config from esbmc_ai import __author__, __version__ -from esbmc_ai.frontend.solution import ( - SourceFile, - get_main_source_file, - set_main_source_file, - get_main_source_file_path, -) +from esbmc_ai.solution import SourceFile, Solution, get_solution, init_solution from esbmc_ai.commands import ( ChatCommand, @@ -123,7 +122,34 @@ def init_commands_list() -> None: def update_solution(source_code: str) -> None: - set_main_source_file(SourceFile(get_main_source_file_path(), source_code)) + get_solution().files[0].update_content(content=source_code, reset_changes=True) + + +def _run_esbmc(source_file: SourceFile, anim: Optional[LoadingWidget] = None) -> str: + assert source_file.file_path + + if anim: + anim.start("ESBMC is processing... Please Wait") + exit_code, esbmc_output = esbmc( + path=source_file.file_path, + esbmc_params=config.esbmc_params, + timeout=config.verifier_timeout, + ) + if anim: + anim.stop() + + # ESBMC will output 0 for verification success and 1 for verification + # failed, if anything else gets thrown, it's an ESBMC error. + if not config.allow_successful and exit_code == 0: + printv("Success!") + print(esbmc_output) + sys.exit(0) + elif exit_code != 0 and exit_code != 1: + printv(f"ESBMC exit code: {exit_code}") + printv(f"ESBMC Output:\n\n{esbmc_output}") + sys.exit(1) + + return esbmc_output def init_commands() -> None: @@ -137,25 +163,34 @@ def init_commands() -> None: fix_code_command.on_solution_signal.add_listener(update_solution) -def _run_command_mode( - command: ChatCommand, - args: list[str], - esbmc_output: str, - source_code: str, -) -> None: +def _run_command_mode(command: ChatCommand, args: argparse.Namespace) -> None: + path_arg: Path = Path(args.filename) + + solution: Solution + if path_arg.is_dir(): + solution = init_solution( + Solution( + [path for path in path_arg.glob("**/*") if path.is_file() and path.name] + ) + ) + else: + solution = init_solution(Solution([path_arg])) + match command.command_name: case fix_code_command.command_name: - error, solution = fix_code_command.execute( - file_name=get_main_source_file_path(), - source_code=source_code, - esbmc_output=esbmc_output, - ) + for source_file in solution.files: + # Run ESBMC first round + esbmc_output: str = _run_esbmc(source_file) + source_file.assign_verifier_output(esbmc_output) - if error: - print("Failed all attempts...") - sys.exit(1) - else: - print(solution) + error, source_file_solution = fix_code_command.execute( + source_file=source_file + ) + + if error: + print("Failed all attempts...") + else: + print(source_file_solution) case _: command.execute() sys.exit(0) @@ -249,7 +284,7 @@ def main() -> None: + "}", ) - args = parser.parse_args() + args: argparse.Namespace = parser.parse_args() print(f"ESBMC-AI {__version__}") print(f"Made by {__author__}") @@ -270,37 +305,7 @@ def main() -> None: printv("Reading source code...") print(f"Running ESBMC with {config.esbmc_params}\n") - # Cast to string (for language servers) - args.filename = str(args.filename) - - # Read source code - with open(args.filename, mode="r") as file: - # Add the main source file to the solution explorer. - set_main_source_file(SourceFile(args.filename, file.read())) - - anim.start("ESBMC is processing... Please Wait") - exit_code, esbmc_output = esbmc( - path=get_main_source_file_path(), - esbmc_params=config.esbmc_params, - timeout=config.verifier_timeout, - ) - anim.stop() - - # Print verbose lvl 2 - print_horizontal_line(2) - printvv(esbmc_output) - print_horizontal_line(2) - - # ESBMC will output 0 for verification success and 1 for verification - # failed, if anything else gets thrown, it's an ESBMC error. - if not config.allow_successful and exit_code == 0: - print("Success!") - print(esbmc_output) - sys.exit(0) - elif exit_code != 0 and exit_code != 1: - print(f"ESBMC exit code: {exit_code}") - print(f"ESBMC Output:\n\n{esbmc_output}") - sys.exit(1) + assert isinstance(args.filename, str) # Command mode: Check if command is called and call it. # If not, then continue to user mode. @@ -310,14 +315,45 @@ def main() -> None: print("Running Command:", command) for idx, command_name in enumerate(command_names): if command == command_name: - _run_command_mode( - command=commands[idx], - args=[], # NOTE: Currently not supported... - source_code=get_main_source_file().content, - esbmc_output=esbmc_output, - ) + _run_command_mode(command=commands[idx], args=args) sys.exit(0) + # =========================================== + # User Mode (Supports only 1 file) + # =========================================== + + # Init Solution + solution: Solution + # Determine if we are processing one file versus multiple files + path_arg: Path = Path(args.filename) + if path_arg.is_dir(): + # Load only files. + print( + "Processing multiple files is not supported in User Mode." + "Call a command using -c to process directories" + ) + sys.exit(1) + else: + # Add the main source file to the solution explorer. + solution = init_solution(Solution([path_arg])) + del path_arg + + # Assert that we have one file and one filepath + assert len(solution.files) == 1 + + source_file: SourceFile = solution.files[0] + assert source_file.file_path + + esbmc_output: str = _run_esbmc(source_file, anim) + + # Print verbose lvl 2 + print_horizontal_line(2) + printvv(esbmc_output) + print_horizontal_line(2) + + source_file.assign_verifier_output(esbmc_output) + del esbmc_output + printv(f"Initializing the LLM: {config.ai_model.name}\n") chat_llm: BaseLanguageModel = config.ai_model.create_llm( api_keys=config.api_keys, @@ -332,8 +368,8 @@ def main() -> None: ai_model_agent=config.chat_prompt_user_mode, ai_model=config.ai_model, llm=chat_llm, - source_code=get_main_source_file().content, - esbmc_output=esbmc_output, + source_code=source_file.latest_content, + esbmc_output=source_file.latest_verifier_output, set_solution_messages=config.chat_prompt_user_mode.scenarios["set_solution"], ) @@ -345,7 +381,6 @@ def main() -> None: if len(config.chat_prompt_user_mode.initial_prompt) > 0: printv("Using initial prompt from file...\n") anim.start("Model is parsing ESBMC output... Please Wait") - # TODO Make protected response = chat.send_message( message=config.chat_prompt_user_mode.initial_prompt, ) @@ -375,17 +410,17 @@ def main() -> None: print() print("ESBMC-AI will generate a fix for the code...") - error, solution = fix_code_command.execute( - file_name=get_main_source_file_path(), - source_code=get_main_source_file().content, - esbmc_output=esbmc_output, + error: bool + source_file_solution: str + error, source_file_solution = fix_code_command.execute( + source_file=source_file ) if not error: print( "\n\nESBMC-AI: Here is the corrected code, verified with ESBMC:" ) - print(f"```\n{solution}\n```") + print(f"```\n{source_file_solution}\n```") continue else: # Commands without parameters or returns are handled automatically. diff --git a/esbmc_ai/chats/solution_generator.py b/esbmc_ai/chats/solution_generator.py index c5351fc..6187ee3 100644 --- a/esbmc_ai/chats/solution_generator.py +++ b/esbmc_ai/chats/solution_generator.py @@ -8,7 +8,7 @@ from esbmc_ai.chat_response import ChatResponse, FinishReason from esbmc_ai.config import ChatPromptSettings, DynamicAIModelAgent -from esbmc_ai.frontend.solution import apply_line_patch +from esbmc_ai.solution import SourceFile from esbmc_ai.ai_models import AIModel from .base_chat_interface import BaseChatInterface @@ -205,6 +205,8 @@ def generate_solution(self) -> tuple[str, FinishReason]: assert ( line ), "fix code command: error line could not be found to apply brutal patch replacement" - solution = apply_line_patch(self.source_code_raw, solution, line, line) + solution = SourceFile.apply_line_patch( + self.source_code_raw, solution, line, line + ) return solution, response.finish_reason diff --git a/esbmc_ai/commands/fix_code_command.py b/esbmc_ai/commands/fix_code_command.py index e0b6a5d..d80b6ef 100644 --- a/esbmc_ai/commands/fix_code_command.py +++ b/esbmc_ai/commands/fix_code_command.py @@ -8,6 +8,7 @@ from esbmc_ai.chats import LatestStateSolutionGenerator, SolutionGenerator from esbmc_ai.chats.solution_generator import ESBMCTimedOutException from esbmc_ai.reverse_order_solution_generator import ReverseOrderSolutionGenerator +from esbmc_ai.solution import SourceFile from .chat_command import ChatCommand from .. import config @@ -41,12 +42,11 @@ def print_raw_conversation() -> None: print("\n" + "\n\n".join(messages)) print("ESBMC-AI Notice: End of raw conversation") - file_name: str = kwargs["file_name"] - source_code: str = kwargs["source_code"] - esbmc_output: str = kwargs["esbmc_output"] + source_file: SourceFile = kwargs["source_file"] + assert source_file.file_path # Parse the esbmc output here and determine what "Scenario" to use. - scenario: str = esbmc_get_error_type(esbmc_output) + scenario: str = esbmc_get_error_type(source_file.initial_verifier_output) printv(f"Scenario: {scenario}") printv( @@ -105,8 +105,8 @@ def print_raw_conversation() -> None: try: solution_generator.update_state( - source_code=source_code, - esbmc_output=esbmc_output, + source_code=source_file.latest_content, + esbmc_output=source_file.latest_verifier_output, ) except ESBMCTimedOutException: print("error: ESBMC has timed out...") @@ -126,13 +126,13 @@ def print_raw_conversation() -> None: if finish_reason == FinishReason.length: solution_generator.compress_message_stack() else: - source_code = llm_solution + source_file.update_content(llm_solution) break # Print verbose lvl 2 printvv("\nESBMC-AI Notice: Source Code Generation:") print_horizontal_line(2) - printvv(source_code) + printvv(source_file.latest_content) print_horizontal_line(2) printvv("") @@ -140,34 +140,39 @@ def print_raw_conversation() -> None: # to a temporary location since ESBMC needs it in file format. self.anim.start("Verifying with ESBMC... Please Wait") exit_code, esbmc_output = esbmc_load_source_code( - file_path=file_name, - source_code=source_code, + file_path=source_file.file_path, + source_code=source_file.latest_content, esbmc_params=config.esbmc_params, auto_clean=config.temp_auto_clean, timeout=config.verifier_timeout, ) self.anim.stop() + source_file.assign_verifier_output(esbmc_output) + del esbmc_output + # Print verbose lvl 2 printvv("\nESBMC-AI Notice: ESBMC Output:") print_horizontal_line(2) - printvv(esbmc_output) + printvv(source_file.latest_verifier_output) print_horizontal_line(2) # Solution found if exit_code == 0: - self.on_solution_signal.emit(source_code) + self.on_solution_signal.emit(source_file.latest_content) if config.raw_conversation: print_raw_conversation() printv("ESBMC-AI Notice: Successfully verified code") - return False, source_code + return False, source_file.latest_content try: # Update state - solution_generator.update_state(source_code, esbmc_output) + solution_generator.update_state( + source_file.latest_content, source_file.latest_verifier_output + ) except ESBMCTimedOutException: if config.raw_conversation: print_raw_conversation() diff --git a/tests/test_solution.py b/tests/test_solution.py index e303c49..52ac487 100644 --- a/tests/test_solution.py +++ b/tests/test_solution.py @@ -1,13 +1,16 @@ # Author: Yiannis Charalambous -from esbmc_ai.frontend.solution import apply_line_patch +from esbmc_ai.solution import SourceFile def test_apply_line_patch() -> None: text = "\n".join(["a", "b", "c", "d", "e", "f", "g"]) answer = "\n".join(["a", "b", "1", "g"]) - assert apply_line_patch(text, "1", 2, 5) == answer + assert SourceFile.apply_line_patch(text, "1", 2, 5) == answer text = "\n".join(["a", "b", "c", "d", "e", "f", "g"]) answer = "\n".join(["a", "b", "c", "1", "e", "f", "g"]) - assert apply_line_patch(text, "1", 3, 3) == answer + assert SourceFile.apply_line_patch(text, "1", 3, 3) == answer + + +# TODO Add more tests for solution and sourcefile From 0107c80a9b41a90c20a275754a45db6ee2f91609 Mon Sep 17 00:00:00 2001 From: Yiannis Charalambous Date: Wed, 4 Sep 2024 17:45:04 +0300 Subject: [PATCH 05/18] Added patching argument to generate program patches --- esbmc_ai/__main__.py | 13 +++++++- esbmc_ai/solution.py | 73 ++++++++++++++++++++++++++++-------------- tests/test_solution.py | 47 ++++++++++++++++++++++++++- 3 files changed, 107 insertions(+), 26 deletions(-) diff --git a/esbmc_ai/__main__.py b/esbmc_ai/__main__.py index fe9b97c..4f0f7fd 100755 --- a/esbmc_ai/__main__.py +++ b/esbmc_ai/__main__.py @@ -284,6 +284,14 @@ def main() -> None: + "}", ) + parser.add_argument( + "-p", + "--generate-patches", + action="store_true", + default=False, + help="Generate patch files and place them in the same folder as the source files.", + ) + args: argparse.Namespace = parser.parse_args() print(f"ESBMC-AI {__version__}") @@ -307,7 +315,10 @@ def main() -> None: assert isinstance(args.filename, str) - # Command mode: Check if command is called and call it. + # =========================================== + # Command mode + # =========================================== + # Check if command is called and call it. # If not, then continue to user mode. if args.command != None: command = args.command diff --git a/esbmc_ai/solution.py b/esbmc_ai/solution.py index 963d6af..fce6284 100644 --- a/esbmc_ai/solution.py +++ b/esbmc_ai/solution.py @@ -5,9 +5,33 @@ from typing import Optional from pathlib import Path +from subprocess import PIPE, STDOUT, run, CompletedProcess +from tempfile import NamedTemporaryFile class SourceFile(object): + @classmethod + def apply_line_patch( + cls, source_code: str, patch: str, start: int, end: int + ) -> str: + """Applies a patch to the source code. + + To replace a single line, start and end are equal. + + Args: + * source_code - The source code to apply the patch to. + * patch - Can be a line or multiple lines but will replace the start and + end region defined. + * start - Line index to mark start of replacement. + * end - Marks the end of the region where the patch will be applied to. + End is non-inclusive.""" + assert ( + start <= end + ), f"start ({start}) needs to be less than or equal to end ({end})" + lines: list[str] = source_code.splitlines() + lines = lines[:start] + [patch] + lines[end + 1 :] + return "\n".join(lines) + def __init__(self, file_path: Optional[Path], content: str) -> None: self._file_path: Optional[Path] = file_path # Content file shows the file throughout the repair process. Index 0 is @@ -48,9 +72,32 @@ def verifier_output(self) -> dict[int, str]: def get_patch(self, index_a: int, index_b: int) -> str: """Return diff between `index_a` and `index_b` which are indicies referencing the content list.""" - raise NotImplementedError() + assert len(self._content) > index_a and len(self._content) > index_b + + # Save as temp files + file_a = NamedTemporaryFile(mode="w", dir="/tmp") + file_a.write(self._content[index_a]) + file_b = NamedTemporaryFile(mode="w", dir="/tmp") + file_b.write(self._content[index_b]) + + cmd = ["diff", file_a.name, file_b.name] + process: CompletedProcess = run( + cmd, + stdout=PIPE, + stderr=STDOUT, + ) + + if process.returncode != 0: + raise ChildProcessError( + f"Diff for {index_a} and {index_b} for file {self._file_path} failed with exit code {process.returncode}" + ) - def push_line_path(self, patch: str, start: int, end: int) -> str: + file_a.close() + file_b.close() + + return process.stdout.decode("utf-8") + + def push_line_patch(self, patch: str, start: int, end: int) -> str: """Calls `apply_line_patch` using the latest source code and then pushes the new patch to the content.""" new_source: str = SourceFile.apply_line_patch( @@ -73,28 +120,6 @@ def assign_verifier_output(self, verifier_output: str, index: int = -1) -> None: index = len(self._content) - 1 self._esbmc_output[index] = verifier_output - @classmethod - def apply_line_patch( - cls, source_code: str, patch: str, start: int, end: int - ) -> str: - """Applies a patch to the source code. - - To replace a single line, start and end are equal. - - Args: - * source_code - The source code to apply the patch to. - * patch - Can be a line or multiple lines but will replace the start and - end region defined. - * start - Line index to mark start of replacement. - * end - Marks the end of the region where the patch will be applied to. - End is non-inclusive.""" - assert ( - start <= end - ), f"start ({start}) needs to be less than or equal to end ({end})" - lines: list[str] = source_code.splitlines() - lines = lines[:start] + [patch] + lines[end + 1 :] - return "\n".join(lines) - class Solution(object): def __init__(self, files: list[Path] = []) -> None: diff --git a/tests/test_solution.py b/tests/test_solution.py index 52ac487..c3b6fa0 100644 --- a/tests/test_solution.py +++ b/tests/test_solution.py @@ -1,6 +1,39 @@ # Author: Yiannis Charalambous -from esbmc_ai.solution import SourceFile +import pytest +from esbmc_ai.solution import Solution, SourceFile + + +@pytest.fixture(scope="function") +def solution() -> Solution: + return Solution() + + +def test_add_source_file(solution) -> None: + src: str = "int main(int argc, char** argv) {return 0;}" + solution.add_source_file(None, src) + assert ( + len(solution.files) == 1 + and solution.files[0].file_path == None + and solution.files[0].latest_content == src + ) + src = '#include int main(int argc, char** argv) { printf("hello world\n"); return 0;}' + solution.add_source_file("Testfile1", src) + assert ( + len(solution.files) == 2 + and solution.files[1].file_path == "Testfile1" + and solution.files[1].latest_content == src + ) + assert ( + len(solution.files_mapped) == 1 + and solution.files_mapped["Testfile1"].file_path == "Testfile1" + and solution.files_mapped["Testfile1"].initial_content == src + ) + + +##################################### +# SourceFile +##################################### def test_apply_line_patch() -> None: @@ -13,4 +46,16 @@ def test_apply_line_patch() -> None: assert SourceFile.apply_line_patch(text, "1", 3, 3) == answer +def test_get_patch() -> None: + raise NotImplementedError() + + +def test_update_content() -> None: + raise NotImplementedError() + + +def test_assign_verifier_output() -> None: + raise NotImplementedError() + + # TODO Add more tests for solution and sourcefile From 89df913b8176a1fdaa0ff8f79acd05efc4e5c59a Mon Sep 17 00:00:00 2001 From: Yiannis Charalambous Date: Thu, 5 Sep 2024 00:02:10 +0300 Subject: [PATCH 06/18] Finished adding patch argument --- esbmc_ai/__main__.py | 40 ++++++++++----------- esbmc_ai/commands/chat_command.py | 5 +-- esbmc_ai/commands/command_result.py | 15 ++++++++ esbmc_ai/commands/exit_command.py | 2 +- esbmc_ai/commands/fix_code_command.py | 52 +++++++++++++++++++++------ esbmc_ai/config.py | 5 +++ 6 files changed, 84 insertions(+), 35 deletions(-) create mode 100644 esbmc_ai/commands/command_result.py diff --git a/esbmc_ai/__main__.py b/esbmc_ai/__main__.py index 4f0f7fd..e9a715d 100755 --- a/esbmc_ai/__main__.py +++ b/esbmc_ai/__main__.py @@ -11,6 +11,8 @@ import readline from typing import Optional +from esbmc_ai.commands.fix_code_command import FixCodeCommandResult + _ = readline import argparse @@ -19,7 +21,7 @@ import esbmc_ai.config as config from esbmc_ai import __author__, __version__ -from esbmc_ai.solution import SourceFile, Solution, get_solution, init_solution +from esbmc_ai.solution import SourceFile, Solution, get_solution from esbmc_ai.commands import ( ChatCommand, @@ -166,15 +168,13 @@ def init_commands() -> None: def _run_command_mode(command: ChatCommand, args: argparse.Namespace) -> None: path_arg: Path = Path(args.filename) - solution: Solution + solution: Solution = get_solution() if path_arg.is_dir(): - solution = init_solution( - Solution( - [path for path in path_arg.glob("**/*") if path.is_file() and path.name] - ) - ) + for path in path_arg.glob("**/*"): + if path.is_file() and path.name: + solution.add_source_file(path, None) else: - solution = init_solution(Solution([path_arg])) + solution.add_source_file(path_arg, None) match command.command_name: case fix_code_command.command_name: @@ -183,14 +183,12 @@ def _run_command_mode(command: ChatCommand, args: argparse.Namespace) -> None: esbmc_output: str = _run_esbmc(source_file) source_file.assign_verifier_output(esbmc_output) - error, source_file_solution = fix_code_command.execute( - source_file=source_file + result: FixCodeCommandResult = fix_code_command.execute( + source_file=source_file, + generate_patches=config.generate_patches, ) - if error: - print("Failed all attempts...") - else: - print(source_file_solution) + print(result) case _: command.execute() sys.exit(0) @@ -346,7 +344,8 @@ def main() -> None: sys.exit(1) else: # Add the main source file to the solution explorer. - solution = init_solution(Solution([path_arg])) + solution: Solution = get_solution() + solution.add_source_file(path_arg, None) del path_arg # Assert that we have one file and one filepath @@ -421,17 +420,16 @@ def main() -> None: print() print("ESBMC-AI will generate a fix for the code...") - error: bool - source_file_solution: str - error, source_file_solution = fix_code_command.execute( - source_file=source_file + result: FixCodeCommandResult = fix_code_command.execute( + source_file=source_file, + generate_patches=config.generate_patches, ) - if not error: + if result.successful: print( "\n\nESBMC-AI: Here is the corrected code, verified with ESBMC:" ) - print(f"```\n{source_file_solution}\n```") + print(f"```\n{result.repaired_source}\n```") continue else: # Commands without parameters or returns are handled automatically. diff --git a/esbmc_ai/commands/chat_command.py b/esbmc_ai/commands/chat_command.py index c91105e..6109e1c 100644 --- a/esbmc_ai/commands/chat_command.py +++ b/esbmc_ai/commands/chat_command.py @@ -3,6 +3,8 @@ from abc import ABC, abstractmethod from typing import Any, Optional +from esbmc_ai.commands.command_result import CommandResult + class ChatCommand(ABC): command_name: str @@ -17,7 +19,6 @@ def __init__( self.command_name = command_name self.help_message = help_message - @abstractmethod - def execute(self, **kwargs: Optional[Any]) -> Optional[Any]: + def execute(self, **kwargs: Optional[Any]) -> Optional[CommandResult]: raise NotImplementedError(f"Command {self.command_name} is not implemented.") diff --git a/esbmc_ai/commands/command_result.py b/esbmc_ai/commands/command_result.py new file mode 100644 index 0000000..58703cc --- /dev/null +++ b/esbmc_ai/commands/command_result.py @@ -0,0 +1,15 @@ +# Author: Yiannis Charalambous + +from abc import abstractmethod + + +class CommandResult: + @property + @abstractmethod + def successful(self) -> bool: + raise NotImplementedError() + + def __str__(self) -> str: + return f"Command returned " + ( + "successful" if self.successful else "unsuccessful" + ) diff --git a/esbmc_ai/commands/exit_command.py b/esbmc_ai/commands/exit_command.py index 2fb38dd..943c446 100644 --- a/esbmc_ai/commands/exit_command.py +++ b/esbmc_ai/commands/exit_command.py @@ -15,6 +15,6 @@ def __init__(self) -> None: ) @override - def execute(self, **_: Optional[Any]) -> Optional[Any]: + def execute(self, **_: Optional[Any]) -> None: print("exiting...") sys.exit(0) diff --git a/esbmc_ai/commands/fix_code_command.py b/esbmc_ai/commands/fix_code_command.py index d80b6ef..62f8d4d 100644 --- a/esbmc_ai/commands/fix_code_command.py +++ b/esbmc_ai/commands/fix_code_command.py @@ -1,12 +1,13 @@ # Author: Yiannis Charalambous import sys -from typing import Any, Tuple +from typing import Any, Optional, Tuple from typing_extensions import override from esbmc_ai.chat_response import FinishReason from esbmc_ai.chats import LatestStateSolutionGenerator, SolutionGenerator from esbmc_ai.chats.solution_generator import ESBMCTimedOutException +from esbmc_ai.commands.command_result import CommandResult from esbmc_ai.reverse_order_solution_generator import ReverseOrderSolutionGenerator from esbmc_ai.solution import SourceFile @@ -19,6 +20,27 @@ esbmc_load_source_code, ) from ..logging import print_horizontal_line, printv, printvv +from subprocess import CalledProcessError + + +class FixCodeCommandResult(CommandResult): + def __init__(self, successful: bool, repaired_source: Optional[str] = None) -> None: + super().__init__() + self._successful: bool = successful + self.repaired_source: Optional[str] = repaired_source + + @property + @override + def successful(self) -> bool: + return self._successful + + @override + def __str__(self) -> str: + return ( + self.repaired_source + if self._successful and self.repaired_source != None + else "ESBMC-AI Notice: Failed all attempts..." + ) class FixCodeCommand(ChatCommand): @@ -32,7 +54,7 @@ def __init__(self) -> None: self.anim = create_loading_widget() @override - def execute(self, **kwargs: Any) -> Tuple[bool, str]: + def execute(self, **kwargs: Any) -> FixCodeCommandResult: def print_raw_conversation() -> None: print_horizontal_line(0) print("ESBMC-AI Notice: Printing raw conversation...") @@ -44,6 +66,9 @@ def print_raw_conversation() -> None: source_file: SourceFile = kwargs["source_file"] assert source_file.file_path + generate_patches: bool = ( + kwargs["generate_patches"] if "generate_patches" in kwargs else False + ) # Parse the esbmc output here and determine what "Scenario" to use. scenario: str = esbmc_get_error_type(source_file.initial_verifier_output) @@ -114,8 +139,8 @@ def print_raw_conversation() -> None: print() - max_retries: int = config.fix_code_max_attempts - for idx in range(max_retries): + attempts: int = config.fix_code_max_attempts + for attempt in range(1, attempts + 1): # Get a response. Use while loop to account for if the message stack # gets full, then need to compress and retry. while True: @@ -166,7 +191,13 @@ def print_raw_conversation() -> None: printv("ESBMC-AI Notice: Successfully verified code") - return False, source_file.latest_content + returned_source: str + if generate_patches: + returned_source = source_file.get_patch(0, -1) + else: + returned_source = source_file.latest_content + + return FixCodeCommandResult(True, returned_source) try: # Update state @@ -180,13 +211,12 @@ def print_raw_conversation() -> None: sys.exit(1) # Failure case - print( - f"ESBMC-AI Notice: Failure {idx+1}/{max_retries}" + ": Retrying..." - if idx != max_retries - 1 - else "" - ) + if attempt != attempts: + print(f"ESBMC-AI Notice: Failure {attempt}/{attempts}: Retrying...") + else: + print(f"ESBMC-AI Notice: Failure {attempt}/{attempts}") if config.raw_conversation: print_raw_conversation() - return True, "ESBMC-AI Notice: Failed all attempts..." + return FixCodeCommandResult(False, None) diff --git a/esbmc_ai/config.py b/esbmc_ai/config.py index 44007b3..dfc752d 100644 --- a/esbmc_ai/config.py +++ b/esbmc_ai/config.py @@ -57,6 +57,8 @@ cfg_path: str +generate_patches: bool + # TODO Get rid of this class as soon as ConfigTool with the pyautoconfig class AIAgentConversation(NamedTuple): @@ -498,3 +500,6 @@ def load_args(args) -> None: esbmc_params.extend(args.remaining) elif len(args.remaining) != 0: esbmc_params = args.remaining + + global generate_patches + generate_patches = args.generate_patches From 37904e62ca715c7845f8a858bfb5cfbc8462e6d9 Mon Sep 17 00:00:00 2001 From: Yiannis Charalambous Date: Thu, 5 Sep 2024 00:02:28 +0300 Subject: [PATCH 07/18] Removed frontend module and scripts folder --- esbmc_ai/frontend/__init__.py | 1 - scripts/function_equivalence.c | 33 --------------------------------- 2 files changed, 34 deletions(-) delete mode 100644 esbmc_ai/frontend/__init__.py delete mode 100644 scripts/function_equivalence.c diff --git a/esbmc_ai/frontend/__init__.py b/esbmc_ai/frontend/__init__.py deleted file mode 100644 index 1f91aa3..0000000 --- a/esbmc_ai/frontend/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# Author: Yiannis Charalambous diff --git a/scripts/function_equivalence.c b/scripts/function_equivalence.c deleted file mode 100644 index 16cb943..0000000 --- a/scripts/function_equivalence.c +++ /dev/null @@ -1,33 +0,0 @@ -#include - -/////////////////////////////////////////// -///////////// OLD ///////////////////////// -/////////////////////////////////////////// - -$function_old; - -/////////////////////////////////////////// -///////////// NEW ///////////////////////// -/////////////////////////////////////////// - -$function_new; - -/////////////////////////////////////////// -///////////// ESBMC MAIN ////////////////// -/////////////////////////////////////////// - -int main() -{ - // Need to dynamically build parameters to supply. - $parameters_list; - - // Need to call old and new functions and compare results. - $function_call_old; - - $function_call_new; - - // Check whether both functions produce the same output - assert($function_assert_old == $function_assert_new); - - return 0; -} From ce49a3cbbaaca1f4ac4ec1213bbfa3b44a8ca165 Mon Sep 17 00:00:00 2001 From: Yiannis Charalambous Date: Thu, 5 Sep 2024 00:07:32 +0300 Subject: [PATCH 08/18] Added calculating cyclomatic complexity to source file --- esbmc_ai/solution.py | 166 ++++++++++++++++++++++++++++++++----------- 1 file changed, 124 insertions(+), 42 deletions(-) diff --git a/esbmc_ai/solution.py b/esbmc_ai/solution.py index fce6284..aa132d5 100644 --- a/esbmc_ai/solution.py +++ b/esbmc_ai/solution.py @@ -3,13 +3,19 @@ """# Solution Keeps track of all the source files that ESBMC-AI is targeting. """ +import os from typing import Optional from pathlib import Path from subprocess import PIPE, STDOUT, run, CompletedProcess -from tempfile import NamedTemporaryFile +from tempfile import NamedTemporaryFile, gettempdir +import lizard -class SourceFile(object): +class SourceFile: + """Represents a source file in the Solution. This class also holds the + verifier output. Contains methods to manipulate and get information about + different versions.""" + @classmethod def apply_line_patch( cls, source_code: str, patch: str, start: int, end: int @@ -42,31 +48,38 @@ def __init__(self, file_path: Optional[Path], content: str) -> None: @property def file_path(self) -> Optional[Path]: + """Returns the file path of this source file.""" return self._file_path @property def initial_content(self) -> str: + """Returns the initial state content.""" return self._content[0] @property def latest_content(self) -> str: + """Returns the latest available content.""" return self._content[-1] @property def content(self) -> tuple[str, ...]: + """Returns a tuple of the content of this source file.""" return tuple(self._content) @property def initial_verifier_output(self) -> str: + """Returns the first verifier output""" assert 0 in self._esbmc_output, "Error: No initial verifier output assigned." return self._esbmc_output[0] @property def latest_verifier_output(self) -> str: + """Returns the latest verifier output""" return self._esbmc_output[len(self._content) - 1] @property def verifier_output(self) -> dict[int, str]: + """Returns the verifier outputs of the SourceFile""" return self._esbmc_output def get_patch(self, index_a: int, index_b: int) -> str: @@ -75,54 +88,126 @@ def get_patch(self, index_a: int, index_b: int) -> str: assert len(self._content) > index_a and len(self._content) > index_b # Save as temp files - file_a = NamedTemporaryFile(mode="w", dir="/tmp") - file_a.write(self._content[index_a]) - file_b = NamedTemporaryFile(mode="w", dir="/tmp") - file_b.write(self._content[index_b]) - - cmd = ["diff", file_a.name, file_b.name] - process: CompletedProcess = run( - cmd, - stdout=PIPE, - stderr=STDOUT, - ) - - if process.returncode != 0: - raise ChildProcessError( - f"Diff for {index_a} and {index_b} for file {self._file_path} failed with exit code {process.returncode}" + with ( + NamedTemporaryFile(mode="w", delete=False) as file_a, + NamedTemporaryFile(mode="w", delete=False) as file_b, + ): + file_a.write(self._content[index_a]) + file_a.flush() + file_b.write(self._content[index_b]) + file_b.flush() + + cmd = ["diff", file_a.name, file_b.name] + process: CompletedProcess = run( + cmd, + stdout=PIPE, + stderr=STDOUT, + check=False, ) - file_a.close() - file_b.close() - - return process.stdout.decode("utf-8") - - def push_line_patch(self, patch: str, start: int, end: int) -> str: - """Calls `apply_line_patch` using the latest source code and then pushes - the new patch to the content.""" - new_source: str = SourceFile.apply_line_patch( - source_code=self._content[-1], - patch=patch, - start=start, - end=end, - ) - self._content.append(new_source) - return new_source + # Exit status is 0 if inputs are the same, 1 if different, 2 if trouble. + # https://askubuntu.com/questions/698784/exit-code-of-diff + if process.returncode == 2: + raise ValueError( + f"Diff for {file_a.name} and {file_b.name} failed (exit 2)." + ) + + return process.stdout.decode("utf-8") + + # Not used + # def push_line_patch(self, patch: str, start: int, end: int) -> str: + # """Calls `apply_line_patch` using the latest source code and then pushes + # the new patch to the content.""" + # new_source: str = SourceFile.apply_line_patch( + # source_code=self._content[-1], + # patch=patch, + # start=start, + # end=end, + # ) + # self._content.append(new_source) + # return new_source def update_content(self, content: str, reset_changes: bool = False) -> None: + """Ascociates a new version of the content of a file to a file.""" if reset_changes: self._content = [content] else: self._content.append(content) def assign_verifier_output(self, verifier_output: str, index: int = -1) -> None: + """Assigns verifier output to the ascociated file. If no file is given, + then assigns to the latest one.""" if index < 0: index = len(self._content) - 1 self._esbmc_output[index] = verifier_output + def save_file( + self, file_path: Optional[Path], temp_dir: bool = True, index: int = -1 + ) -> Path: + """Saves the source code file. If file_path is not specified, it + will generate an automatic name. If temp_dir is True, it will place + the saved file in /tmp and use the file_path file name only.""" + # Ensure if file_path if given, then it is a file. + assert not file_path or file_path.is_file(), "file_path needs to be a file" + + file_name: str + dir_path: Optional[Path] = None + if file_path: + file_name = file_path.name + dir_path = file_path.parent + else: + if not self._file_path: + raise ValueError( + "Source code file does not have a name or file_path to save to" + ) + file_name = self._file_path.name -class Solution(object): - def __init__(self, files: list[Path] = []) -> None: + if temp_dir: + dir_path = Path(gettempdir()) + + assert ( + dir_path + ), "dir_path could not be retrieved: file_path or temp_dir need to be set." + + # Create path if it does not exist. + if not os.path.exists(dir_path): + os.mkdir(dir_path) + + with NamedTemporaryFile( + mode="w", + buffering=-1, + newline=None, + suffix=None, + prefix=file_name, + dir=dir_path, + delete=False, + ) as temp_file: + temp_file.write(self.content[index]) + return Path(temp_file.name) + + @classmethod + def calculate_cyclomatic_complexity_delta( + cls, source_1: "SourceFile", source_2: "SourceFile" + ) -> Optional[float]: + """Calculates the cyclomatic complexity difference between the two source files.""" + try: + file_1: Path = source_1.save_file(None, True) + file_2: Path = source_2.save_file(None, True) + + cc1 = lizard.analyze_file(file_1) + cc2 = lizard.analyze_file(file_2) + + return cc1.average_cyclomatic_complexity - cc2.average_cyclomatic_complexity + except IOError: + return None + + +class Solution: + """Represents a solution, that is a collection of all the source files that + ESBMC-AI will be involved in analyzing.""" + + def __init__(self, files: Optional[list[Path]] = None) -> None: + files = files if files else [] self._files: list[SourceFile] = [] for file_path in files: with open(file_path, "r") as file: @@ -130,11 +215,12 @@ def __init__(self, files: list[Path] = []) -> None: @property def files(self) -> tuple[SourceFile, ...]: + """Will return a list of the files. Returns by value.""" return tuple(self._files) @property def files_mapped(self) -> dict[Path, SourceFile]: - """Will return the files mapped to their directory.""" + """Will return the files mapped to their directory. Returns by value.""" return { source_file.file_path: source_file for source_file in self._files @@ -144,6 +230,7 @@ def files_mapped(self) -> dict[Path, SourceFile]: def add_source_file( self, file_path: Optional[Path], content: Optional[str] ) -> None: + """Add a source file to the solution.""" if file_path: if content: self._files.append(SourceFile(file_path, content)) @@ -164,11 +251,6 @@ def add_source_file( _solution: Solution = Solution() -def init_solution(solution: Solution) -> Solution: - global _solution - _solution = solution - return _solution - - def get_solution() -> Solution: + """Returns the global default solution object.""" return _solution From a36b42c150d054f88dae7b3344dae5e15007e433 Mon Sep 17 00:00:00 2001 From: Yiannis Charalambous Date: Thu, 5 Sep 2024 00:07:47 +0300 Subject: [PATCH 09/18] Updated launch.json to use debugpy --- .vscode/launch.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 99814f0..caf4bfc 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -6,7 +6,7 @@ "configurations": [ { "name": "Launch ESBMC-AI on Open File", - "type": "python", + "type": "debugpy", "request": "launch", "module": "esbmc_ai", "justMyCode": true, @@ -17,7 +17,7 @@ }, { "name": "Fix Code on Open File", - "type": "python", + "type": "debugpy", "request": "launch", "module": "esbmc_ai", "justMyCode": true, @@ -31,7 +31,7 @@ }, { "name": "Optimize Code on Open File", - "type": "python", + "type": "debugpy", "request": "launch", "module": "esbmc_ai", "justMyCode": true, @@ -44,7 +44,7 @@ }, { "name": "Python: Current File", - "type": "python", + "type": "debugpy", "request": "launch", "program": "${file}", "console": "integratedTerminal", @@ -52,7 +52,7 @@ }, { "name": "Run tests", - "type": "python", + "type": "debugpy", "request": "launch", "module": "pytest", "justMyCode": true, @@ -63,7 +63,7 @@ }, { "name": "Run coverage tests", - "type": "python", + "type": "debugpy", "request": "launch", "module": "pytest", "justMyCode": true, From c498a65f97c3e883df1f6acf6ea436c306a061b2 Mon Sep 17 00:00:00 2001 From: Yiannis Charalambous Date: Thu, 5 Sep 2024 00:08:01 +0300 Subject: [PATCH 10/18] pylint: disable unspecified-encoding --- .pylintrc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.pylintrc b/.pylintrc index c6dcdc0..b5c747f 100644 --- a/.pylintrc +++ b/.pylintrc @@ -430,7 +430,8 @@ disable=raw-checker-failed, deprecated-pragma, use-symbolic-message-instead, use-implicit-booleaness-not-comparison-to-string, - use-implicit-booleaness-not-comparison-to-zero + use-implicit-booleaness-not-comparison-to-zero, + unspecified-encoding # Enable the message, report, category or checker with the given id(s). You can # either give multiple identifier separated by comma (,) or put this option From 67359a0f1ad6e0ce454f0c3742307b8b4c09cc67 Mon Sep 17 00:00:00 2001 From: Yiannis Charalambous Date: Thu, 5 Sep 2024 00:08:10 +0300 Subject: [PATCH 11/18] Added lizard library --- Pipfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Pipfile b/Pipfile index c739635..3f3c2e2 100644 --- a/Pipfile +++ b/Pipfile @@ -25,6 +25,7 @@ clang = "*" langchain = "*" langchain-openai = "*" langchain-community = "*" +lizard = "*" [dev-packages] pylint = "*" From fc817353fc60e7515ce0317f590f8574ad2c182c Mon Sep 17 00:00:00 2001 From: Yiannis Charalambous Date: Thu, 5 Sep 2024 15:32:24 +0300 Subject: [PATCH 12/18] config: temp_file_dir can now be None --- esbmc_ai/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esbmc_ai/config.py b/esbmc_ai/config.py index dfc752d..28b4ccc 100644 --- a/esbmc_ai/config.py +++ b/esbmc_ai/config.py @@ -37,7 +37,7 @@ ] temp_auto_clean: bool = True -temp_file_dir: str = "." +temp_file_dir: Optional[str] = None ai_model: AIModel esbmc_output_type: str = "full" From 5824089d311f50662ce1a5c13caf999e3762df06 Mon Sep 17 00:00:00 2001 From: Yiannis Charalambous Date: Thu, 5 Sep 2024 15:33:10 +0300 Subject: [PATCH 13/18] esbmc_load_source_code now uses SourceFile to do the saving --- esbmc_ai/esbmc_util.py | 42 ++++++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/esbmc_ai/esbmc_util.py b/esbmc_ai/esbmc_util.py index 0ee5b68..a034a2e 100644 --- a/esbmc_ai/esbmc_util.py +++ b/esbmc_ai/esbmc_util.py @@ -6,6 +6,8 @@ from pathlib import Path from typing import Optional +from esbmc_ai.solution import SourceFile + from . import config @@ -123,38 +125,34 @@ def esbmc(path: Path, esbmc_params: list, timeout: Optional[float] = None): def esbmc_load_source_code( - file_path: Path, - source_code: str, + source_file: SourceFile, + source_file_content_index: int, esbmc_params: list = config.esbmc_params, auto_clean: bool = config.temp_auto_clean, timeout: Optional[float] = None, ): - source_code_path = Path(file_path) - - # Create temp path. - delete_path: bool = False - if not os.path.exists(config.temp_file_dir): - os.mkdir(config.temp_file_dir) - delete_path = True - - temp_file_path = Path(config.temp_file_dir) / source_code_path.name - # Create temp file. - with open(temp_file_path, "w") as file: - # Save to temporary folder and flush contents. - file.write(source_code) - file.flush() + file_path: Path + if config.temp_file_dir: + file_path = source_file.save_file( + file_path=Path(config.temp_file_dir), + temp_dir=False, + index=source_file_content_index, + ) + else: + file_path = source_file.save_file( + file_path=None, + temp_dir=True, + index=source_file_content_index, + ) - # Call ESBMC to temporary folder. - results = esbmc(temp_file_path, esbmc_params, timeout=timeout) + # Call ESBMC to temporary folder. + results = esbmc(file_path, esbmc_params, timeout=timeout) # Delete temp files and path if auto_clean: # Remove file - os.remove(temp_file_path) - # Remove file path if created this run and is empty. - if delete_path and len(os.listdir(config.temp_file_dir)) == 0: - os.rmdir(config.temp_file_dir) + os.remove(file_path) # Return return results From 4b76fc477a9ec1346903edaa2d1e18a490818e03 Mon Sep 17 00:00:00 2001 From: Yiannis Charalambous Date: Thu, 5 Sep 2024 15:34:31 +0300 Subject: [PATCH 14/18] Add file extension tracking to SourceFile --- esbmc_ai/solution.py | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/esbmc_ai/solution.py b/esbmc_ai/solution.py index aa132d5..ee9c10e 100644 --- a/esbmc_ai/solution.py +++ b/esbmc_ai/solution.py @@ -38,19 +38,31 @@ def apply_line_patch( lines = lines[:start] + [patch] + lines[end + 1 :] return "\n".join(lines) - def __init__(self, file_path: Optional[Path], content: str) -> None: + def __init__( + self, file_path: Optional[Path], content: str, file_ext: Optional[str] = None + ) -> None: self._file_path: Optional[Path] = file_path # Content file shows the file throughout the repair process. Index 0 is # the orignial. self._content: list[str] = [content] # Map _content (each iteration) to esbmc output self._esbmc_output: dict[int, str] = {} + self._file_ext: Optional[str] = file_ext @property def file_path(self) -> Optional[Path]: """Returns the file path of this source file.""" return self._file_path + @property + def file_extension(self) -> str: + if self._file_ext: + return self._file_ext + elif self._file_path: + return self._file_path.suffix + else: + raise ValueError("No extension for SourceFile could be resolved") + @property def initial_content(self) -> str: """Returns the initial state content.""" @@ -147,19 +159,23 @@ def save_file( """Saves the source code file. If file_path is not specified, it will generate an automatic name. If temp_dir is True, it will place the saved file in /tmp and use the file_path file name only.""" - # Ensure if file_path if given, then it is a file. - assert not file_path or file_path.is_file(), "file_path needs to be a file" - file_name: str + file_name: Optional[str] = None dir_path: Optional[Path] = None if file_path: - file_name = file_path.name - dir_path = file_path.parent + # If file path is a file, then use the name and directory. If not + # then use a temporary name and just store the folder. + if file_path.is_file(): + file_name = file_path.name + dir_path = file_path.parent + else: + dir_path = file_path else: if not self._file_path: raise ValueError( "Source code file does not have a name or file_path to save to" ) + # Just store the file and use the temp dir. file_name = self._file_path.name if temp_dir: @@ -177,7 +193,7 @@ def save_file( mode="w", buffering=-1, newline=None, - suffix=None, + suffix=self.file_extension, prefix=file_name, dir=dir_path, delete=False, From f9293551f6e620b24e9f431bc0a7e23056478edd Mon Sep 17 00:00:00 2001 From: Yiannis Charalambous Date: Thu, 5 Sep 2024 15:34:52 +0300 Subject: [PATCH 15/18] Update FixCodeCommand --- esbmc_ai/commands/fix_code_command.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/esbmc_ai/commands/fix_code_command.py b/esbmc_ai/commands/fix_code_command.py index 62f8d4d..dce3e2b 100644 --- a/esbmc_ai/commands/fix_code_command.py +++ b/esbmc_ai/commands/fix_code_command.py @@ -165,8 +165,8 @@ def print_raw_conversation() -> None: # to a temporary location since ESBMC needs it in file format. self.anim.start("Verifying with ESBMC... Please Wait") exit_code, esbmc_output = esbmc_load_source_code( - file_path=source_file.file_path, - source_code=source_file.latest_content, + source_file=source_file, + source_file_content_index=-1, esbmc_params=config.esbmc_params, auto_clean=config.temp_auto_clean, timeout=config.verifier_timeout, From a99817e3b63173ac1540e2c031b1e045b8f5ef23 Mon Sep 17 00:00:00 2001 From: Yiannis Charalambous Date: Thu, 5 Sep 2024 15:47:24 +0300 Subject: [PATCH 16/18] Update config --- config.json | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/config.json b/config.json index 06a4f43..dd1f40a 100644 --- a/config.json +++ b/config.json @@ -70,7 +70,7 @@ ] }, "generate_solution": { - "max_attempts": 4, + "max_attempts": 5, "temperature": 0.0, "message_history": "normal", "scenarios": { @@ -87,8 +87,13 @@ ] } }, - "system": [], - "initial": "From now on, act as an Automated Code Repair Tool that repairs AI C code. You will be shown AI C code, along with ESBMC output. Pay close attention to the ESBMC output, which contains a stack trace along with the type of error that occurred and its location. Provide the repaired C code as output, as would an Automated Code Repair Tool. Aside from the corrected source code, do not output any other text. The ESBMC output is {esbmc_output} The source code is {source_code}" + "system": [ + { + "role": "System", + "content": "From now on, act as an Automated Code Repair Tool that repairs AI C code. You will be shown AI C code, along with ESBMC output. Pay close attention to the ESBMC output, which contains a stack trace along with the type of error that occurred and its location. " + } + ], + "initial": "Provide the repaired C code as output, as would an Automated Code Repair Tool. Aside from the corrected source code, do not output any other text. The ESBMC output is {esbmc_output} The source code is {source_code}" } } } \ No newline at end of file From f5d1d6b900f50da3a1987ea4debe5710ea863964 Mon Sep 17 00:00:00 2001 From: Yiannis Charalambous Date: Thu, 5 Sep 2024 15:54:08 +0300 Subject: [PATCH 17/18] Updated solution --- esbmc_ai/solution.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/esbmc_ai/solution.py b/esbmc_ai/solution.py index ee9c10e..05cf2a2 100644 --- a/esbmc_ai/solution.py +++ b/esbmc_ai/solution.py @@ -140,7 +140,7 @@ def get_patch(self, index_a: int, index_b: int) -> str: # return new_source def update_content(self, content: str, reset_changes: bool = False) -> None: - """Ascociates a new version of the content of a file to a file.""" + """Ascociates a new version of the content of source code to a file.""" if reset_changes: self._content = [content] else: @@ -150,7 +150,8 @@ def assign_verifier_output(self, verifier_output: str, index: int = -1) -> None: """Assigns verifier output to the ascociated file. If no file is given, then assigns to the latest one.""" if index < 0: - index = len(self._content) - 1 + # Simulate negative indicies like with Lists. + index = len(self._content) + index self._esbmc_output[index] = verifier_output def save_file( @@ -203,12 +204,15 @@ def save_file( @classmethod def calculate_cyclomatic_complexity_delta( - cls, source_1: "SourceFile", source_2: "SourceFile" + cls, + source_1: "SourceFile", + source_2: "SourceFile", + temp_dir: bool = True, ) -> Optional[float]: """Calculates the cyclomatic complexity difference between the two source files.""" try: - file_1: Path = source_1.save_file(None, True) - file_2: Path = source_2.save_file(None, True) + file_1: Path = source_1.save_file(None, temp_dir=temp_dir) + file_2: Path = source_2.save_file(None, temp_dir=temp_dir) cc1 = lizard.analyze_file(file_1) cc2 = lizard.analyze_file(file_2) From bd02f6a42033671618baa9c33dedaad0fe0e5c9c Mon Sep 17 00:00:00 2001 From: Yiannis Charalambous Date: Thu, 5 Sep 2024 15:57:09 +0300 Subject: [PATCH 18/18] Updated tests --- tests/regtest/test_base_chat_interface.py | 4 ++-- tests/test_base_chat_interface.py | 4 ++-- tests/test_latest_state_solution_generator.py | 2 +- tests/test_solution.py | 19 ++++--------------- tests/test_solution_generator.py | 2 +- tests/test_user_chat.py | 4 ++-- 6 files changed, 12 insertions(+), 23 deletions(-) diff --git a/tests/regtest/test_base_chat_interface.py b/tests/regtest/test_base_chat_interface.py index f7d3db5..8945f9a 100644 --- a/tests/regtest/test_base_chat_interface.py +++ b/tests/regtest/test_base_chat_interface.py @@ -2,12 +2,12 @@ import pytest -from langchain.llms.fake import FakeListLLM +from langchain_community.llms import FakeListLLM from langchain.schema import BaseMessage, HumanMessage, AIMessage, SystemMessage from esbmc_ai.ai_models import AIModel from esbmc_ai.chat_response import ChatResponse -from esbmc_ai.base_chat_interface import BaseChatInterface +from esbmc_ai.chats.base_chat_interface import BaseChatInterface from esbmc_ai.config import AIAgentConversation, ChatPromptSettings diff --git a/tests/test_base_chat_interface.py b/tests/test_base_chat_interface.py index 8d79412..42b7b1a 100644 --- a/tests/test_base_chat_interface.py +++ b/tests/test_base_chat_interface.py @@ -2,10 +2,10 @@ import pytest -from langchain.llms.fake import FakeListLLM +from langchain_community.llms import FakeListLLM from langchain.schema import AIMessage, BaseMessage, HumanMessage, SystemMessage from esbmc_ai.ai_models import AIModel -from esbmc_ai.base_chat_interface import BaseChatInterface +from esbmc_ai.chats.base_chat_interface import BaseChatInterface from esbmc_ai.chat_response import ChatResponse from esbmc_ai.config import AIAgentConversation, ChatPromptSettings diff --git a/tests/test_latest_state_solution_generator.py b/tests/test_latest_state_solution_generator.py index 61edf4e..679e2d6 100644 --- a/tests/test_latest_state_solution_generator.py +++ b/tests/test_latest_state_solution_generator.py @@ -9,7 +9,7 @@ from esbmc_ai.ai_models import AIModel from esbmc_ai.chat_response import ChatResponse from esbmc_ai.config import AIAgentConversation, ChatPromptSettings -from esbmc_ai.latest_state_solution_generator import LatestStateSolutionGenerator +from esbmc_ai.chats.latest_state_solution_generator import LatestStateSolutionGenerator @pytest.fixture(scope="function") diff --git a/tests/test_solution.py b/tests/test_solution.py index c3b6fa0..f5d189f 100644 --- a/tests/test_solution.py +++ b/tests/test_solution.py @@ -3,6 +3,10 @@ import pytest from esbmc_ai.solution import Solution, SourceFile +##################################### +# Solution +##################################### + @pytest.fixture(scope="function") def solution() -> Solution: @@ -44,18 +48,3 @@ def test_apply_line_patch() -> None: text = "\n".join(["a", "b", "c", "d", "e", "f", "g"]) answer = "\n".join(["a", "b", "c", "1", "e", "f", "g"]) assert SourceFile.apply_line_patch(text, "1", 3, 3) == answer - - -def test_get_patch() -> None: - raise NotImplementedError() - - -def test_update_content() -> None: - raise NotImplementedError() - - -def test_assign_verifier_output() -> None: - raise NotImplementedError() - - -# TODO Add more tests for solution and sourcefile diff --git a/tests/test_solution_generator.py b/tests/test_solution_generator.py index be6a209..ff86e7b 100644 --- a/tests/test_solution_generator.py +++ b/tests/test_solution_generator.py @@ -6,7 +6,7 @@ from esbmc_ai.ai_models import AIModel from esbmc_ai.config import AIAgentConversation, ChatPromptSettings -from esbmc_ai.solution_generator import SolutionGenerator +from esbmc_ai.chats.solution_generator import SolutionGenerator @pytest.fixture(scope="function") diff --git a/tests/test_user_chat.py b/tests/test_user_chat.py index 3c2eca1..f0198ca 100644 --- a/tests/test_user_chat.py +++ b/tests/test_user_chat.py @@ -2,13 +2,13 @@ import pytest -from langchain.llms.fake import FakeListLLM +from langchain_community.llms import FakeListLLM from langchain.schema import AIMessage, SystemMessage from esbmc_ai.ai_models import AIModel from esbmc_ai.chat_response import ChatResponse, FinishReason from esbmc_ai.config import AIAgentConversation, ChatPromptSettings -from esbmc_ai.user_chat import UserChat +from esbmc_ai.chats.user_chat import UserChat @pytest.fixture