diff --git a/.pylintrc b/.pylintrc index c6dcdc0..b5c747f 100644 --- a/.pylintrc +++ b/.pylintrc @@ -430,7 +430,8 @@ disable=raw-checker-failed, deprecated-pragma, use-symbolic-message-instead, use-implicit-booleaness-not-comparison-to-string, - use-implicit-booleaness-not-comparison-to-zero + use-implicit-booleaness-not-comparison-to-zero, + unspecified-encoding # Enable the message, report, category or checker with the given id(s). You can # either give multiple identifier separated by comma (,) or put this option diff --git a/.vscode/launch.json b/.vscode/launch.json index 99814f0..caf4bfc 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -6,7 +6,7 @@ "configurations": [ { "name": "Launch ESBMC-AI on Open File", - "type": "python", + "type": "debugpy", "request": "launch", "module": "esbmc_ai", "justMyCode": true, @@ -17,7 +17,7 @@ }, { "name": "Fix Code on Open File", - "type": "python", + "type": "debugpy", "request": "launch", "module": "esbmc_ai", "justMyCode": true, @@ -31,7 +31,7 @@ }, { "name": "Optimize Code on Open File", - "type": "python", + "type": "debugpy", "request": "launch", "module": "esbmc_ai", "justMyCode": true, @@ -44,7 +44,7 @@ }, { "name": "Python: Current File", - "type": "python", + "type": "debugpy", "request": "launch", "program": "${file}", "console": "integratedTerminal", @@ -52,7 +52,7 @@ }, { "name": "Run tests", - "type": "python", + "type": "debugpy", "request": "launch", "module": "pytest", "justMyCode": true, @@ -63,7 +63,7 @@ }, { "name": "Run coverage tests", - "type": "python", + "type": "debugpy", "request": "launch", "module": "pytest", "justMyCode": true, diff --git a/Pipfile b/Pipfile index c739635..3f3c2e2 100644 --- a/Pipfile +++ b/Pipfile @@ -25,6 +25,7 @@ clang = "*" langchain = "*" langchain-openai = "*" langchain-community = "*" +lizard = "*" [dev-packages] pylint = "*" diff --git a/config.json b/config.json index 06a4f43..dd1f40a 100644 --- a/config.json +++ b/config.json @@ -70,7 +70,7 @@ ] }, "generate_solution": { - "max_attempts": 4, + "max_attempts": 5, "temperature": 0.0, "message_history": "normal", "scenarios": { @@ -87,8 +87,13 @@ ] } }, - "system": [], - "initial": "From now on, act as an Automated Code Repair Tool that repairs AI C code. You will be shown AI C code, along with ESBMC output. Pay close attention to the ESBMC output, which contains a stack trace along with the type of error that occurred and its location. Provide the repaired C code as output, as would an Automated Code Repair Tool. Aside from the corrected source code, do not output any other text. The ESBMC output is {esbmc_output} The source code is {source_code}" + "system": [ + { + "role": "System", + "content": "From now on, act as an Automated Code Repair Tool that repairs AI C code. You will be shown AI C code, along with ESBMC output. Pay close attention to the ESBMC output, which contains a stack trace along with the type of error that occurred and its location. " + } + ], + "initial": "Provide the repaired C code as output, as would an Automated Code Repair Tool. Aside from the corrected source code, do not output any other text. The ESBMC output is {esbmc_output} The source code is {source_code}" } } } \ No newline at end of file diff --git a/esbmc_ai/__main__.py b/esbmc_ai/__main__.py index 8c528d3..e9a715d 100755 --- a/esbmc_ai/__main__.py +++ b/esbmc_ai/__main__.py @@ -3,11 +3,17 @@ # Author: Yiannis Charalambous 2023 import os +from pathlib import Path import re import sys # Enables arrow key functionality for input(). Do not remove import. import readline +from typing import Optional + +from esbmc_ai.commands.fix_code_command import FixCodeCommandResult + +_ = readline import argparse from langchain.base_language import BaseLanguageModel @@ -15,12 +21,7 @@ import esbmc_ai.config as config from esbmc_ai import __author__, __version__ -from esbmc_ai.frontend.solution import ( - SourceFile, - get_main_source_file, - set_main_source_file, - get_main_source_file_path, -) +from esbmc_ai.solution import SourceFile, Solution, get_solution from esbmc_ai.commands import ( ChatCommand, @@ -30,7 +31,7 @@ ) from esbmc_ai.loading_widget import LoadingWidget, create_loading_widget -from esbmc_ai.user_chat import UserChat +from esbmc_ai.chats import UserChat from esbmc_ai.logging import print_horizontal_line, printv, printvv from esbmc_ai.esbmc_util import esbmc from esbmc_ai.chat_response import FinishReason, ChatResponse @@ -123,7 +124,34 @@ def init_commands_list() -> None: def update_solution(source_code: str) -> None: - set_main_source_file(SourceFile(get_main_source_file_path(), source_code)) + get_solution().files[0].update_content(content=source_code, reset_changes=True) + + +def _run_esbmc(source_file: SourceFile, anim: Optional[LoadingWidget] = None) -> str: + assert source_file.file_path + + if anim: + anim.start("ESBMC is processing... Please Wait") + exit_code, esbmc_output = esbmc( + path=source_file.file_path, + esbmc_params=config.esbmc_params, + timeout=config.verifier_timeout, + ) + if anim: + anim.stop() + + # ESBMC will output 0 for verification success and 1 for verification + # failed, if anything else gets thrown, it's an ESBMC error. + if not config.allow_successful and exit_code == 0: + printv("Success!") + print(esbmc_output) + sys.exit(0) + elif exit_code != 0 and exit_code != 1: + printv(f"ESBMC exit code: {exit_code}") + printv(f"ESBMC Output:\n\n{esbmc_output}") + sys.exit(1) + + return esbmc_output def init_commands() -> None: @@ -137,25 +165,30 @@ def init_commands() -> None: fix_code_command.on_solution_signal.add_listener(update_solution) -def _run_command_mode( - command: ChatCommand, - args: list[str], - esbmc_output: str, - source_code: str, -) -> None: +def _run_command_mode(command: ChatCommand, args: argparse.Namespace) -> None: + path_arg: Path = Path(args.filename) + + solution: Solution = get_solution() + if path_arg.is_dir(): + for path in path_arg.glob("**/*"): + if path.is_file() and path.name: + solution.add_source_file(path, None) + else: + solution.add_source_file(path_arg, None) + match command.command_name: case fix_code_command.command_name: - error, solution = fix_code_command.execute( - file_name=get_main_source_file_path(), - source_code=source_code, - esbmc_output=esbmc_output, - ) - - if error: - print("Failed all attempts...") - sys.exit(1) - else: - print(solution) + for source_file in solution.files: + # Run ESBMC first round + esbmc_output: str = _run_esbmc(source_file) + source_file.assign_verifier_output(esbmc_output) + + result: FixCodeCommandResult = fix_code_command.execute( + source_file=source_file, + generate_patches=config.generate_patches, + ) + + print(result) case _: command.execute() sys.exit(0) @@ -249,7 +282,15 @@ def main() -> None: + "}", ) - args = parser.parse_args() + parser.add_argument( + "-p", + "--generate-patches", + action="store_true", + default=False, + help="Generate patch files and place them in the same folder as the source files.", + ) + + args: argparse.Namespace = parser.parse_args() print(f"ESBMC-AI {__version__}") print(f"Made by {__author__}") @@ -270,39 +311,12 @@ def main() -> None: printv("Reading source code...") print(f"Running ESBMC with {config.esbmc_params}\n") - # Cast to string (for language servers) - args.filename = str(args.filename) - - # Read source code - with open(args.filename, mode="r") as file: - # Add the main source file to the solution explorer. - set_main_source_file(SourceFile(args.filename, file.read())) + assert isinstance(args.filename, str) - anim.start("ESBMC is processing... Please Wait") - exit_code, esbmc_output = esbmc( - path=get_main_source_file_path(), - esbmc_params=config.esbmc_params, - timeout=config.verifier_timeout, - ) - anim.stop() - - # Print verbose lvl 2 - print_horizontal_line(2) - printvv(esbmc_output) - print_horizontal_line(2) - - # ESBMC will output 0 for verification success and 1 for verification - # failed, if anything else gets thrown, it's an ESBMC error. - if not config.allow_successful and exit_code == 0: - print("Success!") - print(esbmc_output) - sys.exit(0) - elif exit_code != 0 and exit_code != 1: - print(f"ESBMC exit code: {exit_code}") - print(f"ESBMC Output:\n\n{esbmc_output}") - sys.exit(1) - - # Command mode: Check if command is called and call it. + # =========================================== + # Command mode + # =========================================== + # Check if command is called and call it. # If not, then continue to user mode. if args.command != None: command = args.command @@ -310,14 +324,46 @@ def main() -> None: print("Running Command:", command) for idx, command_name in enumerate(command_names): if command == command_name: - _run_command_mode( - command=commands[idx], - args=[], # NOTE: Currently not supported... - source_code=get_main_source_file().content, - esbmc_output=esbmc_output, - ) + _run_command_mode(command=commands[idx], args=args) sys.exit(0) + # =========================================== + # User Mode (Supports only 1 file) + # =========================================== + + # Init Solution + solution: Solution + # Determine if we are processing one file versus multiple files + path_arg: Path = Path(args.filename) + if path_arg.is_dir(): + # Load only files. + print( + "Processing multiple files is not supported in User Mode." + "Call a command using -c to process directories" + ) + sys.exit(1) + else: + # Add the main source file to the solution explorer. + solution: Solution = get_solution() + solution.add_source_file(path_arg, None) + del path_arg + + # Assert that we have one file and one filepath + assert len(solution.files) == 1 + + source_file: SourceFile = solution.files[0] + assert source_file.file_path + + esbmc_output: str = _run_esbmc(source_file, anim) + + # Print verbose lvl 2 + print_horizontal_line(2) + printvv(esbmc_output) + print_horizontal_line(2) + + source_file.assign_verifier_output(esbmc_output) + del esbmc_output + printv(f"Initializing the LLM: {config.ai_model.name}\n") chat_llm: BaseLanguageModel = config.ai_model.create_llm( api_keys=config.api_keys, @@ -332,8 +378,8 @@ def main() -> None: ai_model_agent=config.chat_prompt_user_mode, ai_model=config.ai_model, llm=chat_llm, - source_code=get_main_source_file().content, - esbmc_output=esbmc_output, + source_code=source_file.latest_content, + esbmc_output=source_file.latest_verifier_output, set_solution_messages=config.chat_prompt_user_mode.scenarios["set_solution"], ) @@ -345,7 +391,6 @@ def main() -> None: if len(config.chat_prompt_user_mode.initial_prompt) > 0: printv("Using initial prompt from file...\n") anim.start("Model is parsing ESBMC output... Please Wait") - # TODO Make protected response = chat.send_message( message=config.chat_prompt_user_mode.initial_prompt, ) @@ -375,17 +420,16 @@ def main() -> None: print() print("ESBMC-AI will generate a fix for the code...") - error, solution = fix_code_command.execute( - file_name=get_main_source_file_path(), - source_code=get_main_source_file().content, - esbmc_output=esbmc_output, + result: FixCodeCommandResult = fix_code_command.execute( + source_file=source_file, + generate_patches=config.generate_patches, ) - if not error: + if result.successful: print( "\n\nESBMC-AI: Here is the corrected code, verified with ESBMC:" ) - print(f"```\n{solution}\n```") + print(f"```\n{result.repaired_source}\n```") continue else: # Commands without parameters or returns are handled automatically. diff --git a/esbmc_ai/chats/__init__.py b/esbmc_ai/chats/__init__.py new file mode 100644 index 0000000..e2b4c2a --- /dev/null +++ b/esbmc_ai/chats/__init__.py @@ -0,0 +1,13 @@ +# Author: Yiannis Charalambous + +from .base_chat_interface import BaseChatInterface +from .latest_state_solution_generator import LatestStateSolutionGenerator +from .solution_generator import SolutionGenerator +from .user_chat import UserChat + +__all__ = [ + "BaseChatInterface", + "LatestStateSolutionGenerator", + "SolutionGenerator", + "UserChat", +] diff --git a/esbmc_ai/base_chat_interface.py b/esbmc_ai/chats/base_chat_interface.py similarity index 97% rename from esbmc_ai/base_chat_interface.py rename to esbmc_ai/chats/base_chat_interface.py index e6fc41f..6344734 100644 --- a/esbmc_ai/base_chat_interface.py +++ b/esbmc_ai/chats/base_chat_interface.py @@ -13,8 +13,8 @@ ) from esbmc_ai.config import ChatPromptSettings -from .chat_response import ChatResponse, FinishReason -from .ai_models import AIModel +from esbmc_ai.chat_response import ChatResponse, FinishReason +from esbmc_ai.ai_models import AIModel class BaseChatInterface(object): diff --git a/esbmc_ai/latest_state_solution_generator.py b/esbmc_ai/chats/latest_state_solution_generator.py similarity index 93% rename from esbmc_ai/latest_state_solution_generator.py rename to esbmc_ai/chats/latest_state_solution_generator.py index 81d15a5..0bda6f8 100644 --- a/esbmc_ai/latest_state_solution_generator.py +++ b/esbmc_ai/chats/latest_state_solution_generator.py @@ -2,7 +2,7 @@ from typing_extensions import override from langchain_core.messages import BaseMessage -from esbmc_ai.solution_generator import SolutionGenerator +from esbmc_ai.chats.solution_generator import SolutionGenerator from esbmc_ai.chat_response import FinishReason # TODO Test me diff --git a/esbmc_ai/solution_generator.py b/esbmc_ai/chats/solution_generator.py similarity index 97% rename from esbmc_ai/solution_generator.py rename to esbmc_ai/chats/solution_generator.py index 95b8eec..6187ee3 100644 --- a/esbmc_ai/solution_generator.py +++ b/esbmc_ai/chats/solution_generator.py @@ -8,9 +8,9 @@ from esbmc_ai.chat_response import ChatResponse, FinishReason from esbmc_ai.config import ChatPromptSettings, DynamicAIModelAgent -from esbmc_ai.frontend.solution import apply_line_patch +from esbmc_ai.solution import SourceFile -from .ai_models import AIModel +from esbmc_ai.ai_models import AIModel from .base_chat_interface import BaseChatInterface from esbmc_ai.esbmc_util import ( esbmc_get_counter_example, @@ -205,6 +205,8 @@ def generate_solution(self) -> tuple[str, FinishReason]: assert ( line ), "fix code command: error line could not be found to apply brutal patch replacement" - solution = apply_line_patch(self.source_code_raw, solution, line, line) + solution = SourceFile.apply_line_patch( + self.source_code_raw, solution, line, line + ) return solution, response.finish_reason diff --git a/esbmc_ai/user_chat.py b/esbmc_ai/chats/user_chat.py similarity index 98% rename from esbmc_ai/user_chat.py rename to esbmc_ai/chats/user_chat.py index 0e1bbfd..3c1cdd6 100644 --- a/esbmc_ai/user_chat.py +++ b/esbmc_ai/chats/user_chat.py @@ -9,8 +9,8 @@ from langchain.schema import BaseMessage, SystemMessage from esbmc_ai.config import AIAgentConversation, ChatPromptSettings +from esbmc_ai.ai_models import AIModel -from .ai_models import AIModel from .base_chat_interface import BaseChatInterface diff --git a/esbmc_ai/commands/chat_command.py b/esbmc_ai/commands/chat_command.py index c91105e..6109e1c 100644 --- a/esbmc_ai/commands/chat_command.py +++ b/esbmc_ai/commands/chat_command.py @@ -3,6 +3,8 @@ from abc import ABC, abstractmethod from typing import Any, Optional +from esbmc_ai.commands.command_result import CommandResult + class ChatCommand(ABC): command_name: str @@ -17,7 +19,6 @@ def __init__( self.command_name = command_name self.help_message = help_message - @abstractmethod - def execute(self, **kwargs: Optional[Any]) -> Optional[Any]: + def execute(self, **kwargs: Optional[Any]) -> Optional[CommandResult]: raise NotImplementedError(f"Command {self.command_name} is not implemented.") diff --git a/esbmc_ai/commands/command_result.py b/esbmc_ai/commands/command_result.py new file mode 100644 index 0000000..58703cc --- /dev/null +++ b/esbmc_ai/commands/command_result.py @@ -0,0 +1,15 @@ +# Author: Yiannis Charalambous + +from abc import abstractmethod + + +class CommandResult: + @property + @abstractmethod + def successful(self) -> bool: + raise NotImplementedError() + + def __str__(self) -> str: + return f"Command returned " + ( + "successful" if self.successful else "unsuccessful" + ) diff --git a/esbmc_ai/commands/exit_command.py b/esbmc_ai/commands/exit_command.py index 2fb38dd..943c446 100644 --- a/esbmc_ai/commands/exit_command.py +++ b/esbmc_ai/commands/exit_command.py @@ -15,6 +15,6 @@ def __init__(self) -> None: ) @override - def execute(self, **_: Optional[Any]) -> Optional[Any]: + def execute(self, **_: Optional[Any]) -> None: print("exiting...") sys.exit(0) diff --git a/esbmc_ai/commands/fix_code_command.py b/esbmc_ai/commands/fix_code_command.py index 97abcab..dce3e2b 100644 --- a/esbmc_ai/commands/fix_code_command.py +++ b/esbmc_ai/commands/fix_code_command.py @@ -1,12 +1,15 @@ # Author: Yiannis Charalambous import sys -from typing import Any, Tuple +from typing import Any, Optional, Tuple from typing_extensions import override from esbmc_ai.chat_response import FinishReason -from esbmc_ai.latest_state_solution_generator import LatestStateSolutionGenerator +from esbmc_ai.chats import LatestStateSolutionGenerator, SolutionGenerator +from esbmc_ai.chats.solution_generator import ESBMCTimedOutException +from esbmc_ai.commands.command_result import CommandResult from esbmc_ai.reverse_order_solution_generator import ReverseOrderSolutionGenerator +from esbmc_ai.solution import SourceFile from .chat_command import ChatCommand from .. import config @@ -16,11 +19,28 @@ esbmc_get_error_type, esbmc_load_source_code, ) -from ..solution_generator import ( - ESBMCTimedOutException, - SolutionGenerator, -) from ..logging import print_horizontal_line, printv, printvv +from subprocess import CalledProcessError + + +class FixCodeCommandResult(CommandResult): + def __init__(self, successful: bool, repaired_source: Optional[str] = None) -> None: + super().__init__() + self._successful: bool = successful + self.repaired_source: Optional[str] = repaired_source + + @property + @override + def successful(self) -> bool: + return self._successful + + @override + def __str__(self) -> str: + return ( + self.repaired_source + if self._successful and self.repaired_source != None + else "ESBMC-AI Notice: Failed all attempts..." + ) class FixCodeCommand(ChatCommand): @@ -34,7 +54,7 @@ def __init__(self) -> None: self.anim = create_loading_widget() @override - def execute(self, **kwargs: Any) -> Tuple[bool, str]: + def execute(self, **kwargs: Any) -> FixCodeCommandResult: def print_raw_conversation() -> None: print_horizontal_line(0) print("ESBMC-AI Notice: Printing raw conversation...") @@ -44,12 +64,14 @@ def print_raw_conversation() -> None: print("\n" + "\n\n".join(messages)) print("ESBMC-AI Notice: End of raw conversation") - file_name: str = kwargs["file_name"] - source_code: str = kwargs["source_code"] - esbmc_output: str = kwargs["esbmc_output"] + source_file: SourceFile = kwargs["source_file"] + assert source_file.file_path + generate_patches: bool = ( + kwargs["generate_patches"] if "generate_patches" in kwargs else False + ) # Parse the esbmc output here and determine what "Scenario" to use. - scenario: str = esbmc_get_error_type(esbmc_output) + scenario: str = esbmc_get_error_type(source_file.initial_verifier_output) printv(f"Scenario: {scenario}") printv( @@ -108,8 +130,8 @@ def print_raw_conversation() -> None: try: solution_generator.update_state( - source_code=source_code, - esbmc_output=esbmc_output, + source_code=source_file.latest_content, + esbmc_output=source_file.latest_verifier_output, ) except ESBMCTimedOutException: print("error: ESBMC has timed out...") @@ -117,8 +139,8 @@ def print_raw_conversation() -> None: print() - max_retries: int = config.fix_code_max_attempts - for idx in range(max_retries): + attempts: int = config.fix_code_max_attempts + for attempt in range(1, attempts + 1): # Get a response. Use while loop to account for if the message stack # gets full, then need to compress and retry. while True: @@ -129,13 +151,13 @@ def print_raw_conversation() -> None: if finish_reason == FinishReason.length: solution_generator.compress_message_stack() else: - source_code = llm_solution + source_file.update_content(llm_solution) break # Print verbose lvl 2 printvv("\nESBMC-AI Notice: Source Code Generation:") print_horizontal_line(2) - printvv(source_code) + printvv(source_file.latest_content) print_horizontal_line(2) printvv("") @@ -143,34 +165,45 @@ def print_raw_conversation() -> None: # to a temporary location since ESBMC needs it in file format. self.anim.start("Verifying with ESBMC... Please Wait") exit_code, esbmc_output = esbmc_load_source_code( - file_path=file_name, - source_code=source_code, + source_file=source_file, + source_file_content_index=-1, esbmc_params=config.esbmc_params, auto_clean=config.temp_auto_clean, timeout=config.verifier_timeout, ) self.anim.stop() + source_file.assign_verifier_output(esbmc_output) + del esbmc_output + # Print verbose lvl 2 printvv("\nESBMC-AI Notice: ESBMC Output:") print_horizontal_line(2) - printvv(esbmc_output) + printvv(source_file.latest_verifier_output) print_horizontal_line(2) # Solution found if exit_code == 0: - self.on_solution_signal.emit(source_code) + self.on_solution_signal.emit(source_file.latest_content) if config.raw_conversation: print_raw_conversation() printv("ESBMC-AI Notice: Successfully verified code") - return False, source_code + returned_source: str + if generate_patches: + returned_source = source_file.get_patch(0, -1) + else: + returned_source = source_file.latest_content + + return FixCodeCommandResult(True, returned_source) try: # Update state - solution_generator.update_state(source_code, esbmc_output) + solution_generator.update_state( + source_file.latest_content, source_file.latest_verifier_output + ) except ESBMCTimedOutException: if config.raw_conversation: print_raw_conversation() @@ -178,13 +211,12 @@ def print_raw_conversation() -> None: sys.exit(1) # Failure case - print( - f"ESBMC-AI Notice: Failure {idx+1}/{max_retries}" + ": Retrying..." - if idx != max_retries - 1 - else "" - ) + if attempt != attempts: + print(f"ESBMC-AI Notice: Failure {attempt}/{attempts}: Retrying...") + else: + print(f"ESBMC-AI Notice: Failure {attempt}/{attempts}") if config.raw_conversation: print_raw_conversation() - return True, "ESBMC-AI Notice: Failed all attempts..." + return FixCodeCommandResult(False, None) diff --git a/esbmc_ai/config.py b/esbmc_ai/config.py index 44007b3..28b4ccc 100644 --- a/esbmc_ai/config.py +++ b/esbmc_ai/config.py @@ -37,7 +37,7 @@ ] temp_auto_clean: bool = True -temp_file_dir: str = "." +temp_file_dir: Optional[str] = None ai_model: AIModel esbmc_output_type: str = "full" @@ -57,6 +57,8 @@ cfg_path: str +generate_patches: bool + # TODO Get rid of this class as soon as ConfigTool with the pyautoconfig class AIAgentConversation(NamedTuple): @@ -498,3 +500,6 @@ def load_args(args) -> None: esbmc_params.extend(args.remaining) elif len(args.remaining) != 0: esbmc_params = args.remaining + + global generate_patches + generate_patches = args.generate_patches diff --git a/esbmc_ai/esbmc_util.py b/esbmc_ai/esbmc_util.py index 4a12013..a034a2e 100644 --- a/esbmc_ai/esbmc_util.py +++ b/esbmc_ai/esbmc_util.py @@ -6,6 +6,8 @@ from pathlib import Path from typing import Optional +from esbmc_ai.solution import SourceFile + from . import config @@ -91,13 +93,13 @@ def get_clang_err_line_index(clang_output: str) -> Optional[int]: return None -def esbmc(path: str, esbmc_params: list, timeout: Optional[float] = None): +def esbmc(path: Path, esbmc_params: list, timeout: Optional[float] = None): """Exit code will be 0 if verification successful, 1 if verification failed. And any other number for compilation error/general errors.""" # Build parameters esbmc_cmd = [config.esbmc_path] esbmc_cmd.extend(esbmc_params) - esbmc_cmd.append(path) + esbmc_cmd.append(str(path)) if "--timeout" in esbmc_cmd: print( @@ -123,38 +125,34 @@ def esbmc(path: str, esbmc_params: list, timeout: Optional[float] = None): def esbmc_load_source_code( - file_path: str, - source_code: str, + source_file: SourceFile, + source_file_content_index: int, esbmc_params: list = config.esbmc_params, auto_clean: bool = config.temp_auto_clean, timeout: Optional[float] = None, ): - source_code_path = Path(file_path) - - # Create temp path. - delete_path: bool = False - if not os.path.exists(config.temp_file_dir): - os.mkdir(config.temp_file_dir) - delete_path = True - - temp_file_path = f"{config.temp_file_dir}{os.sep}{source_code_path.name}" - # Create temp file. - with open(temp_file_path, "w") as file: - # Save to temporary folder and flush contents. - file.write(source_code) - file.flush() + file_path: Path + if config.temp_file_dir: + file_path = source_file.save_file( + file_path=Path(config.temp_file_dir), + temp_dir=False, + index=source_file_content_index, + ) + else: + file_path = source_file.save_file( + file_path=None, + temp_dir=True, + index=source_file_content_index, + ) - # Call ESBMC to temporary folder. - results = esbmc(file.name, esbmc_params, timeout=timeout) + # Call ESBMC to temporary folder. + results = esbmc(file_path, esbmc_params, timeout=timeout) # Delete temp files and path if auto_clean: # Remove file - os.remove(temp_file_path) - # Remove file path if created this run and is empty. - if delete_path and len(os.listdir(config.temp_file_dir)) == 0: - os.rmdir(config.temp_file_dir) + os.remove(file_path) # Return return results diff --git a/esbmc_ai/frontend/__init__.py b/esbmc_ai/frontend/__init__.py deleted file mode 100644 index 1f91aa3..0000000 --- a/esbmc_ai/frontend/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# Author: Yiannis Charalambous diff --git a/esbmc_ai/frontend/solution.py b/esbmc_ai/frontend/solution.py deleted file mode 100644 index 3437ddf..0000000 --- a/esbmc_ai/frontend/solution.py +++ /dev/null @@ -1,61 +0,0 @@ -# Author: Yiannis Charalambous - -"""# Solution -Keeps track of all the source files that ESBMC-AI is targeting.""" - -from typing import NamedTuple - - -class SourceFile(NamedTuple): - file_path: str - content: str - - -_main_source_file: SourceFile = SourceFile("", "") -_source_files: set[SourceFile] = set() - - -def add_source_file(source_file: SourceFile) -> None: - global _source_files - _source_files.add(source_file) - - -def set_main_source_file(source_file: SourceFile) -> None: - add_source_file(source_file) - global _main_source_file - _main_source_file = source_file - - -def get_main_source_file_path() -> str: - global _main_source_file - return _main_source_file.file_path - - -def get_main_source_file() -> SourceFile: - global _main_source_file - return _main_source_file - - -def get_source_files() -> list[SourceFile]: - global _source_files - return list(_source_files) - - -def apply_line_patch(source_code: str, patch: str, start: int, end: int) -> str: - """Applies a patch to the source code. - - To replace a single line, start and end are equal. - - Args: - * source_code - The source code to apply the patch to. - * patch - Can be a line or multiple lines but will replace the start and - end region defined. - * start - Line index to mark start of replacement. - * end - Marks the end of the region where the patch will be applied to. - End is non-inclusive.""" - assert ( - start <= end - ), f"start ({start}) needs to be less than or equal to end ({end})" - lines: list[str] = source_code.splitlines() - lines = lines[:start] + [patch] + lines[end + 1 :] - return "\n".join(lines) diff --git a/esbmc_ai/optimize_code.py b/esbmc_ai/optimize_code.py deleted file mode 100644 index 092c2ce..0000000 --- a/esbmc_ai/optimize_code.py +++ /dev/null @@ -1,37 +0,0 @@ -# Author: Yiannis Charalambous - -from langchain.base_language import BaseLanguageModel -from langchain.schema import AIMessage, BaseMessage, HumanMessage - -from esbmc_ai.config import ChatPromptSettings -from .base_chat_interface import BaseChatInterface, ChatResponse -from .ai_models import AIModel - - -class OptimizeCode(BaseChatInterface): - initial_message: str - - def __init__( - self, - ai_model_agent: ChatPromptSettings, - initial_message: str, - ai_model: AIModel, - llm: BaseLanguageModel, - ) -> None: - super().__init__(ai_model_agent=ai_model_agent, ai_model=ai_model, llm=llm) - - self.initial_message = initial_message - - def optimize_function(self, source_code: str, function_name: str) -> ChatResponse: - self.messages = [] - self.push_to_message_stack( - HumanMessage( - content=f"Reply OK if you understand the following is the source code to optimize:\n\n{source_code}" - ) - ) - self.push_to_message_stack(AIMessage(content="OK.")) - - expanded_initial_message: str = self.initial_message.replace( - "%s", function_name - ) - return self.send_message(expanded_initial_message) diff --git a/esbmc_ai/reverse_order_solution_generator.py b/esbmc_ai/reverse_order_solution_generator.py index bd13d46..203b2ea 100644 --- a/esbmc_ai/reverse_order_solution_generator.py +++ b/esbmc_ai/reverse_order_solution_generator.py @@ -2,7 +2,7 @@ from langchain.schema import BaseMessage from typing_extensions import override, Optional -from esbmc_ai.solution_generator import SolutionGenerator +from esbmc_ai.chats.solution_generator import SolutionGenerator from esbmc_ai.chat_response import ChatResponse # TODO Test me diff --git a/esbmc_ai/solution.py b/esbmc_ai/solution.py new file mode 100644 index 0000000..05cf2a2 --- /dev/null +++ b/esbmc_ai/solution.py @@ -0,0 +1,276 @@ +# Author: Yiannis Charalambous + +"""# Solution +Keeps track of all the source files that ESBMC-AI is targeting. """ + +import os +from typing import Optional +from pathlib import Path +from subprocess import PIPE, STDOUT, run, CompletedProcess +from tempfile import NamedTemporaryFile, gettempdir +import lizard + + +class SourceFile: + """Represents a source file in the Solution. This class also holds the + verifier output. Contains methods to manipulate and get information about + different versions.""" + + @classmethod + def apply_line_patch( + cls, source_code: str, patch: str, start: int, end: int + ) -> str: + """Applies a patch to the source code. + + To replace a single line, start and end are equal. + + Args: + * source_code - The source code to apply the patch to. + * patch - Can be a line or multiple lines but will replace the start and + end region defined. + * start - Line index to mark start of replacement. + * end - Marks the end of the region where the patch will be applied to. + End is non-inclusive.""" + assert ( + start <= end + ), f"start ({start}) needs to be less than or equal to end ({end})" + lines: list[str] = source_code.splitlines() + lines = lines[:start] + [patch] + lines[end + 1 :] + return "\n".join(lines) + + def __init__( + self, file_path: Optional[Path], content: str, file_ext: Optional[str] = None + ) -> None: + self._file_path: Optional[Path] = file_path + # Content file shows the file throughout the repair process. Index 0 is + # the orignial. + self._content: list[str] = [content] + # Map _content (each iteration) to esbmc output + self._esbmc_output: dict[int, str] = {} + self._file_ext: Optional[str] = file_ext + + @property + def file_path(self) -> Optional[Path]: + """Returns the file path of this source file.""" + return self._file_path + + @property + def file_extension(self) -> str: + if self._file_ext: + return self._file_ext + elif self._file_path: + return self._file_path.suffix + else: + raise ValueError("No extension for SourceFile could be resolved") + + @property + def initial_content(self) -> str: + """Returns the initial state content.""" + return self._content[0] + + @property + def latest_content(self) -> str: + """Returns the latest available content.""" + return self._content[-1] + + @property + def content(self) -> tuple[str, ...]: + """Returns a tuple of the content of this source file.""" + return tuple(self._content) + + @property + def initial_verifier_output(self) -> str: + """Returns the first verifier output""" + assert 0 in self._esbmc_output, "Error: No initial verifier output assigned." + return self._esbmc_output[0] + + @property + def latest_verifier_output(self) -> str: + """Returns the latest verifier output""" + return self._esbmc_output[len(self._content) - 1] + + @property + def verifier_output(self) -> dict[int, str]: + """Returns the verifier outputs of the SourceFile""" + return self._esbmc_output + + def get_patch(self, index_a: int, index_b: int) -> str: + """Return diff between `index_a` and `index_b` which are indicies + referencing the content list.""" + assert len(self._content) > index_a and len(self._content) > index_b + + # Save as temp files + with ( + NamedTemporaryFile(mode="w", delete=False) as file_a, + NamedTemporaryFile(mode="w", delete=False) as file_b, + ): + file_a.write(self._content[index_a]) + file_a.flush() + file_b.write(self._content[index_b]) + file_b.flush() + + cmd = ["diff", file_a.name, file_b.name] + process: CompletedProcess = run( + cmd, + stdout=PIPE, + stderr=STDOUT, + check=False, + ) + + # Exit status is 0 if inputs are the same, 1 if different, 2 if trouble. + # https://askubuntu.com/questions/698784/exit-code-of-diff + if process.returncode == 2: + raise ValueError( + f"Diff for {file_a.name} and {file_b.name} failed (exit 2)." + ) + + return process.stdout.decode("utf-8") + + # Not used + # def push_line_patch(self, patch: str, start: int, end: int) -> str: + # """Calls `apply_line_patch` using the latest source code and then pushes + # the new patch to the content.""" + # new_source: str = SourceFile.apply_line_patch( + # source_code=self._content[-1], + # patch=patch, + # start=start, + # end=end, + # ) + # self._content.append(new_source) + # return new_source + + def update_content(self, content: str, reset_changes: bool = False) -> None: + """Ascociates a new version of the content of source code to a file.""" + if reset_changes: + self._content = [content] + else: + self._content.append(content) + + def assign_verifier_output(self, verifier_output: str, index: int = -1) -> None: + """Assigns verifier output to the ascociated file. If no file is given, + then assigns to the latest one.""" + if index < 0: + # Simulate negative indicies like with Lists. + index = len(self._content) + index + self._esbmc_output[index] = verifier_output + + def save_file( + self, file_path: Optional[Path], temp_dir: bool = True, index: int = -1 + ) -> Path: + """Saves the source code file. If file_path is not specified, it + will generate an automatic name. If temp_dir is True, it will place + the saved file in /tmp and use the file_path file name only.""" + + file_name: Optional[str] = None + dir_path: Optional[Path] = None + if file_path: + # If file path is a file, then use the name and directory. If not + # then use a temporary name and just store the folder. + if file_path.is_file(): + file_name = file_path.name + dir_path = file_path.parent + else: + dir_path = file_path + else: + if not self._file_path: + raise ValueError( + "Source code file does not have a name or file_path to save to" + ) + # Just store the file and use the temp dir. + file_name = self._file_path.name + + if temp_dir: + dir_path = Path(gettempdir()) + + assert ( + dir_path + ), "dir_path could not be retrieved: file_path or temp_dir need to be set." + + # Create path if it does not exist. + if not os.path.exists(dir_path): + os.mkdir(dir_path) + + with NamedTemporaryFile( + mode="w", + buffering=-1, + newline=None, + suffix=self.file_extension, + prefix=file_name, + dir=dir_path, + delete=False, + ) as temp_file: + temp_file.write(self.content[index]) + return Path(temp_file.name) + + @classmethod + def calculate_cyclomatic_complexity_delta( + cls, + source_1: "SourceFile", + source_2: "SourceFile", + temp_dir: bool = True, + ) -> Optional[float]: + """Calculates the cyclomatic complexity difference between the two source files.""" + try: + file_1: Path = source_1.save_file(None, temp_dir=temp_dir) + file_2: Path = source_2.save_file(None, temp_dir=temp_dir) + + cc1 = lizard.analyze_file(file_1) + cc2 = lizard.analyze_file(file_2) + + return cc1.average_cyclomatic_complexity - cc2.average_cyclomatic_complexity + except IOError: + return None + + +class Solution: + """Represents a solution, that is a collection of all the source files that + ESBMC-AI will be involved in analyzing.""" + + def __init__(self, files: Optional[list[Path]] = None) -> None: + files = files if files else [] + self._files: list[SourceFile] = [] + for file_path in files: + with open(file_path, "r") as file: + self._files.append(SourceFile(file_path, file.read())) + + @property + def files(self) -> tuple[SourceFile, ...]: + """Will return a list of the files. Returns by value.""" + return tuple(self._files) + + @property + def files_mapped(self) -> dict[Path, SourceFile]: + """Will return the files mapped to their directory. Returns by value.""" + return { + source_file.file_path: source_file + for source_file in self._files + if source_file.file_path + } + + def add_source_file( + self, file_path: Optional[Path], content: Optional[str] + ) -> None: + """Add a source file to the solution.""" + if file_path: + if content: + self._files.append(SourceFile(file_path, content)) + else: + with open(file_path, "r") as file: + self._files.append(SourceFile(file_path, file.read())) + return + + if content: + self._files.append(SourceFile(file_path, content)) + return + + raise RuntimeError("file_path and content cannot be both invalid!") + + +# Define a global solution (is not required to be used) + +_solution: Solution = Solution() + + +def get_solution() -> Solution: + """Returns the global default solution object.""" + return _solution diff --git a/scripts/function_equivalence.c b/scripts/function_equivalence.c deleted file mode 100644 index 16cb943..0000000 --- a/scripts/function_equivalence.c +++ /dev/null @@ -1,33 +0,0 @@ -#include - -/////////////////////////////////////////// -///////////// OLD ///////////////////////// -/////////////////////////////////////////// - -$function_old; - -/////////////////////////////////////////// -///////////// NEW ///////////////////////// -/////////////////////////////////////////// - -$function_new; - -/////////////////////////////////////////// -///////////// ESBMC MAIN ////////////////// -/////////////////////////////////////////// - -int main() -{ - // Need to dynamically build parameters to supply. - $parameters_list; - - // Need to call old and new functions and compare results. - $function_call_old; - - $function_call_new; - - // Check whether both functions produce the same output - assert($function_assert_old == $function_assert_new); - - return 0; -} diff --git a/tests/regtest/test_base_chat_interface.py b/tests/regtest/test_base_chat_interface.py index f7d3db5..8945f9a 100644 --- a/tests/regtest/test_base_chat_interface.py +++ b/tests/regtest/test_base_chat_interface.py @@ -2,12 +2,12 @@ import pytest -from langchain.llms.fake import FakeListLLM +from langchain_community.llms import FakeListLLM from langchain.schema import BaseMessage, HumanMessage, AIMessage, SystemMessage from esbmc_ai.ai_models import AIModel from esbmc_ai.chat_response import ChatResponse -from esbmc_ai.base_chat_interface import BaseChatInterface +from esbmc_ai.chats.base_chat_interface import BaseChatInterface from esbmc_ai.config import AIAgentConversation, ChatPromptSettings diff --git a/tests/test_base_chat_interface.py b/tests/test_base_chat_interface.py index 8d79412..42b7b1a 100644 --- a/tests/test_base_chat_interface.py +++ b/tests/test_base_chat_interface.py @@ -2,10 +2,10 @@ import pytest -from langchain.llms.fake import FakeListLLM +from langchain_community.llms import FakeListLLM from langchain.schema import AIMessage, BaseMessage, HumanMessage, SystemMessage from esbmc_ai.ai_models import AIModel -from esbmc_ai.base_chat_interface import BaseChatInterface +from esbmc_ai.chats.base_chat_interface import BaseChatInterface from esbmc_ai.chat_response import ChatResponse from esbmc_ai.config import AIAgentConversation, ChatPromptSettings diff --git a/tests/test_latest_state_solution_generator.py b/tests/test_latest_state_solution_generator.py index 61edf4e..679e2d6 100644 --- a/tests/test_latest_state_solution_generator.py +++ b/tests/test_latest_state_solution_generator.py @@ -9,7 +9,7 @@ from esbmc_ai.ai_models import AIModel from esbmc_ai.chat_response import ChatResponse from esbmc_ai.config import AIAgentConversation, ChatPromptSettings -from esbmc_ai.latest_state_solution_generator import LatestStateSolutionGenerator +from esbmc_ai.chats.latest_state_solution_generator import LatestStateSolutionGenerator @pytest.fixture(scope="function") diff --git a/tests/test_solution.py b/tests/test_solution.py index e303c49..f5d189f 100644 --- a/tests/test_solution.py +++ b/tests/test_solution.py @@ -1,13 +1,50 @@ # Author: Yiannis Charalambous -from esbmc_ai.frontend.solution import apply_line_patch +import pytest +from esbmc_ai.solution import Solution, SourceFile + +##################################### +# Solution +##################################### + + +@pytest.fixture(scope="function") +def solution() -> Solution: + return Solution() + + +def test_add_source_file(solution) -> None: + src: str = "int main(int argc, char** argv) {return 0;}" + solution.add_source_file(None, src) + assert ( + len(solution.files) == 1 + and solution.files[0].file_path == None + and solution.files[0].latest_content == src + ) + src = '#include int main(int argc, char** argv) { printf("hello world\n"); return 0;}' + solution.add_source_file("Testfile1", src) + assert ( + len(solution.files) == 2 + and solution.files[1].file_path == "Testfile1" + and solution.files[1].latest_content == src + ) + assert ( + len(solution.files_mapped) == 1 + and solution.files_mapped["Testfile1"].file_path == "Testfile1" + and solution.files_mapped["Testfile1"].initial_content == src + ) + + +##################################### +# SourceFile +##################################### def test_apply_line_patch() -> None: text = "\n".join(["a", "b", "c", "d", "e", "f", "g"]) answer = "\n".join(["a", "b", "1", "g"]) - assert apply_line_patch(text, "1", 2, 5) == answer + assert SourceFile.apply_line_patch(text, "1", 2, 5) == answer text = "\n".join(["a", "b", "c", "d", "e", "f", "g"]) answer = "\n".join(["a", "b", "c", "1", "e", "f", "g"]) - assert apply_line_patch(text, "1", 3, 3) == answer + assert SourceFile.apply_line_patch(text, "1", 3, 3) == answer diff --git a/tests/test_solution_generator.py b/tests/test_solution_generator.py index be6a209..ff86e7b 100644 --- a/tests/test_solution_generator.py +++ b/tests/test_solution_generator.py @@ -6,7 +6,7 @@ from esbmc_ai.ai_models import AIModel from esbmc_ai.config import AIAgentConversation, ChatPromptSettings -from esbmc_ai.solution_generator import SolutionGenerator +from esbmc_ai.chats.solution_generator import SolutionGenerator @pytest.fixture(scope="function") diff --git a/tests/test_user_chat.py b/tests/test_user_chat.py index 3c2eca1..f0198ca 100644 --- a/tests/test_user_chat.py +++ b/tests/test_user_chat.py @@ -2,13 +2,13 @@ import pytest -from langchain.llms.fake import FakeListLLM +from langchain_community.llms import FakeListLLM from langchain.schema import AIMessage, SystemMessage from esbmc_ai.ai_models import AIModel from esbmc_ai.chat_response import ChatResponse, FinishReason from esbmc_ai.config import AIAgentConversation, ChatPromptSettings -from esbmc_ai.user_chat import UserChat +from esbmc_ai.chats.user_chat import UserChat @pytest.fixture