diff --git a/build.sh b/build.sh index 8ba0dee..ef7e728 100755 --- a/build.sh +++ b/build.sh @@ -1,5 +1,6 @@ #!/usr/bin/env sh echo "Building ESBMC-AI" -python3 -m build +# python3 -m build +hatch build diff --git a/esbmc_ai/__about__.py b/esbmc_ai/__about__.py index 0f1356e..7415113 100644 --- a/esbmc_ai/__about__.py +++ b/esbmc_ai/__about__.py @@ -1,4 +1,4 @@ # Author: Yiannis Charalambous -__version__ = "v0.5.0.dev6" +__version__ = "v0.5.0rc0" __author__: str = "Yiannis Charalambous" diff --git a/esbmc_ai/commands/fix_code_command.py b/esbmc_ai/commands/fix_code_command.py index 3091fe2..b2db77d 100644 --- a/esbmc_ai/commands/fix_code_command.py +++ b/esbmc_ai/commands/fix_code_command.py @@ -1,6 +1,6 @@ # Author: Yiannis Charalambous -from os import get_terminal_size +import sys from typing import Any, Tuple from typing_extensions import override from langchain.schema import AIMessage, HumanMessage @@ -15,7 +15,12 @@ esbmc_get_error_type, esbmc_load_source_code, ) -from ..solution_generator import SolutionGenerator, get_esbmc_output_formatted +from ..solution_generator import ( + ESBMCTimedOutException, + SolutionGenerator, + SourceCodeParseError, + get_esbmc_output_formatted, +) from ..logging import print_horizontal_line, printv, printvv # TODO Remove built in messages and move them to config. @@ -34,12 +39,13 @@ def __init__(self) -> None: @override def execute(self, **kwargs: Any) -> Tuple[bool, str]: def print_raw_conversation() -> None: - print("Notice: Printing raw conversation...") + print_horizontal_line(0) + print("ESBMC-AI Notice: Printing raw conversation...") all_messages = solution_generator._system_messages.copy() all_messages.extend(solution_generator.messages.copy()) messages: list[str] = [f"{msg.type}: {msg.content}" for msg in all_messages] print("\n" + "\n\n".join(messages)) - print("Notice: End of conversation") + print("ESBMC-AI Notice: End of raw conversation") file_name: str = kwargs["file_name"] source_code: str = kwargs["source_code"] @@ -55,21 +61,25 @@ def print_raw_conversation() -> None: else "Using generic prompt..." ) - solution_generator = SolutionGenerator( - ai_model_agent=config.chat_prompt_generator_mode, - source_code=source_code, - esbmc_output=esbmc_output, - ai_model=config.ai_model, - llm=config.ai_model.create_llm( - api_keys=config.api_keys, - temperature=config.chat_prompt_generator_mode.temperature, - requests_max_tries=config.requests_max_tries, - requests_timeout=config.requests_timeout, - ), - scenario=scenario, - source_code_format=config.source_code_format, - esbmc_output_type=config.esbmc_output_type, - ) + try: + solution_generator = SolutionGenerator( + ai_model_agent=config.chat_prompt_generator_mode, + source_code=source_code, + esbmc_output=esbmc_output, + ai_model=config.ai_model, + llm=config.ai_model.create_llm( + api_keys=config.api_keys, + temperature=config.chat_prompt_generator_mode.temperature, + requests_max_tries=config.requests_max_tries, + requests_timeout=config.requests_timeout, + ), + scenario=scenario, + source_code_format=config.source_code_format, + esbmc_output_type=config.esbmc_output_type, + ) + except ESBMCTimedOutException: + print("error: ESBMC has timed out...") + sys.exit(1) print() @@ -109,32 +119,38 @@ def print_raw_conversation() -> None: ) self.anim.stop() - # TODO Move this process into Solution Generator since have (beginning) is done - # inside, and the other half is done here. - try: - esbmc_output = get_esbmc_output_formatted( - esbmc_output_type=config.esbmc_output_type, - esbmc_output=esbmc_output, - ) - except ValueError: - # Probably did not compile and so ESBMC source code is clang output. - pass - # Print verbose lvl 2 print_horizontal_line(2) printvv(esbmc_output) print_horizontal_line(2) + # Solution found if exit_code == 0: self.on_solution_signal.emit(llm_solution) if config.raw_conversation: print_raw_conversation() + printv("ESBMC-AI Notice: Successfully verified code") + return False, llm_solution + # TODO Move this process into Solution Generator since have (beginning) is done + # inside, and the other half is done here. + # Get formatted ESBMC output + try: + esbmc_output = get_esbmc_output_formatted( + esbmc_output_type=config.esbmc_output_type, + esbmc_output=esbmc_output, + ) + except SourceCodeParseError: + pass + except ESBMCTimedOutException: + print("error: ESBMC has timed out...") + sys.exit(1) + # Failure case - print(f"Failure {idx+1}/{max_retries}: Retrying...") + print(f"ESBMC-AI Notice: Failure {idx+1}/{max_retries}: Retrying...") # If final iteration no need to sleep. if idx < max_retries - 1: @@ -160,4 +176,5 @@ def print_raw_conversation() -> None: if config.raw_conversation: print_raw_conversation() - return True, "Failed all attempts..." + + return True, "ESBMC-AI Notice: Failed all attempts..." diff --git a/esbmc_ai/esbmc_util.py b/esbmc_ai/esbmc_util.py index 3083bd3..d54d769 100644 --- a/esbmc_ai/esbmc_util.py +++ b/esbmc_ai/esbmc_util.py @@ -67,6 +67,25 @@ def get_source_code_err_line_idx(esbmc_output: str) -> Optional[int]: return None +def get_clang_err_line(clang_output: str) -> Optional[int]: + """For when the code does not compile, gets the error line reported in the clang + output. This is useful for `esbmc_output_type single`""" + # TODO Test me + lines: list[str] = clang_output.splitlines() + for line in lines: + # Find the first line containing a filename along with error. + line_split: list[str] = line.split(":") + # Check for the filename + if line_split[0].endswith(".c") and " error" in line_split[3]: + return int(line_split[1]) + + return None + + +def get_clang_err_line_index(clang_output: str) -> Optional[int]: + return get_clang_err_line(clang_output) + + def esbmc(path: str, esbmc_params: list, timeout: Optional[float] = None): """Exit code will be 0 if verification successful, 1 if verification failed. And any other number for compilation error/general errors.""" @@ -84,7 +103,7 @@ def esbmc(path: str, esbmc_params: list, timeout: Optional[float] = None): esbmc_cmd.extend(["--timeout", str(timeout)]) # Add slack time to process to allow verifier to timeout and end gracefully. - process_timeout: Optional[float] = timeout + 1 if timeout else None + process_timeout: Optional[float] = timeout + 10 if timeout else None # Run ESBMC and get output process: CompletedProcess = run( diff --git a/esbmc_ai/logging.py b/esbmc_ai/logging.py index 0408498..3ca1a3a 100644 --- a/esbmc_ai/logging.py +++ b/esbmc_ai/logging.py @@ -36,8 +36,8 @@ def printvvv(m) -> None: def print_horizontal_line(verbosity: int) -> None: - if verbosity >= _verbose: + if _verbose >= verbosity: try: - printvv("-" * get_terminal_size().columns) + print("-" * get_terminal_size().columns) except OSError: pass diff --git a/esbmc_ai/solution_generator.py b/esbmc_ai/solution_generator.py index 639eb70..fd7e664 100644 --- a/esbmc_ai/solution_generator.py +++ b/esbmc_ai/solution_generator.py @@ -16,19 +16,36 @@ esbmc_get_counter_example, esbmc_get_violated_property, get_source_code_err_line_idx, + get_clang_err_line_index, ) +class ESBMCTimedOutException(Exception): + pass + + +class SourceCodeParseError(Exception): + pass + + def get_source_code_formatted( source_code_format: str, source_code: str, esbmc_output: str ) -> str: match source_code_format: case "single": + # Get source code error line from esbmc output line: Optional[int] = get_source_code_err_line_idx(esbmc_output) - assert line, "error line not found in esbmc output" - # ESBMC reports errors starting from 1. To get the correct line, we need to use 0 based - # indexing. - return source_code.splitlines(True)[line] + if line: + return source_code.splitlines(True)[line] + + # Check if it parses + line = get_clang_err_line_index(esbmc_output) + if line: + return source_code.splitlines(True)[line] + + raise AssertionError( + f"error line not found in esbmc output:\n{esbmc_output}" + ) case "full": return source_code case _: @@ -38,11 +55,18 @@ def get_source_code_formatted( def get_esbmc_output_formatted(esbmc_output_type: str, esbmc_output: str) -> str: + # Check for parsing error + if "ERROR: PARSING ERROR" in esbmc_output: + # Parsing errors are usually small in nature. + raise SourceCodeParseError() + elif "ERROR: Timed out" in esbmc_output: + raise ESBMCTimedOutException() + match esbmc_output_type: case "vp": value: Optional[str] = esbmc_get_violated_property(esbmc_output) if not value: - raise ValueError("Not found violated property.") + raise ValueError("Not found violated property." + esbmc_output) return value case "ce": value: Optional[str] = esbmc_get_counter_example(esbmc_output) @@ -77,32 +101,31 @@ def __init__( ai_model=ai_model, llm=llm, ) + self.initial_prompt = ai_model_agent.initial_prompt self.esbmc_output_type: str = esbmc_output_type - self.esbmc_output = get_esbmc_output_formatted( - esbmc_output_type=self.esbmc_output_type, - esbmc_output=esbmc_output, - ) - self.source_code_format: str = source_code_format self.source_code_raw: str = source_code + # Used for resetting state. + self._original_source_code: str = source_code - source_code_formatted: str = get_source_code_formatted( - source_code_format=self.source_code_format, - source_code=self.source_code_raw, - esbmc_output=self.esbmc_output, - ) - - self.apply_template_value( - source_code=source_code_formatted, - esbmc_output=self.esbmc_output, - ) + # Format ESBMC output + try: + self.esbmc_output = get_esbmc_output_formatted( + esbmc_output_type=self.esbmc_output_type, + esbmc_output=esbmc_output, + ) + except SourceCodeParseError: + # When clang output is displayed, show it entirely as it doesn't get very + # big. + self.esbmc_output = esbmc_output @override def compress_message_stack(self) -> None: # Resets the conversation - cannot summarize code self.messages: list[BaseMessage] = [] + self.source_code_raw = self._original_source_code @classmethod def get_code_from_solution(cls, solution: str) -> str: @@ -130,7 +153,23 @@ def get_code_from_solution(cls, solution: str) -> str: return solution def generate_solution(self) -> tuple[str, FinishReason]: - response: ChatResponse = self.send_message(self.initial_prompt) + self.push_to_message_stack(HumanMessage(content=self.initial_prompt)) + + # Format source code + source_code_formatted: str = get_source_code_formatted( + source_code_format=self.source_code_format, + source_code=self.source_code_raw, + esbmc_output=self.esbmc_output, + ) + + # Apply template substitution to message stack + self.apply_template_value( + source_code=source_code_formatted, + esbmc_output=self.esbmc_output, + ) + + # Generate the solution + response: ChatResponse = self.send_message() solution: str = str(response.message.content) solution = SolutionGenerator.get_code_from_solution(solution) @@ -149,4 +188,6 @@ def generate_solution(self) -> tuple[str, FinishReason]: self.source_code_raw, solution, err_line, err_line ) + # Remember it for next cycle + self.source_code_raw = solution return solution, response.finish_reason diff --git a/pyproject.toml b/pyproject.toml index edd126e..418549d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,22 +65,14 @@ Issues = "https://github.com/Yiannis128/esbmc-ai/issues" [tool.hatch.version] path = "esbmc_ai/__about__.py" -# [tool.hatch.build.targets.esbmc_ai] -# ignore-vcs = true -# include = [ -# "/esbmc_ai/**/*.py", -# "/esbmc_ai_config/**/*.py", -# ] -# exclude = [ -# "/.github/*", -# "/.vscode", -# "/notebooks", -# "/samples", -# "/scripts", -# "/tests", -# "/env.example", -# "/.gitignore" -# ] +[tool.hatch.build.targets.sdist] +ignore-vcs = false +# include = ["esbmc-ai/**/*", "pyproject.toml", "README.md"] +# exclude = ["**/*"] +packages = ["esbmc_ai"] -# [tool.hatch.build.targets.esbmc_ai.force-include] -# "config.json" = "esbmc_ai_config/data/config.template.json" +[tool.hatch.build.targets.wheel] +ignore-vcs = false +# include = ["esbmc-ai/**/*", "pyproject.toml", "README.md"] +# exclude = ["**/*"] +packages = ["esbmc_ai"] diff --git a/samples/threading_broken.c b/samples/threading_broken.c new file mode 100644 index 0000000..5f17856 --- /dev/null +++ b/samples/threading_broken.c @@ -0,0 +1,25 @@ +#include +#include + +int a, b; +void __VERIFIER_atomic_acquire(void) +{{ + __VERIFIER_assume(a == 0); + a = 1; +} +void *c(void *arg) +{ + ; + __VERIFIER_atomic_acquire(); + b = 1; + return NULL; +} +pthread_t d; +int main() +{ + pthread_create(&d, 0, c, 0); + __VERIFIER_atomic_acquire(); + if (!b) + assert(0); + return 0; +} \ No newline at end of file