From 67678c2e1c51e5dc59f96b886e01ba3d29d9499d Mon Sep 17 00:00:00 2001 From: peace-maker Date: Mon, 12 Aug 2024 20:59:22 +0200 Subject: [PATCH] Cache output of `asm()` (#2358) * Cache output of `asm()` To speed up repeated runs of an exploit, cache the assembled output. Use a sha1 hash of the shellcode as well as relevant context values like `context.arch` and `context.bits` to see if the exact same shellcode was assembled for the same context before. Fixes #2312 * Return path to cache file if `not extract` * Update CHANGELOG * Create temporary copy of cached file * Add debug log about using the cache * Include full assembler and linker commandlines in hash This should catch any changes across pwntools updates and system environment changes. * Include pwntools version in hash --- CHANGELOG.md | 2 ++ pwnlib/asm.py | 48 +++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 758686019..554c28597 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -72,7 +72,9 @@ The table below shows which release corresponds to each branch, and what date th ## 4.15.0 (`dev`) +- [#2358][2358] Cache output of `asm()` +[2358]: https://github.com/Gallopsled/pwntools/pull/2358 ## 4.14.0 (`beta`) diff --git a/pwnlib/asm.py b/pwnlib/asm.py index 69264f66f..b95288acd 100644 --- a/pwnlib/asm.py +++ b/pwnlib/asm.py @@ -59,6 +59,9 @@ from pwnlib.context import LocalContext from pwnlib.context import context from pwnlib.log import getLogger +from pwnlib.util.hashes import sha1sumhex +from pwnlib.util.packing import _encode +from pwnlib.version import __version__ log = getLogger(__name__) @@ -758,8 +761,21 @@ def asm(shellcode, vma = 0, extract = True, shared = False): b'0@*\x00' >>> asm("la %r0, 42", arch = 's390', bits=64) b'A\x00\x00*' + + The output is cached: + + >>> start = time.time() + >>> asm("lea rax, [rip+0]", arch = 'amd64') + b'H\x8d\x05\x00\x00\x00\x00' + >>> uncached_time = time.time() - start + >>> start = time.time() + >>> asm("lea rax, [rip+0]", arch = 'amd64') + b'H\x8d\x05\x00\x00\x00\x00' + >>> cached_time = time.time() - start + >>> uncached_time > cached_time + True """ - result = '' + result = b'' assembler = _assembler() linker = _linker() @@ -770,6 +786,30 @@ def asm(shellcode, vma = 0, extract = True, shared = False): log.debug('Assembling\n%s' % code) + cache_file = None + if context.cache_dir: + cache_dir = os.path.join(context.cache_dir, 'asm-cache') + if not os.path.isdir(cache_dir): + os.makedirs(cache_dir) + + # Include the context in the hash in addition to the shellcode + hash_params = '{}_{}_{}_{}'.format(vma, extract, shared, __version__) + fingerprint_params = _encode(code) + _encode(hash_params) + _encode(' '.join(assembler)) + _encode(' '.join(linker)) + _encode(' '.join(objcopy)) + asm_hash = sha1sumhex(fingerprint_params) + cache_file = os.path.join(cache_dir, asm_hash) + if os.path.exists(cache_file): + log.debug('Using cached assembly output from %r', cache_file) + if extract: + with open(cache_file, 'rb') as f: + return f.read() + + # Create a temporary copy of the cached file to avoid modification. + tmpdir = tempfile.mkdtemp(prefix = 'pwn-asm-') + atexit.register(shutil.rmtree, tmpdir) + step3 = os.path.join(tmpdir, 'step3') + shutil.copy(cache_file, step3) + return step3 + tmpdir = tempfile.mkdtemp(prefix = 'pwn-asm-') step1 = path.join(tmpdir, 'step1') step2 = path.join(tmpdir, 'step2') @@ -817,6 +857,8 @@ def asm(shellcode, vma = 0, extract = True, shared = False): shutil.copy(step2, step3) if not extract: + if cache_file is not None: + shutil.copy(step3, cache_file) return step3 _run(objcopy + [step3, step4]) @@ -830,6 +872,10 @@ def asm(shellcode, vma = 0, extract = True, shared = False): else: atexit.register(lambda: shutil.rmtree(tmpdir)) + if cache_file is not None and result != b'': + with open(cache_file, 'wb') as f: + f.write(result) + return result @LocalContext