From 57201600c94dfc5fff43acb3850d06874b3d5683 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Randy=20D=C3=B6ring?= <30527984+radoering@users.noreply.github.com> Date: Sat, 7 Oct 2023 09:18:22 +0200 Subject: [PATCH] refactor: let ArtifactCache handle downloading artifacts that are not yet cached --- src/poetry/installation/executor.py | 40 +++++++++------------------- src/poetry/packages/direct_origin.py | 11 +++----- src/poetry/utils/cache.py | 37 +++++++++++++++++++++++-- tests/installation/test_executor.py | 32 ++++++++++++---------- tests/packages/test_direct_origin.py | 4 +-- 5 files changed, 71 insertions(+), 53 deletions(-) diff --git a/src/poetry/installation/executor.py b/src/poetry/installation/executor.py index 8af87a4716f..0e8cc3dcf98 100644 --- a/src/poetry/installation/executor.py +++ b/src/poetry/installation/executor.py @@ -2,6 +2,7 @@ import contextlib import csv +import functools import itertools import json import os @@ -28,7 +29,6 @@ from poetry.puzzle.exceptions import SolverProblemError from poetry.utils._compat import decode from poetry.utils.authenticator import Authenticator -from poetry.utils.cache import ArtifactCache from poetry.utils.env import EnvCommandError from poetry.utils.helpers import Downloader from poetry.utils.helpers import get_file_hash @@ -79,7 +79,7 @@ def __init__( else: self._max_workers = 1 - self._artifact_cache = ArtifactCache(cache_dir=config.artifacts_cache_directory) + self._artifact_cache = pool.artifact_cache self._authenticator = Authenticator( config, self._io, disable_cache=disable_cache, pool_size=self._max_workers ) @@ -766,23 +766,11 @@ def _download(self, operation: Install | Update) -> Path: def _download_link(self, operation: Install | Update, link: Link) -> Path: package = operation.package - output_dir = self._artifact_cache.get_cache_directory_for_link(link) - # Try to get cached original package for the link provided + # Get original package for the link provided + download_func = functools.partial(self._download_archive, operation) original_archive = self._artifact_cache.get_cached_archive_for_link( - link, strict=True + link, strict=True, download_func=download_func ) - if original_archive is None: - # No cached original distributions was found, so we download and prepare it - try: - original_archive = self._download_archive(operation, link) - except BaseException: - cache_directory = self._artifact_cache.get_cache_directory_for_link( - link - ) - cached_file = cache_directory.joinpath(link.filename) - cached_file.unlink(missing_ok=True) - - raise # Get potential higher prioritized cached archive, otherwise it will fall back # to the original archive. @@ -808,7 +796,7 @@ def _download_link(self, operation: Install | Update, link: Link) -> Path: ) self._write(operation, message) - archive = self._chef.prepare(archive, output_dir=output_dir) + archive = self._chef.prepare(archive, output_dir=original_archive.parent) # Use the original archive to provide the correct hash. self._populate_hashes_dict(original_archive, package) @@ -833,13 +821,13 @@ def _validate_archive_hash(archive: Path, package: Package) -> str: return archive_hash - def _download_archive(self, operation: Install | Update, link: Link) -> Path: - archive = ( - self._artifact_cache.get_cache_directory_for_link(link) / link.filename - ) - archive.parent.mkdir(parents=True, exist_ok=True) - - downloader = Downloader(link.url, archive, self._authenticator) + def _download_archive( + self, + operation: Install | Update, + url: str, + dest: Path, + ) -> None: + downloader = Downloader(url, dest, self._authenticator) wheel_size = downloader.total_size operation_message = self.get_operation_message(operation) @@ -872,8 +860,6 @@ def _download_archive(self, operation: Install | Update, link: Link) -> Path: with self._lock: progress.finish() - return archive - def _should_write_operation(self, operation: Operation) -> bool: return ( not operation.skipped or self._dry_run or self._verbose or not self._enabled diff --git a/src/poetry/packages/direct_origin.py b/src/poetry/packages/direct_origin.py index f8e6095b051..9451e381205 100644 --- a/src/poetry/packages/direct_origin.py +++ b/src/poetry/packages/direct_origin.py @@ -76,14 +76,9 @@ def get_package_from_directory(cls, directory: Path) -> Package: def get_package_from_url(self, url: str) -> Package: link = Link(url) - artifact = self._artifact_cache.get_cached_archive_for_link(link, strict=True) - - if not artifact: - artifact = ( - self._artifact_cache.get_cache_directory_for_link(link) / link.filename - ) - artifact.parent.mkdir(parents=True, exist_ok=True) - download_file(url, artifact) + artifact = self._artifact_cache.get_cached_archive_for_link( + link, strict=True, download_func=download_file + ) package = self.get_package_from_file(artifact) package.files = [ diff --git a/src/poetry/utils/cache.py b/src/poetry/utils/cache.py index 79e67394f82..5bd6a8dc7ef 100644 --- a/src/poetry/utils/cache.py +++ b/src/poetry/utils/cache.py @@ -12,6 +12,7 @@ from typing import Any from typing import Generic from typing import TypeVar +from typing import overload from poetry.utils._compat import decode from poetry.utils._compat import encode @@ -218,18 +219,49 @@ def get_cache_directory_for_git( return self._get_directory_from_hash(key_parts) + @overload + def get_cached_archive_for_link( + self, + link: Link, + *, + strict: bool, + env: Env | None = ..., + download_func: Callable[[str, Path], None], + ) -> Path: ... + + @overload + def get_cached_archive_for_link( + self, + link: Link, + *, + strict: bool, + env: Env | None = ..., + download_func: None = ..., + ) -> Path | None: ... + def get_cached_archive_for_link( self, link: Link, *, strict: bool, env: Env | None = None, + download_func: Callable[[str, Path], None] | None = None, ) -> Path | None: cache_dir = self.get_cache_directory_for_link(link) - return self._get_cached_archive( + cached_archive = self._get_cached_archive( cache_dir, strict=strict, filename=link.filename, env=env ) + if cached_archive is None and strict and download_func is not None: + cache_dir.mkdir(parents=True, exist_ok=True) + cached_archive = cache_dir / link.filename + try: + download_func(link.url, cached_archive) + except BaseException: + cached_archive.unlink(missing_ok=True) + raise + + return cached_archive def get_cached_archive_for_git( self, url: str, reference: str, subdirectory: str | None, env: Env @@ -246,8 +278,9 @@ def _get_cached_archive( filename: str | None = None, env: Env | None = None, ) -> Path | None: + # implication "not strict -> env must not be None" assert strict or env is not None - # implication "strict -> filename should not be None" + # implication "strict -> filename must not be None" assert not strict or filename is not None archives = self._get_cached_archives(cache_dir) diff --git a/tests/installation/test_executor.py b/tests/installation/test_executor.py index 4e0ad9cb091..afd4272dff9 100644 --- a/tests/installation/test_executor.py +++ b/tests/installation/test_executor.py @@ -21,7 +21,6 @@ from cleo.io.buffered_io import BufferedIO from cleo.io.outputs.output import Verbosity from poetry.core.packages.package import Package -from poetry.core.packages.utils.link import Link from poetry.core.packages.utils.utils import path_to_url from poetry.factory import Factory @@ -593,11 +592,11 @@ def test_executor_should_delete_incomplete_downloads( side_effect=Exception("Download error"), ) mocker.patch( - "poetry.installation.executor.ArtifactCache.get_cached_archive_for_link", + "poetry.utils.cache.ArtifactCache._get_cached_archive", return_value=None, ) mocker.patch( - "poetry.installation.executor.ArtifactCache.get_cache_directory_for_link", + "poetry.utils.cache.ArtifactCache.get_cache_directory_for_link", return_value=tmp_path, ) @@ -823,7 +822,7 @@ def test_executor_should_write_pep610_url_references_for_wheel_urls( if is_artifact_cached: link_cached = fixture_dir("distributions") / "demo-0.1.0-py2.py3-none-any.whl" mocker.patch( - "poetry.installation.executor.ArtifactCache.get_cached_archive_for_link", + "poetry.utils.cache.ArtifactCache.get_cached_archive_for_link", return_value=link_cached, ) download_spy = mocker.spy(Executor, "_download_archive") @@ -861,9 +860,13 @@ def test_executor_should_write_pep610_url_references_for_wheel_urls( else: assert package.source_url is not None download_spy.assert_called_once_with( - mocker.ANY, operation, Link(package.source_url) + mocker.ANY, + operation, + package.source_url, + dest=mocker.ANY, ) - assert download_spy.spy_return.exists(), "cached file should not be deleted" + dest = download_spy.call_args.args[3] + assert dest.exists(), "cached file should not be deleted" @pytest.mark.parametrize( @@ -900,12 +903,12 @@ def test_executor_should_write_pep610_url_references_for_non_wheel_urls( ) download_spy = mocker.spy(Executor, "_download_archive") - if is_sdist_cached | is_wheel_cached: + if is_sdist_cached or is_wheel_cached: cached_sdist = fixture_dir("distributions") / "demo-0.1.0.tar.gz" cached_wheel = fixture_dir("distributions") / "demo-0.1.0-py2.py3-none-any.whl" - def mock_get_cached_archive_for_link_func( - _: Link, *, strict: bool, **__: Any + def mock_get_cached_archive_func( + _cache_dir: Path, *, strict: bool, **__: Any ) -> Path | None: if is_wheel_cached and not strict: return cached_wheel @@ -914,8 +917,8 @@ def mock_get_cached_archive_for_link_func( return None mocker.patch( - "poetry.installation.executor.ArtifactCache.get_cached_archive_for_link", - side_effect=mock_get_cached_archive_for_link_func, + "poetry.utils.cache.ArtifactCache._get_cached_archive", + side_effect=mock_get_cached_archive_func, ) package = Package( @@ -955,9 +958,10 @@ def mock_get_cached_archive_for_link_func( if expect_artifact_download: assert package.source_url is not None download_spy.assert_called_once_with( - mocker.ANY, operation, Link(package.source_url) + mocker.ANY, operation, package.source_url, dest=mocker.ANY ) - assert download_spy.spy_return.exists(), "cached file should not be deleted" + dest = download_spy.call_args.args[3] + assert dest.exists(), "cached file should not be deleted" else: download_spy.assert_not_called() @@ -978,7 +982,7 @@ def test_executor_should_write_pep610_url_references_for_git( if is_artifact_cached: link_cached = fixture_dir("distributions") / "demo-0.1.2-py2.py3-none-any.whl" mocker.patch( - "poetry.installation.executor.ArtifactCache.get_cached_archive_for_git", + "poetry.utils.cache.ArtifactCache.get_cached_archive_for_git", return_value=link_cached, ) clone_spy = mocker.spy(Git, "clone") diff --git a/tests/packages/test_direct_origin.py b/tests/packages/test_direct_origin.py index ff9548c5fdf..55a63946b39 100644 --- a/tests/packages/test_direct_origin.py +++ b/tests/packages/test_direct_origin.py @@ -43,7 +43,7 @@ def test_direct_origin_does_not_download_url_dependency_when_cached( ) direct_origin = DirectOrigin(artifact_cache) url = "https://python-poetry.org/distributions/demo-0.1.0-py2.py3-none-any.whl" - mocker.patch( + download_file = mocker.patch( "poetry.packages.direct_origin.download_file", side_effect=Exception("download_file should not be called"), ) @@ -52,5 +52,5 @@ def test_direct_origin_does_not_download_url_dependency_when_cached( assert package.name == "demo" artifact_cache.get_cached_archive_for_link.assert_called_once_with( - Link(url), strict=True + Link(url), strict=True, download_func=download_file )