Skip to content

Commit

Permalink
harden lazy wheel wheel error handling
Browse files Browse the repository at this point in the history
This change attempts to provide a catch-all safety net for circumstances
where an index server is behaving badly when ranged requests are made.

Additionally, this change also helps guard against  missing
"Content-Range" headers in specific scenarios.
  • Loading branch information
abn committed Feb 28, 2024
1 parent f66f8ab commit b7bc5ce
Show file tree
Hide file tree
Showing 6 changed files with 223 additions and 45 deletions.
41 changes: 36 additions & 5 deletions src/poetry/inspection/lazy_wheel.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,19 @@
logger = logging.getLogger(__name__)


class HTTPRangeRequestUnsupported(Exception):
class LazyWheelUnsupportedError(Exception):
"""Raised when a lazy wheel is unsupported."""


class HTTPRangeRequestUnsupported(LazyWheelUnsupportedError):
"""Raised when the remote server appears unable to support byte ranges."""


class UnsupportedWheel(Exception):
class UnsupportedWheel(LazyWheelUnsupportedError):
"""Unsupported wheel."""


class InvalidWheel(Exception):
class InvalidWheel(LazyWheelUnsupportedError):
"""Invalid (e.g. corrupt) wheel."""

def __init__(self, location: str, name: str) -> None:
Expand Down Expand Up @@ -85,6 +89,24 @@ def metadata_from_wheel_url(
# themselves are invalid, not because we've messed up our bookkeeping
# and produced an invalid file.
raise InvalidWheel(url, name)
except Exception as e:
if isinstance(e, LazyWheelUnsupportedError):
# this is expected when the code handles issues with lazy wheel metadata retrieval correctly
raise e

logger.debug(
"There was an unexpected %s when handling lazy wheel metadata retrieval for %s from %s: %s",
type(e).__name__,
name,
url,
e,
)

# Catch all exception to handle any issues that may have occurred during
# attempts to use Lazy Wheel.
raise LazyWheelUnsupportedError(
f"Attempts to use lazy wheel metadata retrieval for {name} from {url} failed"
) from e


class MergeIntervals:
Expand Down Expand Up @@ -590,6 +612,12 @@ def _try_initial_chunk_request(
f"did not receive partial content: got code {code}"
)

if "Content-Range" not in tail.headers:
raise LazyWheelUnsupportedError(
f"file length cannot be determined for {self._url}, "
f"did not receive content range header from server"
)

file_length = self._parse_full_length_from_content_range(
tail.headers["Content-Range"]
)
Expand All @@ -614,10 +642,13 @@ def _extract_content_length(

# This indicates that the requested range from the end was larger than the
# actual file size: https://www.rfc-editor.org/rfc/rfc9110#status.416.
if code == codes.requested_range_not_satisfiable:
if (
code == codes.requested_range_not_satisfiable
and resp is not None
and "Content-Range" in resp.headers
):
# In this case, we don't have any file content yet, but we do know the
# size the file will be, so we can return that and exit here.
assert resp is not None
file_length = self._parse_full_length_from_content_range(
resp.headers["Content-Range"]
)
Expand Down
4 changes: 2 additions & 2 deletions src/poetry/repositories/http_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

from poetry.config.config import Config
from poetry.inspection.info import PackageInfo
from poetry.inspection.lazy_wheel import HTTPRangeRequestUnsupported
from poetry.inspection.lazy_wheel import LazyWheelUnsupportedError
from poetry.inspection.lazy_wheel import metadata_from_wheel_url
from poetry.repositories.cached_repository import CachedRepository
from poetry.repositories.exceptions import PackageNotFound
Expand Down Expand Up @@ -122,7 +122,7 @@ def _get_info_from_wheel(self, link: Link) -> PackageInfo:
package_info = PackageInfo.from_metadata(
metadata_from_wheel_url(link.filename, link.url, self.session)
)
except HTTPRangeRequestUnsupported:
except LazyWheelUnsupportedError:
# Do not set to False if we already know that the domain supports
# range requests for some URLs!
raise_accepts_ranges = False
Expand Down
12 changes: 12 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
from tests.helpers import TestLocker
from tests.helpers import TestRepository
from tests.helpers import get_package
from tests.helpers import http_setup_redirect
from tests.helpers import isolated_environment
from tests.helpers import mock_clone
from tests.helpers import mock_download
Expand Down Expand Up @@ -321,6 +322,11 @@ def http() -> Iterator[type[httpretty.httpretty]]:
yield httpretty


@pytest.fixture
def http_redirector(http: type[httpretty.httpretty]) -> None:
http_setup_redirect(http, http.HEAD, http.GET, http.PUT, http.POST)


@pytest.fixture
def project_root() -> Path:
return Path(__file__).parent.parent
Expand Down Expand Up @@ -513,3 +519,9 @@ def venv_flags_default() -> dict[str, bool]:
"no-pip": False,
"no-setuptools": False,
}


@pytest.fixture(autouse=(os.name == "nt"))
def httpretty_windows_mock_urllib3_wait_for_socket(mocker: MockerFixture) -> None:
# this is a workaround for https://github.com/gabrielfalcao/HTTPretty/issues/442
mocker.patch("urllib3.util.wait.select_wait_for_socket", returns=True)
26 changes: 26 additions & 0 deletions tests/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@
from typing import Any
from typing import Mapping

import httpretty

from httpretty.core import HTTPrettyRequest
from poetry.core.constraints.version import Version
from poetry.core.packages.dependency import Dependency
from pytest_mock import MockerFixture
Expand All @@ -38,6 +41,7 @@
from poetry.installation.operations.operation import Operation
from poetry.poetry import Poetry
from poetry.utils.authenticator import Authenticator
from tests.types import HTTPrettyResponse

FIXTURE_PATH = Path(__file__).parent / "fixtures"

Expand Down Expand Up @@ -321,3 +325,25 @@ def recurse_keys(obj: Mapping[str, Any]) -> Iterator[tuple[list[str], Any]]:
yield ([], obj)

return {delimiter.join(path): value for path, value in recurse_keys(obj)}


def http_setup_redirect(
http: type[httpretty.httpretty], *methods: str, status_code: int = 301
) -> None:
redirect_uri_regex = re.compile("^(?P<protocol>https?)://redirect.(?P<uri>.*)$")

def redirect_request_callback(
request: HTTPrettyRequest, uri: str, headers: dict[str, Any]
) -> HTTPrettyResponse:
redirect_uri_match = redirect_uri_regex.match(uri)
assert redirect_uri_match is not None
redirect_uri = f"{redirect_uri_match.group('protocol')}://{redirect_uri_match.group('uri')}"
return status_code, {"Location": redirect_uri}, b""

for method in methods:
http.register_uri(
method,
redirect_uri_regex,
status=status_code,
body=redirect_request_callback,
)
Loading

0 comments on commit b7bc5ce

Please sign in to comment.