From fcee822a1454c27b1d1c45f117a14bff4f5df5e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matthias=20Ri=C3=9Fe?= Date: Thu, 16 Feb 2023 14:27:55 +0100 Subject: [PATCH] Add better handling of symlinks --- src/reuse/project.py | 84 ++++++++++++++++++++++++++++---------------- src/reuse/report.py | 7 ++-- src/reuse/vcs.py | 4 +-- 3 files changed, 60 insertions(+), 35 deletions(-) diff --git a/src/reuse/project.py b/src/reuse/project.py index 9f192f9e7..d0089639f 100644 --- a/src/reuse/project.py +++ b/src/reuse/project.py @@ -1,6 +1,7 @@ # SPDX-FileCopyrightText: 2017 Free Software Foundation Europe e.V. # SPDX-FileCopyrightText: 2022 Florian Snow # SPDX-FileCopyrightText: 2023 DB Systel GmbH +# SPDX-FileCopyrightText: 2023 Matthias Riße # # SPDX-License-Identifier: GPL-3.0-or-later @@ -136,8 +137,27 @@ def all_files(self, directory: Optional[StrPath] = None) -> Iterator[Path]: _LOGGER.debug("ignoring '%s'", the_file) continue if the_file.is_symlink(): - _LOGGER.debug("skipping symlink '%s'", the_file) - continue + # Needs to use os.path.abspath instead of Path.absolute + # since the former normalizes the path, i.e. resolves "..". + # There is no method in pathlib for this which doesn't also + # resolve symlinks recursively, like Path.resolve. + target_file = Path( + os.path.abspath(the_file.readlink()) # type: ignore + ) + _LOGGER.debug( + "'%s' is a symlink pointing to '%s'", + the_file, + target_file, + ) + if ( + target_file.is_relative_to( # type: ignore # pylint: disable=E1101 + self.root.resolve() + ) + and (target_file.exists() or target_file.is_symlink()) + and not self._is_path_ignored(target_file) + ): + _LOGGER.debug("skipping symlink '%s'", the_file) + continue # Suppressing this error because I simply don't want to deal # with that here. with contextlib.suppress(OSError): @@ -184,35 +204,39 @@ def reuse_info_of(self, path: StrPath) -> ReuseInfo: dep5_path = source_path # Search the file for REUSE information.
- with path.open("rb") as fp: - try: - # Completely read the file once to search for possible snippets - if _contains_snippet(fp): - _LOGGER.debug(f"'{path}' seems to contain a SPDX Snippet") - read_limit = None - else: - read_limit = _HEADER_BYTES - # Reset read position - fp.seek(0) - # Scan the file for REUSE info, possible limiting the read - # length - file_result = extract_reuse_info( - decoded_text_from_binary(fp, size=read_limit) - ) - if file_result: - source_path = str(path) - if path.suffix == ".license": - source_type = SourceType.DOT_LICENSE_FILE + if not path.is_symlink(): + with path.open("rb") as fp: + try: + # Completely read the file once to search for possible + # snippets + if _contains_snippet(fp): + _LOGGER.debug( + f"'{path}' seems to contain a SPDX Snippet" + ) + read_limit = None else: - source_type = SourceType.FILE_HEADER - - except (ExpressionError, ParseError): - _LOGGER.error( - _( - "'{path}' holds an SPDX expression that cannot be" - " parsed, skipping the file" - ).format(path=path) - ) + read_limit = _HEADER_BYTES + # Reset read position + fp.seek(0) + # Scan the file for REUSE info, possibly limiting the read + # length + file_result = extract_reuse_info( + decoded_text_from_binary(fp, size=read_limit) + ) + if file_result: + source_path = str(path) + if path.suffix == ".license": + source_type = SourceType.DOT_LICENSE_FILE + else: + source_type = SourceType.FILE_HEADER + + except (ExpressionError, ParseError): + _LOGGER.error( + _( + "'{path}' holds an SPDX expression that cannot be" + " parsed, skipping the file" + ).format(path=path) + ) # There is both information in a .dep5 file and in the file header if ( diff --git a/src/reuse/report.py b/src/reuse/report.py index 268e48430..297a8e0a8 100644 --- a/src/reuse/report.py +++ b/src/reuse/report.py @@ -1,6 +1,7 @@ # SPDX-FileCopyrightText: 2017 Free Software Foundation Europe e.V.
# SPDX-FileCopyrightText: 2022 Florian Snow # SPDX-FileCopyrightText: 2022 Pietro Albini +# SPDX-FileCopyrightText: 2023 Matthias Riße # # SPDX-License-Identifier: GPL-3.0-or-later @@ -445,14 +446,14 @@ def generate( ) -> "FileReport": """Generate a FileReport from a path in a Project.""" path = Path(path) - if not path.is_file(): - raise OSError(f"{path} is not a file") + if not path.is_file() and not path.is_symlink(): + raise OSError(f"{path} is not supported") relative = project.relative_from_root(path) report = cls("./" + str(relative), path, do_checksum=do_checksum) # Checksum and ID - if report.do_checksum: + if report.do_checksum and not path.is_symlink(): report.spdxfile.chk_sum = _checksum(path) else: # This path avoids a lot of heavy computation, which is handy for diff --git a/src/reuse/vcs.py b/src/reuse/vcs.py index ec760abea..a22e9de6d 100644 --- a/src/reuse/vcs.py +++ b/src/reuse/vcs.py @@ -100,7 +100,7 @@ def _find_all_ignored_files(self) -> Set[Path]: ] result = execute_command(command, _LOGGER, cwd=self.project.root) all_files = result.stdout.decode("utf-8").split("\0") - return {Path(file_) for file_ in all_files[:-1]} + return {Path(file_) for file_ in all_files[:-1]}.union({Path(".git")}) def is_ignored(self, path: StrPath) -> bool: path = self.project.relative_from_root(path) @@ -168,7 +168,7 @@ def _find_all_ignored_files(self) -> Set[Path]: ] result = execute_command(command, _LOGGER, cwd=self.project.root) all_files = result.stdout.decode("utf-8").split("\0") - return {Path(file_) for file_ in all_files[:-1]} + return {Path(file_) for file_ in all_files[:-1]}.union({Path(".hg")}) def is_ignored(self, path: StrPath) -> bool: path = self.project.relative_from_root(path)