Skip to content

Commit

Permalink
Add better handling of symlinks
Browse files Browse the repository at this point in the history
  • Loading branch information
matrss committed Jun 22, 2023
1 parent 2bb117c commit fcee822
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 35 deletions.
84 changes: 54 additions & 30 deletions src/reuse/project.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# SPDX-FileCopyrightText: 2017 Free Software Foundation Europe e.V. <https://fsfe.org>
# SPDX-FileCopyrightText: 2022 Florian Snow <florian@familysnow.net>
# SPDX-FileCopyrightText: 2023 DB Systel GmbH
# SPDX-FileCopyrightText: 2023 Matthias Riße
#
# SPDX-License-Identifier: GPL-3.0-or-later

Expand Down Expand Up @@ -136,8 +137,27 @@ def all_files(self, directory: Optional[StrPath] = None) -> Iterator[Path]:
_LOGGER.debug("ignoring '%s'", the_file)
continue
if the_file.is_symlink():
_LOGGER.debug("skipping symlink '%s'", the_file)
continue
# Needs to use os.path.abspath instead of Path.absolute
# since the former normalizes the path, i.e. resolves "..".
# There is no method in pathlib for this which doesn't also
# resolve symlinks recursively, like Path.resolve.
target_file = Path(
os.path.abspath(the_file.readlink()) # type: ignore
)
_LOGGER.debug(
"'%s' is a symlink pointing to '%s'",
the_file,
target_file,
)
if (
target_file.is_relative_to( # type: ignore # pylint: disable=E1101
self.root.resolve()
)
and (target_file.exists() or target_file.is_symlink())
and not self._is_path_ignored(target_file)
):
_LOGGER.debug("skipping symlink '%s'", the_file)
continue
# Suppressing this error because I simply don't want to deal
# with that here.
with contextlib.suppress(OSError):
Expand Down Expand Up @@ -184,35 +204,39 @@ def reuse_info_of(self, path: StrPath) -> ReuseInfo:
dep5_path = source_path

# Search the file for REUSE information.
with path.open("rb") as fp:
try:
# Completely read the file once to search for possible snippets
if _contains_snippet(fp):
_LOGGER.debug(f"'{path}' seems to contain a SPDX Snippet")
read_limit = None
else:
read_limit = _HEADER_BYTES
# Reset read position
fp.seek(0)
# Scan the file for REUSE info, possibly limiting the read
# length
file_result = extract_reuse_info(
decoded_text_from_binary(fp, size=read_limit)
)
if file_result:
source_path = str(path)
if path.suffix == ".license":
source_type = SourceType.DOT_LICENSE_FILE
if not path.is_symlink():
with path.open("rb") as fp:
try:
# Completely read the file once to search for possible
# snippets
if _contains_snippet(fp):
_LOGGER.debug(
f"'{path}' seems to contain a SPDX Snippet"
)
read_limit = None
else:
source_type = SourceType.FILE_HEADER

except (ExpressionError, ParseError):
_LOGGER.error(
_(
"'{path}' holds an SPDX expression that cannot be"
" parsed, skipping the file"
).format(path=path)
)
read_limit = _HEADER_BYTES
# Reset read position
fp.seek(0)
# Scan the file for REUSE info, possibly limiting the read
# length
file_result = extract_reuse_info(
decoded_text_from_binary(fp, size=read_limit)
)
if file_result:
source_path = str(path)
if path.suffix == ".license":
source_type = SourceType.DOT_LICENSE_FILE
else:
source_type = SourceType.FILE_HEADER

except (ExpressionError, ParseError):
_LOGGER.error(
_(
"'{path}' holds an SPDX expression that cannot be"
" parsed, skipping the file"
).format(path=path)
)

# There is both information in a .dep5 file and in the file header
if (
Expand Down
7 changes: 4 additions & 3 deletions src/reuse/report.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# SPDX-FileCopyrightText: 2017 Free Software Foundation Europe e.V. <https://fsfe.org>
# SPDX-FileCopyrightText: 2022 Florian Snow <florian@familysnow.net>
# SPDX-FileCopyrightText: 2022 Pietro Albini <pietro.albini@ferrous-systems.com>
# SPDX-FileCopyrightText: 2023 Matthias Riße
#
# SPDX-License-Identifier: GPL-3.0-or-later

Expand Down Expand Up @@ -445,14 +446,14 @@ def generate(
) -> "FileReport":
"""Generate a FileReport from a path in a Project."""
path = Path(path)
if not path.is_file():
raise OSError(f"{path} is not a file")
if not path.is_file() and not path.is_symlink():
raise OSError(f"{path} is not supported")

relative = project.relative_from_root(path)
report = cls("./" + str(relative), path, do_checksum=do_checksum)

# Checksum and ID
if report.do_checksum:
if report.do_checksum and not path.is_symlink():
report.spdxfile.chk_sum = _checksum(path)
else:
# This path avoids a lot of heavy computation, which is handy for
Expand Down
4 changes: 2 additions & 2 deletions src/reuse/vcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def _find_all_ignored_files(self) -> Set[Path]:
]
result = execute_command(command, _LOGGER, cwd=self.project.root)
all_files = result.stdout.decode("utf-8").split("\0")
return {Path(file_) for file_ in all_files[:-1]}
return {Path(file_) for file_ in all_files[:-1]}.union({Path(".git")})

def is_ignored(self, path: StrPath) -> bool:
path = self.project.relative_from_root(path)
Expand Down Expand Up @@ -168,7 +168,7 @@ def _find_all_ignored_files(self) -> Set[Path]:
]
result = execute_command(command, _LOGGER, cwd=self.project.root)
all_files = result.stdout.decode("utf-8").split("\0")
return {Path(file_) for file_ in all_files[:-1]}
return {Path(file_) for file_ in all_files[:-1]}.union({Path(".hg")})

def is_ignored(self, path: StrPath) -> bool:
path = self.project.relative_from_root(path)
Expand Down

0 comments on commit fcee822

Please sign in to comment.