Skip to content

Commit

Permalink
fix: don't try to delete directories in cleanup step (#263)
Browse files (browse the repository at this point in the history)
fixes:
> DEBUG:edi_energy_scraper.scraper:Removing /home/runner/work/edi_energy_mirror/edi_energy_mirror/edi_energy_de/FV2504
Traceback (most recent call last):
  File "/home/runner/work/edi_energy_mirror/edi_energy_mirror/download_and_post_process.py", line 16, in <module>
    loop.run_until_complete(mirror())
    ~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^
  File "/opt/hostedtoolcache/Python/3.13.1/x64/lib/python3.13/asyncio/base_events.py", line 720, in run_until_complete
    return future.result()
           ~~~~~~~~~~~~~^^
  File "/home/runner/work/edi_energy_mirror/edi_energy_mirror/download_and_post_process.py", line 11, in mirror
    await scraper.mirror()
  File "/opt/hostedtoolcache/Python/3.13.1/x64/lib/python3.13/site-packages/edi_energy_scraper/scraper.py", line 112, in mirror
    self._remove_old_files(all_metadata)
    ~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^
  File "/opt/hostedtoolcache/Python/3.13.1/x64/lib/python3.13/site-packages/edi_energy_scraper/scraper.py", line 61, in _remove_old_files
    downloaded_file.unlink()
    ~~~~~~~~~~~~~~~~~~~~~~^^
  File "/opt/hostedtoolcache/Python/3.13.1/x64/lib/python3.13/pathlib/_local.py", line 746, in unlink
    os.unlink(self)
    ~~~~~~~~~^^^^^^
IsADirectoryError: [Errno 21] Is a directory: 'edi_energy_de/FV2504'

Co-authored-by: Konstantin <konstantin.klein+github@hochfrequenz.de>
  • Loading branch information
hf-kklein and Konstantin authored Jan 11, 2025
1 parent f1321a2 commit a4972ee
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/edi_energy_scraper/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ async def get_documents_overview(self) -> list[Document]:

def _remove_old_files(self, documents: list[Document]) -> None:
"""removes those files that are no longer available online"""
-all_downloaded_files = self._root_dir.rglob("**/*")
+all_downloaded_files = (f for f in self._root_dir.rglob("**/*") if f.is_file())
all_recent_file_ids = {str(d.fileId) for d in documents}
for downloaded_file in all_downloaded_files:
file_id_of_downloaded_file = downloaded_file.stem.split("_")[-1]
Expand Down
2 changes: 2 additions & 0 deletions unittests/test_downloads.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ async def test_download_file(tmp_path: Path) -> None:
def test_cleanup(tmp_path: Path) -> None:
test_folder = tmp_path / "test"
test_folder.mkdir()
+a_directory = test_folder / "adir"
+a_directory.mkdir()
outdated_file_path = test_folder / "foo_123.pdf"
outdated_file_path.touch()
recent_file_path = test_folder / "foo_456.docx"
Expand Down

0 comments on commit a4972ee

Please sign in to comment.