From 8b57acf4b8c539b0cdf7b0b0ebd4593b51611fa9 Mon Sep 17 00:00:00 2001 From: Nora Shapiro Date: Thu, 5 Dec 2024 23:31:11 -0800 Subject: [PATCH] add FlareCleanupTask --- celery_config.py | 8 ++ requirements.in | 2 +- requirements.txt | 6 +- tasks/flare_cleanup.py | 63 ++++++++++++ tasks/tests/unit/test_flare_cleanup.py | 127 +++++++++++++++++++++++++ 5 files changed, 203 insertions(+), 3 deletions(-) create mode 100644 tasks/flare_cleanup.py create mode 100644 tasks/tests/unit/test_flare_cleanup.py diff --git a/celery_config.py b/celery_config.py index b39c03075..d7ed8b3e0 100644 --- a/celery_config.py +++ b/celery_config.py @@ -10,6 +10,7 @@ from shared.celery_config import ( BaseCeleryConfig, brolly_stats_rollup_task_name, + flare_cleanup_task_name, gh_app_webhook_check_task_name, health_check_task_name, profiling_finding_task_name, @@ -94,6 +95,13 @@ def _beat_schedule(): "cron_task_generation_time_iso": BeatLazyFunc(get_utc_now_as_iso_format) }, }, + "flare_cleanup": { + "task": flare_cleanup_task_name, + "schedule": crontab(minute="0", hour="4"), # every day, 4am UTC (8pm PT) + "kwargs": { + "cron_task_generation_time_iso": BeatLazyFunc(get_utc_now_as_iso_format) + }, + }, } if get_config("setup", "find_uncollected_profilings", "enabled", default=True): diff --git a/requirements.in b/requirements.in index cc461432c..970cf4a80 100644 --- a/requirements.in +++ b/requirements.in @@ -1,5 +1,5 @@ https://github.com/codecov/test-results-parser/archive/c840502d1b4dd7d05b2efc2c1328affaf2acd27c.tar.gz#egg=test-results-parser -https://github.com/codecov/shared/archive/45252f75524c38172b991a960bac964ec7b6f7d1.tar.gz#egg=shared +https://github.com/codecov/shared/archive/9f5c4fdd00fdc9b47e2aa76367dfce338725f2be.tar.gz#egg=shared https://github.com/codecov/timestring/archive/d37ceacc5954dff3b5bd2f887936a98a668dda42.tar.gz#egg=timestring asgiref>=3.7.2 analytics-python==1.3.0b1 diff --git a/requirements.txt b/requirements.txt index d03409e62..7eb644ad5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -328,7 +328,7 @@ sentry-sdk==2.13.0 # shared setuptools==75.6.0 # via nodeenv -shared @ https://github.com/codecov/shared/archive/45252f75524c38172b991a960bac964ec7b6f7d1.tar.gz#egg=shared +shared @ https://github.com/codecov/shared/archive/9f5c4fdd00fdc9b47e2aa76367dfce338725f2be.tar.gz#egg=shared # via -r requirements.in six==1.16.0 # via @@ -399,4 +399,6 @@ wrapt==1.16.0 yarl==1.9.4 # via vcrpy zstandard==0.23.0 - # via -r requirements.in + # via + # -r requirements.in + # shared diff --git a/tasks/flare_cleanup.py b/tasks/flare_cleanup.py new file mode 100644 index 000000000..2150c6f3c --- /dev/null +++ b/tasks/flare_cleanup.py @@ -0,0 +1,63 @@ +import logging + +from shared.api_archive.archive import ArchiveService +from shared.celery_config import flare_cleanup_task_name +from shared.django_apps.core.models import Pull, PullStates + +from app import celery_app +from tasks.crontasks import CodecovCronTask + +log = logging.getLogger(__name__) + + +class FlareCleanupTask(CodecovCronTask, name=flare_cleanup_task_name): + """ + Flare is a field on a Pull object. + Flare is used to draw static graphs (see GraphHandler view in api) and can be large. + The majority of flare graphs are used in pr comments, so we keep the (maybe large) flare "available" + in either the db or Archive storage while the pull is OPEN. + If the pull is not OPEN, we dump the flare to save space. + If we need to generate a flare graph for a non-OPEN pull, we build_report_from_commit + and generate fresh flare from that report (see GraphHandler view in api). + """ + + @classmethod + def get_min_seconds_interval_between_executions(cls): + return 72000 # 20h + + def run_cron_task(self, db_session, *args, **kwargs): + # for any Pull that is not OPEN, clear the flare field(s) + non_open_pulls = Pull.objects.exclude(state=PullStates.OPEN.value) + + log.info("Starting FlareCleanupTask") + + # clear in db + non_open_pulls_with_flare_in_db = non_open_pulls.filter( + _flare__isnull=False + ).exclude(_flare={}) + # single query, objs are not loaded into memory, does not call .save(), does not refresh updatestamp + n_updated = non_open_pulls_with_flare_in_db.update(_flare=None) + log.info(f"FlareCleanupTask cleared {n_updated} _flares") + + # clear in Archive + non_open_pulls_with_flare_in_archive = non_open_pulls.filter( + _flare_storage_path__isnull=False + ).select_related("repository") + log.info( + f"FlareCleanupTask will clear {non_open_pulls_with_flare_in_archive.count()} Archive flares" + ) + # single query, loads all pulls and repos in qset into memory, deletes file in Archive 1 by 1 + for pull in non_open_pulls_with_flare_in_archive: + archive_service = ArchiveService(repository=pull.repository) + archive_service.delete_file(pull._flare_storage_path) + + # single query, objs are not loaded into memory, does not call .save(), does not refresh updatestamp + n_updated = non_open_pulls_with_flare_in_archive.update( + _flare_storage_path=None + ) + + log.info(f"FlareCleanupTask cleared {n_updated} Archive flares") + + +RegisteredFlareCleanupTask = celery_app.register_task(FlareCleanupTask()) +flare_cleanup_task = celery_app.tasks[RegisteredFlareCleanupTask.name] diff --git a/tasks/tests/unit/test_flare_cleanup.py b/tasks/tests/unit/test_flare_cleanup.py new file mode 100644 index 000000000..21749bb1f --- /dev/null +++ b/tasks/tests/unit/test_flare_cleanup.py @@ -0,0 +1,127 @@ +import json +from unittest.mock import call + +from shared.django_apps.core.models import Pull, PullStates +from shared.django_apps.core.tests.factories import PullFactory, RepositoryFactory + +from tasks.flare_cleanup import FlareCleanupTask + + +class TestFlareCleanupTask(object): + def test_get_min_seconds_interval_between_executions(self): + assert isinstance( + FlareCleanupTask.get_min_seconds_interval_between_executions(), + int, + ) + assert FlareCleanupTask.get_min_seconds_interval_between_executions() > 17000 + + def test_successful_run(self, transactional_db, mocker): + mock_logs = mocker.patch("logging.Logger.info") + mock_archive_service = mocker.patch( + "shared.django_apps.utils.model_utils.ArchiveService" + ) + archive_value_for_flare = {"some": "data"} + mock_archive_service.return_value.read_file.return_value = json.dumps( + archive_value_for_flare + ) + mock_path = "path/to/written/object" + mock_archive_service.return_value.write_json_data_to_storage.return_value = ( + mock_path + ) + mock_archive_service_in_task = mocker.patch( + "tasks.flare_cleanup.ArchiveService" + ) + mock_archive_service_in_task.return_value.delete_file.return_value = None + + local_value_for_flare = {"test": "test"} + open_pull_with_local_flare = PullFactory( + state=PullStates.OPEN.value, + _flare=local_value_for_flare, + repository=RepositoryFactory(), + ) + assert open_pull_with_local_flare.flare == local_value_for_flare + assert open_pull_with_local_flare._flare == local_value_for_flare + assert open_pull_with_local_flare._flare_storage_path is None + + closed_pull_with_local_flare = PullFactory( + state=PullStates.CLOSED.value, + _flare=local_value_for_flare, + repository=RepositoryFactory(), + ) + assert closed_pull_with_local_flare.flare == local_value_for_flare + assert closed_pull_with_local_flare._flare == local_value_for_flare + assert closed_pull_with_local_flare._flare_storage_path is None + + open_pull_with_archive_flare = PullFactory( + state=PullStates.OPEN.value, + _flare=None, + _flare_storage_path=mock_path, + repository=RepositoryFactory(), + ) + assert open_pull_with_archive_flare.flare == archive_value_for_flare + assert open_pull_with_archive_flare._flare is None + assert open_pull_with_archive_flare._flare_storage_path == mock_path + + merged_pull_with_archive_flare = PullFactory( + state=PullStates.MERGED.value, + _flare=None, + _flare_storage_path=mock_path, + repository=RepositoryFactory(), + ) + assert merged_pull_with_archive_flare.flare == archive_value_for_flare + assert merged_pull_with_archive_flare._flare is None + assert merged_pull_with_archive_flare._flare_storage_path == mock_path + + task = FlareCleanupTask() + task.run_cron_task(transactional_db) + + mock_logs.assert_has_calls( + [ + call("Starting FlareCleanupTask"), + call("FlareCleanupTask cleared 1 _flares"), + call("FlareCleanupTask will clear 1 Archive flares"), + call("FlareCleanupTask cleared 1 Archive flares"), + ] + ) + + # there is a cache for flare on the object (all ArchiveFields have this), + # so get a fresh copy of each object without the cached value + open_pull_with_local_flare = Pull.objects.get(id=open_pull_with_local_flare.id) + assert open_pull_with_local_flare.flare == local_value_for_flare + assert open_pull_with_local_flare._flare == local_value_for_flare + assert open_pull_with_local_flare._flare_storage_path is None + + closed_pull_with_local_flare = Pull.objects.get( + id=closed_pull_with_local_flare.id + ) + assert closed_pull_with_local_flare.flare == {} + assert closed_pull_with_local_flare._flare is None + assert closed_pull_with_local_flare._flare_storage_path is None + + open_pull_with_archive_flare = Pull.objects.get( + id=open_pull_with_archive_flare.id + ) + assert open_pull_with_archive_flare.flare == archive_value_for_flare + assert open_pull_with_archive_flare._flare is None + assert open_pull_with_archive_flare._flare_storage_path == mock_path + + merged_pull_with_archive_flare = Pull.objects.get( + id=merged_pull_with_archive_flare.id + ) + assert merged_pull_with_archive_flare.flare == {} + assert merged_pull_with_archive_flare._flare is None + assert merged_pull_with_archive_flare._flare_storage_path is None + + mock_logs.reset_mock() + # check that once these pulls are corrected they are not corrected again + task = FlareCleanupTask() + task.run_cron_task(transactional_db) + + mock_logs.assert_has_calls( + [ + call("Starting FlareCleanupTask"), + call("FlareCleanupTask cleared 0 _flares"), + call("FlareCleanupTask will clear 0 Archive flares"), + call("FlareCleanupTask cleared 0 Archive flares"), + ] + )