diff --git a/lib/Conch/Command/thin_device_reports.pm b/lib/Conch/Command/thin_device_reports.pm index d00104381..a8d82910f 100644 --- a/lib/Conch/Command/thin_device_reports.pm +++ b/lib/Conch/Command/thin_device_reports.pm @@ -104,101 +104,59 @@ sub _process_device ($self, $device) { my $report_count = 0; print 'device id ', $device->id, ': '; - # Consider the validation status of all reports, oldest first, in pages of 100 rows each. - # Valid reports with no validation results are considered to be a 'pass', i.e. eligible for - # deletion. + my $query = <<'SQL'; +select id from ( + select + device_report.id, + device_report.created, + coalesce( + case when count(distinct(validation_state.status)) > 1 then 'NOT_UNIQUE' + else min(validation_state.status)::text end, + 'NONE') as status, + row_number() over ( + partition by status order by device_report.created desc + ) as seq + from device_report + left join validation_state on validation_state.device_report_id = device_report.id + where device_report.device_id = ? + group by device_report.id, device_report.created, status +) _tmp +where status != 'NOT_UNIQUE' and seq > 1 and created < (now() - interval '6 months') +SQL + + # delete all device_report rows (cascading to validation_state, validation_state_member) which + # are older than six months, except for the most recent report of each validation status (error, + # fail, pass) my $device_report_rs = $self->app->db_device_reports - ->search({ 'device_report.device_id' => $device->id }) - ->columns('device_report.id') - ->with_report_status - ->order_by({ -asc => 'device_report.created' }) - ->rows(100) - ->page(1) - ->hri; - - # we only delete reports when we are done, so we can safely iterate through reports - # without the pages changing strangely - my @delete_report_ids; - - # we push data about reports to the end as we consider each one, - # and shift data off at the beginning when we're done - # $report_statuses[-1] current report - # $report_statuses[-2] previous report - # $report_statuses[-3] 2 reports ago - my @report_statuses; - - foreach my $page (1 .. $device_report_rs->pager->last_page) { - $device_report_rs = $device_report_rs->page($page); - while (my $device_report = $device_report_rs->next) { - ++$report_count; - print '.' if $report_count % 100 == 0; - - # capture information about the latest report we just fetched. - push @report_statuses, $device_report; - - # we maintain a sliding window of (at least?) 3 reports. - # We can consider what to do about the middle report now. - - # prevprev previous current delete previous? - # dne dne FAIL 0 previous report does not exist - # dne dne PASS 0 previous report does not exist - # dne FAIL FAIL 0 keep first - # dne FAIL PASS 0 keep first - # dne PASS FAIL 0 keep first - # dne PASS PASS 0 keep first - # FAIL FAIL FAIL 0 keep reports that fail - # FAIL FAIL PASS 0 keep reports that fail - # FAIL PASS FAIL 0 keep first pass after a failure - # FAIL PASS PASS 0 keep first pass after a failure - # PASS FAIL FAIL 0 keep reports that fail - # PASS FAIL PASS 0 keep reports that fail - # PASS PASS FAIL 0 last pass before a failure - # PASS PASS PASS 1 - - # we only delete the previous report (index [-2]) iff: - # - the current report was a pass - # - the previous exists and was a pass - # - the previous-previous exists and was a pass - - push @delete_report_ids, $report_statuses[-2]{id} - if $report_statuses[-1]{status} eq 'pass' - and $report_statuses[-2] and $report_statuses[-2]{status} eq 'pass' - and $report_statuses[-3] and $report_statuses[-3]{status} eq 'pass'; - - # forget about the oldest report if we are watching at least 3. - shift @report_statuses if $report_statuses[-3]; - } - } + ->search({ 'device_report.id' => { -in => \[ $query, $device->id ] } }); - print "\n"; + my $device_reports_deleted; + my $validation_results_deleted; - my ($device_reports_deleted, $validation_results_deleted) = (0,0); + print "\n"; if ($self->dry_run) { - say 'Would delete ', scalar(@delete_report_ids), ' reports for device id ', $device->id, + $device_reports_deleted = $device_report_rs->count; + say 'Would delete ', $device_reports_deleted, ' reports for device id ', $device->id, ' out of ', $report_count, ' examined.'; } else { # delete all reports that we identified for deletion # this may also cause cascade deletes on validation_state, validation_state_member. - say 'deleting ', scalar(@delete_report_ids), ' reports for device id ', $device->id, - ' out of ', $report_count, ' examined...'; - - # delete reports 100 records at a time - my $pager = Data::Page->new(scalar @delete_report_ids, 100); - for ($pager->first_page .. $pager->last_page) { - my @ids = $pager->splice(\@delete_report_ids); - last if not @ids; - $pager->current_page($pager->current_page + 1); + say 'deleting reports for device id ', $device->id, '...'; - $device_reports_deleted += $device->delete_related('device_reports', { id => { -in => \@ids } }); - } + $device_reports_deleted = $device_report_rs->delete; + say 'deleted ', $device_reports_deleted, ' reports for device id ', $device->id, + ' out of ', $report_count, ' examined'; - # delete all newly-orphaned validation_result rows for this device + # now delete all newly-orphaned validation_result rows for this device $validation_results_deleted = $device->delete_related('validation_results', { 'validation_state_members.validation_state_id' => undef }, { join => 'validation_state_members' }, ); + + say 'deleted ', $validation_results_deleted, + ' validation_result rows for device id ', $device->id; } print "\n"; diff --git a/sql/migrations/0171-delete-old-device_reports.sql b/sql/migrations/0171-delete-old-device_reports.sql new file mode 100644 index 000000000..dc2424e69 --- /dev/null +++ b/sql/migrations/0171-delete-old-device_reports.sql @@ -0,0 +1,29 @@ +SELECT run_migration(171, $$ + + -- create a placeholder validation_state record for every device_report without a + -- validation_state record pointing to it + -- (reports created before 2019-01-08 were not associated with validation_states, so we had to + -- match them up as best we could based on similar timestamps, and not everyone found his + -- match.) + insert into validation_state (validation_plan_id, created, status, device_report_id, device_id, hardware_product_id) + ( + select + hardware_product.validation_plan_id, + device_report.created, + 'error', + device_report.id, + device_report.device_id, + device.hardware_product_id + from device_report + left join validation_state on validation_state.device_report_id = device_report.id + left join device on device_report.device_id = device.id + left join hardware_product on hardware_product.id = device.hardware_product_id + where validation_state.id is null + ); + + -- now we can run bin/conch thin_device_reports, which will delete all device_report rows + -- (cascading to validation_state, validation_state_member) which are older than six months, + -- except for the most recent report of each validation status (error, fail, pass).. + -- and then delete newly-orphaned validation_result rows. + +$$);