From a2a24e93b41ea2a1ab45914fc2240c66a7d95f68 Mon Sep 17 00:00:00 2001 From: Johnathan Martin Date: Mon, 30 Jan 2017 16:47:27 -0800 Subject: [PATCH] refactor Spotlight::Resource reindexing stats tracking and the Spotlight::ReindexProgress that used to use it * refactor the way Spotlight::Resource tracks indexing stats * add items_reindexed_estimate field to create_spotlight_reindexing_log_entries migration * resource.rb: refactor Spotlight::Resource#reindex so that it takes an optional ReindexingLogEntry; use the log entry to track the stats that Resource used to keep on itself. remove unused methods. update tests. * reindex_job.rb: use before_perform hook to update items_reindexed_estimate on the reindexing log entry, if there is one. pass the log entry to resource.reindex. add tests. * refactor Spotlight::ReindexProgress to get a ReindexingLogEntry from an Exhibit, instead of dealing with a Resource list * refactor Spotlight::ReindexProgress so that it takes a Spotlight::Exhibit (instead of a Resource list), and uses the current ReindexingLogEntry from the Exhibit (if there is one). fix Exhibit's call to ReindexProgress and have it initialize the log entries it creates to 0. update exhibit_spec.rb. * rewrite reindex_progress_spec.rb * reindex_monitor_spec.rb: fix the simulation of a recent reindexing job, slightly relax expectations for reindexing progress text to accommodate multi-digit numbers --- app/jobs/spotlight/reindex_job.rb | 16 +- app/models/spotlight/exhibit.rb | 4 +- app/models/spotlight/reindex_progress.rb | 48 ++-- app/models/spotlight/resource.rb | 64 +---- ...create_spotlight_reindexing_log_entries.rb | 1 + .../javascript/reindex_monitor_spec.rb | 6 +- spec/jobs/spotlight/reindex_job_spec.rb | 12 + spec/models/spotlight/exhibit_spec.rb | 25 +- .../models/spotlight/reindex_progress_spec.rb | 224 ++++++++++-------- spec/models/spotlight/resource_spec.rb | 44 +--- 10 files changed, 217 insertions(+), 227 deletions(-) diff --git a/app/jobs/spotlight/reindex_job.rb b/app/jobs/spotlight/reindex_job.rb index a0f85638d..62e041cd1 100644 --- a/app/jobs/spotlight/reindex_job.rb +++ b/app/jobs/spotlight/reindex_job.rb @@ -4,8 +4,14 @@ module Spotlight class ReindexJob < ActiveJob::Base queue_as :default - before_enqueue do |job| - resource_list(job.arguments.first).each(&:waiting!) + before_perform do |job| + job_log_entry = log_entry(job) + next unless job_log_entry + + items_reindexed_estimate = resource_list(job.arguments.first).sum do |resource| + resource.document_builder.documents_to_index.size + end + job_log_entry.update(items_reindexed_estimate: items_reindexed_estimate) end around_perform do |job, block| @@ -22,8 +28,10 @@ class ReindexJob < ActiveJob::Base job_log_entry.succeeded! if job_log_entry end - def perform(exhibit_or_resources, _log_entry = nil) - resource_list(exhibit_or_resources).each(&:reindex) + def perform(exhibit_or_resources, log_entry = nil) + resource_list(exhibit_or_resources).each do |resource| + resource.reindex(log_entry) + end end private diff --git a/app/models/spotlight/exhibit.rb b/app/models/spotlight/exhibit.rb index 80b66da4a..1c11fee78 100644 --- a/app/models/spotlight/exhibit.rb +++ b/app/models/spotlight/exhibit.rb @@ -101,7 +101,7 @@ def requested_by end def reindex_progress - @reindex_progress ||= ReindexProgress.new(resources) if resources + @reindex_progress ||= ReindexProgress.new(self) end protected @@ -111,7 +111,7 @@ def sanitize_description end def new_reindexing_log_entry(user = nil) - Spotlight::ReindexingLogEntry.create(exhibit: self, user: user, items_reindexed_count: resources.size, job_status: 'unstarted') + Spotlight::ReindexingLogEntry.create(exhibit: self, user: user, items_reindexed_count: 0, job_status: 'unstarted') end end end diff --git a/app/models/spotlight/reindex_progress.rb b/app/models/spotlight/reindex_progress.rb index 28177d882..5848f97de 100644 --- a/app/models/spotlight/reindex_progress.rb +++ b/app/models/spotlight/reindex_progress.rb @@ -2,51 +2,47 @@ module Spotlight ## # ReindexProgress is a class that models the progress of reindexing a list of resources class ReindexProgress - def initialize(resource_list) - @resources = if resource_list.present? - resource_list.order('updated_at') - else - Spotlight::Resource.none - end + attr_reader :exhibit + + def initialize(exhibit) + @exhibit = exhibit end def recently_in_progress? - any_waiting? || (!!finished_at && finished_at > Spotlight::Engine.config.reindex_progress_window.minutes.ago) + return false if current_log_entry.blank? + return true if current_log_entry.in_progress? + + current_log_entry.end_time.present? && (current_log_entry.end_time > Spotlight::Engine.config.reindex_progress_window.minutes.ago) end def started_at - return unless resources.present? - - enqueued_resources = resources.select(&:enqueued_at?) - - return unless enqueued_resources.any? - - @started ||= enqueued_resources.min_by(&:enqueued_at).enqueued_at + current_log_entry.try(:start_time) end def updated_at - @updated ||= resources.maximum(:updated_at) || started_at + current_log_entry.try(:updated_at) end def finished? - completed_resources.present? && !any_waiting? + return false if current_log_entry.blank? + current_log_entry.succeeded? || current_log_entry.failed? end def finished_at - return unless finished? - @finished ||= completed_resources.max_by(&:last_indexed_finished).last_indexed_finished + current_log_entry.try(:end_time) end def total - @total ||= resources.map(&:last_indexed_estimate).compact.sum + current_log_entry.try(:items_reindexed_estimate) end def completed - @completed ||= resources.map(&:last_indexed_count).compact.sum + current_log_entry.try(:items_reindexed_count) end def errored? - resources.any?(&:errored?) + return false if current_log_entry.blank? + current_log_entry.failed? end def as_json(*) @@ -63,10 +59,8 @@ def as_json(*) private - attr_reader :resources - - def any_waiting? - resources.any?(&:waiting?) + def current_log_entry + exhibit.reindexing_log_entries.where.not(job_status: 'unstarted').first end def localized_start_time @@ -83,9 +77,5 @@ def localized_updated_time return unless updated_at I18n.l(updated_at, format: :short) end - - def completed_resources - resources.completed - end end end diff --git a/app/models/spotlight/resource.rb b/app/models/spotlight/resource.rb index d0af9c648..fe548127f 100644 --- a/app/models/spotlight/resource.rb +++ b/app/models/spotlight/resource.rb @@ -16,19 +16,8 @@ class Resource < ActiveRecord::Base has_many :solr_document_sidecars serialize :data, Hash - store :metadata, accessors: [ - :enqueued_at, - :last_indexed_estimate, - :last_indexed_count, - :last_index_elapsed_time, - :last_indexed_finished - ], coder: JSON - enum index_status: [:waiting, :completed, :errored] - - around_index :reindex_with_logging after_index :commit - after_index :completed! after_index :touch_exhibit! ## @@ -42,27 +31,9 @@ def save_and_index(*args) ## # Enqueue an asynchronous reindexing job for this resource def reindex_later - waiting! Spotlight::ReindexJob.perform_later(self) end - def waiting! - update(enqueued_at: Time.zone.now) - super - end - - def enqueued_at - cast_to_date_time(super) - end - - def enqueued_at? - enqueued_at.present? - end - - def last_indexed_finished - cast_to_date_time(super) - end - def document_model exhibit.blacklight_config.document_model if exhibit end @@ -72,43 +43,28 @@ def document_model # Index the result of {#to_solr} into the index in batches of {#batch_size} # # @return [Integer] number of records indexed - def reindex + # rubocop:disable Metrics/MethodLength + def reindex(reindexing_log_entry = nil) benchmark "Reindexing #{self} (batch size: #{batch_size})" do count = 0 run_callbacks :index do document_builder.documents_to_index.each_slice(batch_size) do |batch| write_to_index(batch) - update(last_indexed_count: (count += batch.length)) + count += batch.length + reindexing_log_entry.update(items_reindexed_count: count) if reindexing_log_entry end count end end end + # rubocop:enable Metrics/MethodLength def document_builder @document_builder ||= document_builder_class.new(self) end - protected - - def reindex_with_logging - time_start = Time.zone.now - - update(indexed_at: time_start, - last_indexed_estimate: document_builder.documents_to_index.size, - last_indexed_finished: nil, - last_index_elapsed_time: nil) - - count = yield - - time_end = Time.zone.now - update(last_indexed_count: count, - last_indexed_finished: time_end, - last_index_elapsed_time: time_end - time_start) - end - private def blacklight_solr @@ -144,16 +100,6 @@ def touch_exhibit! def write? Spotlight::Engine.config.writable_index end - - def cast_to_date_time(value) - return unless value - - if defined? ActiveModel::Type::DateTime - ActiveModel::Type::DateTime.new.cast(value) - else - ActiveRecord::Type::DateTime.new.type_cast_from_database(value) - end - end end end end diff --git a/db/migrate/20170105222939_create_spotlight_reindexing_log_entries.rb b/db/migrate/20170105222939_create_spotlight_reindexing_log_entries.rb index f04bc005d..50143b925 100644 --- a/db/migrate/20170105222939_create_spotlight_reindexing_log_entries.rb +++ b/db/migrate/20170105222939_create_spotlight_reindexing_log_entries.rb @@ -2,6 +2,7 @@ class CreateSpotlightReindexingLogEntries < ActiveRecord::Migration def change create_table :spotlight_reindexing_log_entries do |t| t.integer :items_reindexed_count + t.integer :items_reindexed_estimate t.datetime :start_time t.datetime :end_time t.integer :job_status diff --git a/spec/features/javascript/reindex_monitor_spec.rb b/spec/features/javascript/reindex_monitor_spec.rb index c3bdf13d3..b5452753e 100644 --- a/spec/features/javascript/reindex_monitor_spec.rb +++ b/spec/features/javascript/reindex_monitor_spec.rb @@ -6,7 +6,7 @@ let(:exhibit_curator) { FactoryGirl.create(:exhibit_curator, exhibit: exhibit) } before do - resources.each(&:waiting!) + FactoryGirl.create(:in_progress_reindexing_log_entry, exhibit: exhibit, items_reindexed_estimate: 5) login_as exhibit_curator visit spotlight.admin_exhibit_catalog_path(exhibit) end @@ -14,8 +14,8 @@ it 'is rendered on the item admin page' do expect(page).to have_css('.panel.index-status', visible: true) within('.panel.index-status') do - expect(page).to have_css('p', text: /Began reindexing a total of \d items/) - expect(page).to have_css('p', text: /Reindexed \d of \d items/) + expect(page).to have_css('p', text: /Began reindexing a total of \d+ items/) + expect(page).to have_css('p', text: /Reindexed \d+ of \d+ items/) end end end diff --git a/spec/jobs/spotlight/reindex_job_spec.rb b/spec/jobs/spotlight/reindex_job_spec.rb index e50538ec8..dd2baa477 100644 --- a/spec/jobs/spotlight/reindex_job_spec.rb +++ b/spec/jobs/spotlight/reindex_job_spec.rb @@ -46,6 +46,18 @@ expect(log_entry).to receive(:failed!) expect { subject.perform_now }.to raise_error unexpected_error end + + it 'updates the items_reindexed_estimate field on the log entry' do + expect(log_entry).to receive(:update).with(items_reindexed_estimate: 1) + subject.perform_now + end + + it 'passes log_entry to the resource.reindex call' do + # ActiveJob will reload the collection, so we go through a little trouble: + expect_any_instance_of(Spotlight::Resource).to receive(:reindex).with(log_entry).exactly(:once) + # expect(resource).to receive(:reindex).with(log_entry) + subject.perform_now + end end end diff --git a/spec/models/spotlight/exhibit_spec.rb b/spec/models/spotlight/exhibit_spec.rb index 999a68bba..41dabc1ed 100644 --- a/spec/models/spotlight/exhibit_spec.rb +++ b/spec/models/spotlight/exhibit_spec.rb @@ -190,7 +190,7 @@ describe '#reindex_later' do subject { FactoryGirl.create(:exhibit) } - let(:log_entry) { Spotlight::ReindexingLogEntry.new(exhibit: subject, user: user, items_reindexed_count: subject.resources.size) } + let(:log_entry) { Spotlight::ReindexingLogEntry.new(exhibit: subject, user: user, items_reindexed_count: 0) } context 'user is omitted' do let(:user) { nil } @@ -215,6 +215,25 @@ end end + describe '#new_reindexing_log_entry' do + let(:user) { FactoryGirl.build(:user) } + it 'returns a properly configured Spotlight::ReindexingLogEntry instance' do + reindexing_log_entry = subject.send(:new_reindexing_log_entry, user) + expect(reindexing_log_entry.exhibit).to eq subject + expect(reindexing_log_entry.user).to eq user + expect(reindexing_log_entry.items_reindexed_count).to eq 0 + expect(reindexing_log_entry.unstarted?).to be true + end + + it 'does not require user the user parameter' do + reindexing_log_entry = subject.send(:new_reindexing_log_entry) + expect(reindexing_log_entry.exhibit).to eq subject + expect(reindexing_log_entry.user).to be nil + expect(reindexing_log_entry.items_reindexed_count).to eq 0 + expect(reindexing_log_entry.unstarted?).to be true + end + end + describe '#solr_documents' do let(:blacklight_config) { Blacklight::Configuration.new } let(:slug) { 'some_slug' } @@ -284,7 +303,9 @@ describe '#reindex_progress' do it 'returns a Spotlight::ReindexProgress' do - expect(subject.reindex_progress).to be_a Spotlight::ReindexProgress + reindex_progress = subject.reindex_progress + expect(reindex_progress).to be_a Spotlight::ReindexProgress + expect(reindex_progress.exhibit).to eq subject end end end diff --git a/spec/models/spotlight/reindex_progress_spec.rb b/spec/models/spotlight/reindex_progress_spec.rb index 04175a33e..11b106899 100644 --- a/spec/models/spotlight/reindex_progress_spec.rb +++ b/spec/models/spotlight/reindex_progress_spec.rb @@ -1,139 +1,171 @@ describe Spotlight::ReindexProgress, type: :model do - let(:start_time) { 20.minutes.ago.at_beginning_of_minute } - let(:finish_time) { 5.minutes.ago.at_beginning_of_minute } - let(:updated_time) { 1.minute.ago.at_beginning_of_minute } - let!(:first_resource) do - FactoryGirl.create( - :resource, - updated_at: updated_time, - indexed_at: start_time, - enqueued_at: start_time, - last_indexed_finished: start_time, - last_indexed_estimate: 7, - last_indexed_count: 5, - index_status: 1 - ) - end - let!(:last_resource) do - FactoryGirl.create( - :resource, - updated_at: finish_time, - indexed_at: finish_time, - enqueued_at: start_time, - last_indexed_finished: finish_time, - last_indexed_estimate: 3, - last_indexed_count: 2, - index_status: 1 - ) + let(:reindexing_log_entries) do + [ + # failed is the later of the two, and thus the return value for current_log_entry + FactoryGirl.create(:reindexing_log_entry, items_reindexed_estimate: 11), + FactoryGirl.create(:failed_reindexing_log_entry, items_reindexed_estimate: 12) + ] end + let(:exhibit) { FactoryGirl.create(:exhibit, reindexing_log_entries: reindexing_log_entries) } - let(:new_resource) do - FactoryGirl.create( - :resource, - last_indexed_count: 10, - last_indexed_estimate: 15, - index_status: 0 - ) - end + let(:subject) { described_class.new(exhibit) } - let(:resources) { [first_resource, last_resource, new_resource] } - subject { described_class.new(Spotlight::Resource.all) } - let(:json) { JSON.parse(subject.to_json) } + describe '#started_at' do + it 'returns start_time for current_log_entry' do + expect(subject.started_at).to eq(Time.zone.parse('2017-01-10 23:00:00')) + end + end - before do - allow(subject).to receive_messages(completed_resources: resources) + describe '#updated_at' do + # disable SkipsModelValidations cop for this test, because it complains about #touch, which is convenient here + # rubocop:disable Rails/SkipsModelValidations + it 'returns the time of last update for current_log_entry' do + lower_bound = Time.zone.now + subject.send(:current_log_entry).touch + upper_bound = Time.zone.now + + expect(subject.updated_at).to be_between(lower_bound, upper_bound) + end + # rubocop:enable Rails/SkipsModelValidations end - describe '#recently_in_progress?' do - let(:resources) { [first_resource, last_resource] } - context 'when the last resource has been updated within the allotted time' do - it 'is true' do - expect(subject).to be_recently_in_progress - end + describe '#finished?' do + it 'returns true if current_log_entry is succeeded or failed' do + expect(subject.finished?).to be true end + end - context 'when any of the resources is marked as waiting' do - before do - first_resource.waiting! - end - it 'is true' do - expect(subject).to be_recently_in_progress - end + describe '#finished_at' do + it 'returns end_time for current_log_entry' do + expect(subject.finished_at).to eq(Time.zone.parse('2017-01-10 23:05:00')) end + end - context 'when the last resources has been updated outside of the allotted time ' do - before do - expect(last_resource).to receive_messages(last_indexed_finished: 12.minutes.ago) - end - it 'is false' do - expect(subject).not_to be_recently_in_progress - end + describe '#total' do + it 'returns items_reindexed_estimate for current_log_entry' do + expect(subject.total).to be 12 end + end - it 'is included in the json' do - expect(json['recently_in_progress']).to be true + describe '#completed' do + it 'returns items_reindexed_count for current_log_entry' do + expect(subject.completed).to be 10 end end - describe '#started_at' do - it 'returns the indexed_at attribute of the first resource' do - expect(subject.started_at).to eq start_time + describe '#errored?' do + it 'returns true for log entries marked as failed' do + expect(subject.errored?).to be true end + end - it 'is included in the json as a localized string' do - expect(json['started_at']).to eq I18n.l(start_time, format: :short) + describe '#as_json' do + it 'returns a hash with values for current_log_entry via the various helper methods' do + expect(subject.as_json).to eq( + recently_in_progress: subject.recently_in_progress?, + started_at: subject.send(:localized_start_time), + finished_at: subject.send(:localized_finish_time), + updated_at: subject.send(:localized_updated_time), + total: subject.total, + completed: subject.completed, + errored: subject.errored? + ) end + end - context 'with unqueued resources' do - subject { described_class.new(Spotlight::Resource.where(id: new_resource.id)) } + describe '#recently_in_progress?' do + context 'there is no end_time for current_log_entry' do + let(:current_log_entry) { FactoryGirl.create(:in_progress_reindexing_log_entry) } + let(:exhibit) { FactoryGirl.create(:exhibit, reindexing_log_entries: [current_log_entry]) } - it 'returns the indexed_at attribute of the first resource' do - expect(subject.started_at).to be_nil + it 'returns true' do + expect(subject.recently_in_progress?).to be true end end - end - describe '#updated_at' do - let(:resources) { [first_resource, last_resource] } + context 'current_log_entry has an end_time less than Spotlight::Engine.config.reindex_progress_window.minutes.ago' do + let(:current_log_entry) { FactoryGirl.create(:recent_reindexing_log_entry, end_time: Time.zone.now) } + let(:exhibit) { FactoryGirl.create(:exhibit, reindexing_log_entries: [current_log_entry]) } - it 'returns the updated_at attribute of the last resource' do - expect(subject.updated_at).to eq updated_time + it 'returns true' do + expect(subject.recently_in_progress?).to be true + end end - it 'is included in the json as a localized string under the updated_at attribute' do - expect(json['updated_at']).to eq I18n.l(updated_time, format: :short) + context 'current_log_entry is unstarted ' do + let(:current_log_entry) { FactoryGirl.create(:unstarted_reindexing_log_entry) } + let(:exhibit) { FactoryGirl.create(:exhibit, reindexing_log_entries: [current_log_entry]) } + + it 'returns false' do + expect(subject.recently_in_progress?).to be false + end end end - describe '#finished_at' do - let(:resources) { [first_resource, last_resource] } + describe 'private methods' do + describe '#current_log_entry' do + let(:reindexing_log_entries) do + [ + FactoryGirl.create(:unstarted_reindexing_log_entry), + FactoryGirl.create(:reindexing_log_entry), + FactoryGirl.create(:in_progress_reindexing_log_entry), + FactoryGirl.create(:failed_reindexing_log_entry), + FactoryGirl.create(:unstarted_reindexing_log_entry) + ] + end - it 'returns the updated_at attribute of the last resource' do - expect(subject.finished_at).to eq finish_time + it 'returns the latest log entry that is not unstarted' do + expect(subject.send(:current_log_entry)).to eq(reindexing_log_entries[2]) + end end - it 'is included in the json as a localized string under the updated_at attribute' do - expect(json['finished_at']).to eq I18n.l(finish_time, format: :short) + describe '#localized_start_time' do + it 'returns the short formatted start time' do + expect(subject.send(:localized_start_time)).to eq I18n.l(subject.started_at, format: :short) + end end - end - describe '#total' do - it 'sums the resources last_indexed_estimate' do - expect(subject.total).to eq 25 + describe '#localized_finish_time' do + it 'returns the short formatted end time' do + expect(subject.send(:localized_finish_time)).to eq I18n.l(subject.finished_at, format: :short) + end end - it 'is included in the json' do - expect(json['total']).to eq 25 + describe '#localized_updated_time' do + it 'returns the short formatted last updated time' do + expect(subject.send(:localized_updated_time)).to eq I18n.l(subject.updated_at, format: :short) + end end end - describe '#completed' do - it 'sums the resources last_indexed_count' do - expect(subject.completed).to eq 17 - end - - it 'is included in the json' do - expect(json['completed']).to eq 17 + context 'current_log_entry is nil' do + let(:reindexing_log_entries) { [] } + + # rubocop:disable RSpec/MultipleExpectations + it 'methods return gracefully' do + expect(subject.send(:current_log_entry)).to be nil + + expect(subject.recently_in_progress?).to be false + expect(subject.started_at).to be nil + expect(subject.updated_at).to be nil + expect(subject.finished?).to be false + expect(subject.finished_at).to be nil + expect(subject.total).to be nil + expect(subject.completed).to be nil + expect(subject.errored?).to be false + expect(subject.send(:localized_start_time)).to be nil + expect(subject.send(:localized_finish_time)).to be nil + expect(subject.send(:localized_updated_time)).to be nil + expect(subject.as_json).to eq( + recently_in_progress: false, + started_at: nil, + finished_at: nil, + updated_at: nil, + total: nil, + completed: nil, + errored: false + ) end + # rubocop:enable RSpec/MultipleExpectations end end diff --git a/spec/models/spotlight/resource_spec.rb b/spec/models/spotlight/resource_spec.rb index ce140f60d..f2b65775a 100644 --- a/spec/models/spotlight/resource_spec.rb +++ b/spec/models/spotlight/resource_spec.rb @@ -4,29 +4,6 @@ end let(:exhibit) { FactoryGirl.create(:exhibit) } - describe '#enqueued_at' do - it 'casts values to Time objects' do - t = Time.zone.now.at_beginning_of_minute - subject.enqueued_at = t - - expect(subject.enqueued_at).to eq t - end - - it 'handles blank values' do - subject.enqueued_at = nil - expect(subject.enqueued_at).to be_nil - end - end - - describe '#last_indexed_finished' do - it 'casts values to Time objects' do - t = Time.zone.now.at_beginning_of_minute - subject.last_indexed_finished = t - - expect(subject.last_indexed_finished).to eq t - end - end - describe '#reindex' do context 'with a provider that generates ids' do subject do @@ -55,6 +32,18 @@ subject.reindex end + context 'reindexing_log_entry is provided' do + before do + allow(subject.send(:blacklight_solr)).to receive(:update) + end + + it 'updates the count of reindexed items in the log entry' do + reindexing_log_entry = double(Spotlight::ReindexingLogEntry) + expect(reindexing_log_entry).to receive(:update).with(items_reindexed_count: 1) + subject.reindex reindexing_log_entry + end + end + context 'when the index is not writable' do before do allow(Spotlight::Engine.config).to receive_messages(writable_index: false) @@ -90,15 +79,6 @@ expect(subject.exhibit).to have_received(:touch) end - - it 'records indexing metadata as document attributes' do - subject.reindex - - expect(subject.indexed_at).to be > Time.zone.now - 5.seconds - expect(subject.last_indexed_estimate).to eq 2 - expect(subject.last_indexed_count).to eq 2 - expect(subject.last_index_elapsed_time).to be < 1 - end end end end