Skip to content

Commit

Permalink
Allow querying last error that caused lock unhealthiness. Retry test …
Browse files Browse the repository at this point in the history
…if that last error is retryable.
  • Loading branch information
FooBarWidget committed Sep 11, 2021
1 parent f145785 commit 7662619
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 9 deletions.
27 changes: 27 additions & 0 deletions lib/distributed-lock-google-cloud-storage/lock.rb
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ def initialize(bucket_name:, path:, instance_identity_prefix: self.class.default
@owner = nil
@metageneration = nil
@refresher_thread = nil
@refresher_error = nil

# The refresher generation is incremented every time we shutdown
# the refresher thread. It allows the refresher thread to know
Expand Down Expand Up @@ -315,6 +316,7 @@ def abandon
#
# @return [Boolean]
# @raise [NotLockedError] This lock was not obtained.
# @see #last_refresh_error
def healthy?
@state_mutex.synchronize do
raise NotLockedError, 'Not locked' if !unsynced_locked_according_to_internal_state?
Expand All @@ -323,6 +325,8 @@ def healthy?
end

# Checks whether the lock is healthy. See {#healthy?} for the definition of "healthy".
# Use {#last_refresh_error} to query the last error that caused the lock to be declared
# unhealthy.
#
# It only makes sense to call this method after having obtained this lock.
#
Expand All @@ -333,6 +337,19 @@ def check_health!
raise LockUnhealthyError, 'Lock is not healthy' if !healthy?
end

# Returns the last error that caused the lock to be declared unhealthy.
#
# Don't use this method to check whether the lock is _currently_ healthy.
# If this lock has ever been unhealthy, then this method returns a non-nil value
# even if the lock is currently healthy.
#
# @return [StandardError, nil]
def last_refresh_error
@state_mutex.synchronize do
@refresher_error
end
end


private

Expand Down Expand Up @@ -565,7 +582,17 @@ def refresh_lock(refresher_generation)
end
log_debug { "Done refreshing lock. metageneration=#{file.metageneration}" }
true

rescue => e
@state_mutex.synchronize do
if @refresher_generation != refresher_generation
log_debug { 'Abort refreshing lock' }
return true
end

@refresher_error = e
end

log_error { "Error refreshing lock: #{e}" }
[false, permanent_failure]
end
Expand Down
33 changes: 28 additions & 5 deletions spec/lock_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,10 @@ def force_erase_lock_object
expect(lock).not_to be_owned_according_to_server
end

it 'has no refresh error' do
expect(lock.last_refresh_error).to be_nil
end

specify 'checking for health is not possible due to being unlocked' do
expect { lock.healthy? }.to \
raise_error(DistributedLock::GoogleCloudStorage::NotLockedError)
Expand Down Expand Up @@ -113,6 +117,7 @@ def force_erase_lock_object
expect(@lock).to be_owned_according_to_internal_state
expect(@lock).to be_owned_according_to_server
expect(@lock).to be_healthy
expect(@lock.last_refresh_error).to be_nil
expect { @lock.check_health! }.not_to raise_error
end

Expand All @@ -131,6 +136,7 @@ def force_erase_lock_object
owned_according_to_internal_state: @lock2.owned_according_to_internal_state?,
owned_according_to_server: @lock2.owned_according_to_server?,
healthy: @lock2.healthy?,
last_refresh_error: @lock2.last_refresh_error,
}
end

Expand All @@ -155,6 +161,7 @@ def force_erase_lock_object
expect(result[:owned_according_to_internal_state]).to be_truthy
expect(result[:owned_according_to_server]).to be_truthy
expect(result[:healthy]).to be_truthy
expect(result[:last_refresh_error]).to be_nil
end

it 'raises AlreadyLockedError if called twice by the same instance and thread' do
Expand Down Expand Up @@ -183,6 +190,7 @@ def force_erase_lock_object
expect(@lock).to be_owned_according_to_internal_state
expect(@lock).to be_owned_according_to_server
expect(@lock).to be_healthy
expect(@lock.last_refresh_error).to be_nil
expect { @lock.check_health! }.not_to raise_error
end

Expand All @@ -209,6 +217,7 @@ def force_erase_lock_object
expect(@lock).to be_owned_according_to_internal_state
expect(@lock).to be_owned_according_to_server
expect(@lock).to be_healthy
expect(@lock.last_refresh_error).to be_nil
expect { @lock.check_health! }.not_to raise_error
end

Expand All @@ -224,6 +233,7 @@ def force_erase_lock_object
expect(@lock).to be_owned_according_to_internal_state
expect(@lock).to be_owned_according_to_server
expect(@lock).to be_healthy
expect(@lock.last_refresh_error).to be_nil
expect { @lock.check_health! }.not_to raise_error
end

Expand All @@ -238,20 +248,25 @@ def force_erase_lock_object
expect(@lock).to be_owned_according_to_internal_state
expect(@lock).to be_owned_according_to_server
expect(@lock).to be_healthy
expect(@lock.last_refresh_error).to be_nil
expect { @lock.check_health! }.not_to raise_error
end
end


describe '#unlock' do
after :each do
@lock.abandon if @lock
if @lock
@lock.abandon
raise @lock.last_refresh_error if exception_to_retry?(@lock.last_refresh_error)
end
end

def lock_and_unlock
@lock.lock(timeout: 0)
deleted = nil
expect { deleted = @lock.unlock }.not_to raise_error
e = catch_non_retryable_exception { deleted = @lock.unlock }
expect(e).to be_nil
deleted
end

Expand Down Expand Up @@ -292,8 +307,11 @@ def lock_and_unlock

@lock.lock(timeout: 0)
force_erase_lock_object

deleted = nil
expect { deleted = @lock.unlock }.not_to raise_error
e = catch_non_retryable_exception { deleted = @lock.unlock }

expect(e).to be_nil
expect(deleted).to be_falsey
expect(@lock).not_to be_locked_according_to_internal_state
expect(@lock).not_to be_locked_according_to_server
Expand Down Expand Up @@ -326,7 +344,10 @@ def lock_and_unlock
end

after :each do
@lock.abandon if @lock
if @lock
@lock.abandon
raise @lock.last_refresh_error if exception_to_retry?(@lock.last_refresh_error)
end
end

it 'updates the update time' do
Expand All @@ -348,12 +369,13 @@ def lock_and_unlock
f.metadata['something'] = '123'
end
expect(file.metageneration).not_to eq(orig_metageneration)
eventually(timeout: 10) do
eventually(timeout: 30) do
!@lock.healthy?
end
expect { @lock.check_health! }.to \
raise_error(DistributedLock::GoogleCloudStorage::LockUnhealthyError)
expect(log_output.string).to include('Lock object has an unexpected metageneration number')
expect(@lock.last_refresh_error).not_to be_nil
end

it 'declares unhealthiness when the lock object is deleted' do
Expand All @@ -370,6 +392,7 @@ def lock_and_unlock
expect { @lock.check_health! }.to \
raise_error(DistributedLock::GoogleCloudStorage::LockUnhealthyError)
expect(log_output.string).to include('Lock object has been unexpectedly deleted')
expect(@lock.last_refresh_error).not_to be_nil
end
end
end
37 changes: 33 additions & 4 deletions spec/spec_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@
require 'rspec/retry'

module Helpers
EXCEPTIONS_TO_RETRY = [
Google::Cloud::ResourceExhaustedError,
Google::Cloud::UnavailableError,
]

def self.print_all_thread_backtraces
puts '-------- Begin backtrace dump --------'
Thread.list.each do |thr|
Expand All @@ -17,6 +22,33 @@ def self.print_all_thread_backtraces
puts '-------- End backtrace dump --------'
end

def exception_to_retry?(ex)
EXCEPTIONS_TO_RETRY.any? do |ex_class|
ex.is_a?(ex_class)
end
end

# Runs the given block.
# If it raises a retryable exception, then pass it through.
# If it raises a non-retryable exception, then returns that exception.
# Otherwise, returns nil.
#
# This is used as an alternative to `expect { block }.not_to raise_error`
# which lets retryable exceptions through so that rspec-retry can retry
# the test.
def catch_non_retryable_exception
begin
yield
nil
rescue => e
if exception_to_retry?(e)
raise e
else
e
end
end
end

def require_envvar(name)
value = ENV[name]
raise ArgumentError, "Required environment variable: #{name}" if value.to_s.empty?
Expand Down Expand Up @@ -56,10 +88,7 @@ def consistently(duration:, interval: 0.1)
c.display_try_failure_messages = true
c.default_retry_count = 3
c.default_sleep_interval = 10
c.exceptions_to_retry = [
Google::Cloud::ResourceExhaustedError,
Google::Cloud::UnavailableError
]
c.exceptions_to_retry = Helpers::EXCEPTIONS_TO_RETRY
end

Signal.trap('QUIT') { Helpers.print_all_thread_backtraces }

0 comments on commit 7662619

Please sign in to comment.