Skip to content

Commit

Permalink
fix(*): avoid creating multiple timers to run the same active check (#…
Browse files Browse the repository at this point in the history
…157)

* fix(*): avoid creating multiple timers to run the same active check (#156)

* docs(changelog): add changelog
  • Loading branch information
windmgc authored May 16, 2024
1 parent a22ac5a commit 2dc6ddb
Show file tree
Hide file tree
Showing 6 changed files with 235 additions and 12 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,10 @@ Versioning is strictly based on [Semantic Versioning](https://semver.org/)
* push commit and tag
* upload rock to luarocks: `luarocks upload rockspecs/[name] --api-key=abc`

### Unreleased

* Fix: avoid creating multiple timers to run the same active check [#157](https://github.com/Kong/lua-resty-healthcheck/pull/157)

### 3.0.1 (22-Dec-2023)

* Fix: fix delay clean logic when multiple healthchecker was started [#146](https://github.com/Kong/lua-resty-healthcheck/pull/146)
Expand Down
48 changes: 48 additions & 0 deletions lib/resty/healthcheck.lua
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ local table_insert = table.insert
local table_remove = table.remove
local table_concat = table.concat
local string_format = string.format
local math_max = math.max
local ssl = require("ngx.ssl")
local resty_timer = require "resty.timer"
local bit = require("bit")
Expand Down Expand Up @@ -1201,6 +1202,32 @@ local function renew_periodic_lock(shm, key)
end


local function get_callback_lock(shm, key, ttl)
local value = shm:get(key)
if value == nil then
-- no worker is checking, try to acquire the lock
local ok, err = shm:add(key, true, ttl or LOCK_PERIOD)
if not ok then
if err == "exists" then
-- another worker got the lock before
return false
end

return nil, err
end

return true
end

return false
end


local function remove_callback_lock(shm, key)
return shm:delete(key)
end


--- Active health check callback function.
-- @param self the checker object this timer runs on
-- @param health_mode either "healthy" or "unhealthy" to indicate what check
Expand All @@ -1209,10 +1236,27 @@ local function checker_callback(self, health_mode)
self.checker_callback_count = self.checker_callback_count + 1
end

-- Set a callback pending lock will exist for 2x the time of the active check.
-- An active check should be finished within this time and next timer will be
-- executed to exit a pending status.
local callback_pending_ttl = (math_max(self.checks.active.healthy.active and
self.checks.active.healthy.interval or 0,
self.checks.active.unhealthy.active and
self.checks.active.unhealthy.interval or 0)
+ self.checks.active.timeout) * 2

local callback_lock = self.CALLBACK_LOCK .. health_mode
-- a pending timer already exists, so skip this time
local ok, _ = get_callback_lock(self.shm, callback_lock, callback_pending_ttl)
if not ok then
return
end

local list_to_check = {}
local targets, err = fetch_target_list(self)
if not targets then
self:log(ERR, "checker_callback: ", err)
remove_callback_lock(self.shm, callback_lock)
return
end

Expand All @@ -1236,6 +1280,7 @@ local function checker_callback(self, health_mode)

if not list_to_check[1] then
self:log(DEBUG, "checking ", health_mode, " targets: nothing to do")
remove_callback_lock(self.shm, callback_lock)
else
local timer = resty_timer({
interval = 0,
Expand All @@ -1245,10 +1290,12 @@ local function checker_callback(self, health_mode)
expire = function()
self:log(DEBUG, "checking ", health_mode, " targets: #", #list_to_check)
self:active_check_targets(list_to_check)
remove_callback_lock(self.shm, callback_lock)
end,
})
if timer == nil then
self:log(ERR, "failed to create timer to check ", health_mode)
remove_callback_lock(self.shm, callback_lock)
end
end
end
Expand Down Expand Up @@ -1618,6 +1665,7 @@ function _M.new(opts)
self.TARGET_LIST_LOCK = SHM_PREFIX .. self.name .. ":target_list_lock"
self.TARGET_LOCK = SHM_PREFIX .. self.name .. ":target_lock"
self.PERIODIC_LOCK = SHM_PREFIX .. ":period_lock:"
self.CALLBACK_LOCK = SHM_PREFIX .. self.name .. ":callback_lock:"
-- prepare constants
self.EVENT_SOURCE = EVENT_SOURCE_PREFIX .. " [" .. self.name .. "]"
self.LOG_PREFIX = LOG_PREFIX .. "(" .. self.name .. ") "
Expand Down
15 changes: 9 additions & 6 deletions t/with_resty-events/14-tls_active_probes.t
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ __DATA__
events_module = "resty.events",
checks = {
active = {
timeout = 2,
type = "https",
http_path = "/",
healthy = {
Expand All @@ -60,7 +61,7 @@ events_module = "resty.events",
}
})
local ok, err = checker:add_target("104.154.89.105", 443, "badssl.com", false)
ngx.sleep(8) -- wait for 4x the check interval
ngx.sleep(16) -- wait for 4x the check interval
ngx.say(checker:get_target_status("104.154.89.105", 443, "badssl.com")) -- true
}
}
Expand All @@ -69,7 +70,7 @@ GET /t
--- response_body
true
--- timeout
15
20

=== TEST 2: active probes, invalid cert
--- http_config eval: $::HttpConfig
Expand All @@ -87,6 +88,7 @@ true
events_module = "resty.events",
checks = {
active = {
timeout = 2,
type = "https",
http_path = "/",
healthy = {
Expand All @@ -101,7 +103,7 @@ events_module = "resty.events",
}
})
local ok, err = checker:add_target("104.154.89.105", 443, "wrong.host.badssl.com", true)
ngx.sleep(8) -- wait for 4x the check interval
ngx.sleep(16) -- wait for 4x the check interval
ngx.say(checker:get_target_status("104.154.89.105", 443, "wrong.host.badssl.com")) -- false
}
}
Expand All @@ -110,7 +112,7 @@ GET /t
--- response_body
false
--- timeout
15
20

=== TEST 3: active probes, accept invalid cert when disabling check
--- http_config eval: $::HttpConfig
Expand All @@ -128,6 +130,7 @@ false
events_module = "resty.events",
checks = {
active = {
timeout = 2,
type = "https",
https_verify_certificate = false,
http_path = "/",
Expand All @@ -143,7 +146,7 @@ events_module = "resty.events",
}
})
local ok, err = checker:add_target("104.154.89.105", 443, "wrong.host.badssl.com", false)
ngx.sleep(8) -- wait for 4x the check interval
ngx.sleep(16) -- wait for 4x the check interval
ngx.say(checker:get_target_status("104.154.89.105", 443, "wrong.host.badssl.com")) -- true
}
}
Expand All @@ -152,4 +155,4 @@ GET /t
--- response_body
true
--- timeout
15
20
93 changes: 93 additions & 0 deletions t/with_resty-events/19-timer.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
use Test::Nginx::Socket::Lua;
use Cwd qw(cwd);

workers(1);

plan tests => repeat_each() * blocks() * 2;

my $pwd = cwd();
$ENV{TEST_NGINX_SERVROOT} = server_root();

our $HttpConfig = qq{
lua_package_path "$pwd/lib/?.lua;;";
lua_shared_dict test_shm 8m;

init_worker_by_lua_block {
local we = require "resty.events.compat"
assert(we.configure({
unique_timeout = 5,
broker_id = 0,
listening = "unix:$ENV{TEST_NGINX_SERVROOT}/worker_events.sock"
}))
assert(we.configured())
}

server {
server_name kong_worker_events;
listen unix:$ENV{TEST_NGINX_SERVROOT}/worker_events.sock;
access_log off;
location / {
content_by_lua_block {
require("resty.events.compat").run()
}
}
}
};

run_tests();

__DATA__




=== TEST 1: active probes, http node failing
--- http_config eval
qq{
$::HttpConfig

server {
listen 2130;
location = /status {
content_by_lua_block {
ngx.sleep(2)
ngx.exit(500);
}
}
}
}
--- config
location = /t {
content_by_lua_block {
local healthcheck = require("resty.healthcheck")
local checker = healthcheck.new({
name = "testing",
shm_name = "test_shm",
events_module = "resty.events",
type = "http",
checks = {
active = {
timeout = 1,
http_path = "/status",
healthy = {
interval = 0.1,
successes = 3,
},
unhealthy = {
interval = 0.1,
http_failures = 3,
}
},
}
})
local ok, err = checker:add_target("127.0.0.1", 2130, nil, true)
ngx.sleep(3) -- wait for some time to let the checks run
-- There should be no more than 3 timers running atm, but
-- add a few spaces for worker events
ngx.say(tonumber(ngx.timer.running_count()) <= 5)
}
}
--- request
GET /t
--- response_body
true
15 changes: 9 additions & 6 deletions t/with_worker-events/14-tls_active_probes.t
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ __DATA__
shm_name = "test_shm",
checks = {
active = {
timeout = 2,
type = "https",
http_path = "/",
healthy = {
Expand All @@ -48,7 +49,7 @@ __DATA__
}
})
local ok, err = checker:add_target("104.154.89.105", 443, "badssl.com", false)
ngx.sleep(8) -- wait for 4x the check interval
ngx.sleep(16) -- wait for 4x the check interval
ngx.say(checker:get_target_status("104.154.89.105", 443, "badssl.com")) -- true
}
}
Expand All @@ -57,7 +58,7 @@ GET /t
--- response_body
true
--- timeout
15
20

=== TEST 2: active probes, invalid cert
--- http_config eval: $::HttpConfig
Expand All @@ -74,6 +75,7 @@ true
shm_name = "test_shm",
checks = {
active = {
timeout = 2,
type = "https",
http_path = "/",
healthy = {
Expand All @@ -88,7 +90,7 @@ true
}
})
local ok, err = checker:add_target("104.154.89.105", 443, "wrong.host.badssl.com", true)
ngx.sleep(8) -- wait for 4x the check interval
ngx.sleep(16) -- wait for 4x the check interval
ngx.say(checker:get_target_status("104.154.89.105", 443, "wrong.host.badssl.com")) -- false
}
}
Expand All @@ -97,7 +99,7 @@ GET /t
--- response_body
false
--- timeout
15
20

=== TEST 3: active probes, accept invalid cert when disabling check
--- http_config eval: $::HttpConfig
Expand All @@ -114,6 +116,7 @@ false
shm_name = "test_shm",
checks = {
active = {
timeout = 2,
type = "https",
https_verify_certificate = false,
http_path = "/",
Expand All @@ -129,7 +132,7 @@ false
}
})
local ok, err = checker:add_target("104.154.89.105", 443, "wrong.host.badssl.com", false)
ngx.sleep(8) -- wait for 4x the check interval
ngx.sleep(16) -- wait for 4x the check interval
ngx.say(checker:get_target_status("104.154.89.105", 443, "wrong.host.badssl.com")) -- true
}
}
Expand All @@ -138,4 +141,4 @@ GET /t
--- response_body
true
--- timeout
15
20
Loading

0 comments on commit 2dc6ddb

Please sign in to comment.