From 76412c5c6b2463bf884ac0ef0514175b77f8be7a Mon Sep 17 00:00:00 2001 From: Barthelemy Date: Wed, 1 Nov 2023 15:59:35 +0100 Subject: [PATCH 1/4] [QC-1038] fix Race condition in Service Discovery --- .../include/QualityControl/ServiceDiscovery.h | 3 +++ Framework/src/ServiceDiscovery.cxx | 18 +++++++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/Framework/include/QualityControl/ServiceDiscovery.h b/Framework/include/QualityControl/ServiceDiscovery.h index 819769d3ba..6e20353235 100644 --- a/Framework/include/QualityControl/ServiceDiscovery.h +++ b/Framework/include/QualityControl/ServiceDiscovery.h @@ -65,6 +65,9 @@ class ServiceDiscovery const std::string mId; ///< Instance (service) ID std::string mHealthUrl; ///< hostname of health check endpoint size_t mHealthPort; ///< Port of the health check endpoint + std::mutex mHealthPortMutex; ///< Mutex for the port + std::condition_variable mHealthPortCV; ///< Condition varaible for the port + std::atomic mHealthPortAssigned; ///< Port of the health check is ready. std::thread mHealthThread; ///< Health check thread std::atomic mThreadRunning; ///< Health check thread running flag diff --git a/Framework/src/ServiceDiscovery.cxx b/Framework/src/ServiceDiscovery.cxx index b5139214ad..bd76ca4727 100644 --- a/Framework/src/ServiceDiscovery.cxx +++ b/Framework/src/ServiceDiscovery.cxx @@ -38,6 +38,7 @@ ServiceDiscovery::ServiceDiscovery(const std::string& url, const std::string& na : curlHandle(initCurl(), &ServiceDiscovery::deleteCurl), mConsulUrl(url), mName(name), mId(id), mHealthUrl(healthEndUrl) { mHealthUrl = mHealthUrl.empty() ? boost::asio::ip::host_name() : mHealthUrl; + mHealthPortAssigned = false; mHealthThread = std::thread([=] { #ifdef __linux__ @@ -96,6 +97,11 @@ bool ServiceDiscovery::_register(const std::string& objects) check.put("Name", "Health check " + mId); check.put("Interval", "5s"); check.put("DeregisterCriticalServiceAfter", "1m"); + // Wait until port is set by the thread + { + std::unique_lock lk(mHealthPortMutex); + mHealthPortCV.wait(lk, [this]{return mHealthPortAssigned == true;}); + } check.put("TCP", mHealthUrl + ":" + std::to_string(mHealthPort)); checks.push_back(std::make_pair("", check)); @@ -151,12 +157,18 @@ void ServiceDiscovery::runHealthServer() ILOG(Debug, Trace) << "ServiceDiscovery::runHealthServer - cound not bind to " << port << ENDM; continue; // try the next one } - // if we reach this point it means that we could bind - mHealthPort = port; - ILOG(Debug, Devel) << "ServiceDiscovery selected port: " << mHealthPort << ENDM; break; } + // assign the port and unblock the main thread (we got a port or we failed) + { + ILOG(Debug, Devel) << "ServiceDiscovery selected port: " << mHealthPort << ENDM; + std::lock_guard lk(mHealthPortMutex); + mHealthPort = port; + mHealthPortAssigned = true; + } + mHealthPortCV.notify_one(); + if (cycle == rangeLength) { ILOG(Error, Support) << "Could not find a free port for the ServiceDiscovery, aborting the ServiceDiscovery health check" << ENDM; return; From 7d8478231b50608810be21cccc1687ce9139e8f2 Mon Sep 17 00:00:00 2001 From: Barthelemy Date: Wed, 1 Nov 2023 16:01:14 +0100 Subject: [PATCH 2/4] format --- Framework/src/ServiceDiscovery.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Framework/src/ServiceDiscovery.cxx b/Framework/src/ServiceDiscovery.cxx index bd76ca4727..28425a0f5f 100644 --- a/Framework/src/ServiceDiscovery.cxx +++ b/Framework/src/ServiceDiscovery.cxx @@ -100,7 +100,7 @@ bool ServiceDiscovery::_register(const std::string& objects) // Wait until port is set by the thread { std::unique_lock lk(mHealthPortMutex); - mHealthPortCV.wait(lk, [this]{return mHealthPortAssigned == true;}); + mHealthPortCV.wait(lk, [this] { return mHealthPortAssigned == true; }); } check.put("TCP", mHealthUrl + ":" + std::to_string(mHealthPort)); checks.push_back(std::make_pair("", check)); From a3b31a66940aec7c52be8e54714efbd9663d19f8 Mon Sep 17 00:00:00 2001 From: Barthelemy Date: Wed, 1 Nov 2023 16:34:42 +0100 Subject: [PATCH 3/4] add missing include --- Framework/include/QualityControl/ServiceDiscovery.h | 1 + 1 file changed, 1 insertion(+) diff --git a/Framework/include/QualityControl/ServiceDiscovery.h b/Framework/include/QualityControl/ServiceDiscovery.h index 6e20353235..2511c1791d 100644 --- a/Framework/include/QualityControl/ServiceDiscovery.h +++ b/Framework/include/QualityControl/ServiceDiscovery.h @@ -20,6 +20,7 @@ #include #include #include +#include typedef void CURL; From b9dc161432a514ae06054747a8859689e4561dee Mon Sep 17 00:00:00 2001 From: Barthelemy Date: Thu, 2 Nov 2023 08:16:50 +0100 Subject: [PATCH 4/4] add missing include --- Framework/include/QualityControl/ServiceDiscovery.h | 1 + 1 file changed, 1 insertion(+) diff --git a/Framework/include/QualityControl/ServiceDiscovery.h b/Framework/include/QualityControl/ServiceDiscovery.h index 2511c1791d..a4dce5c884 100644 --- a/Framework/include/QualityControl/ServiceDiscovery.h +++ b/Framework/include/QualityControl/ServiceDiscovery.h @@ -21,6 +21,7 @@ #include #include #include +#include typedef void CURL;