From b45e97358c329fbc415a1fc53e139f26fca798ce Mon Sep 17 00:00:00 2001 From: Nuckal777 Date: Fri, 4 Oct 2024 10:40:56 +0200 Subject: [PATCH] Remove PATCH node calls in eviction plugin These calls caused the PATCH node call at the end of the reconciliation loop to fail, which may drop other state changes. --- controllers/node_controller_test.go | 18 +++--------------- docs/concepts.md | 1 + docs/plugins.md | 1 + plugin/impl/eviction.go | 18 +++++++++++++----- 4 files changed, 18 insertions(+), 20 deletions(-) diff --git a/controllers/node_controller_test.go b/controllers/node_controller_test.go index e846e39..bc343f3 100644 --- a/controllers/node_controller_test.go +++ b/controllers/node_controller_test.go @@ -1181,11 +1181,7 @@ var _ = Describe("The eviction plugin", func() { eviction := impl.Eviction{Action: impl.Cordon} err := eviction.Trigger(plugin.Parameters{Ctx: ctx, Client: k8sClient, Node: node}) Expect(err).To(Succeed()) - Eventually(func(g Gomega) bool { - err := k8sClient.Get(context.Background(), client.ObjectKeyFromObject(node), node) - g.Expect(err).To(Succeed()) - return node.Spec.Unschedulable - }).Should(BeTrue()) + Expect(node.Spec.Unschedulable).To(BeTrue()) }) It("should mark a node as schedulable with uncordon action", func(ctx SpecContext) { @@ -1196,11 +1192,7 @@ var _ = Describe("The eviction plugin", func() { eviction := impl.Eviction{Action: impl.Uncordon} err := eviction.Trigger(plugin.Parameters{Ctx: ctx, Client: k8sClient, Node: node}) Expect(err).To(Succeed()) - Eventually(func(g Gomega) bool { - err := k8sClient.Get(context.Background(), client.ObjectKeyFromObject(node), node) - g.Expect(err).To(Succeed()) - return node.Spec.Unschedulable - }).Should(BeFalse()) + Expect(node.Spec.Unschedulable).To(BeFalse()) }) It("should evict pods with the drain action", func(ctx SpecContext) { @@ -1208,11 +1200,7 @@ var _ = Describe("The eviction plugin", func() { params := plugin.Parameters{Ctx: ctx, Client: k8sClient, Clientset: k8sClientset, Node: node, Log: GinkgoLogr} err := eviction.Trigger(params) Expect(err).To(HaveOccurred()) // awaiting the pod deletions fails because there is no kubelet running - Eventually(func(g Gomega) bool { - err := k8sClient.Get(context.Background(), client.ObjectKeyFromObject(node), node) - g.Expect(err).To(Succeed()) - return node.Spec.Unschedulable - }).Should(BeTrue()) + Expect(node.Spec.Unschedulable).To(BeTrue()) Eventually(func(g Gomega) *metav1.Time { err := k8sClient.Get(context.Background(), client.ObjectKeyFromObject(pod), pod) g.Expect(err).To(Succeed()) diff --git a/docs/concepts.md b/docs/concepts.md index 49b0673..167ac8f 100644 --- a/docs/concepts.md +++ b/docs/concepts.md @@ -95,6 +95,7 @@ maintenance-required: transitions: - check: nodes_in_maintenance next: in-maintenance + trigger: cordon_node in-maintenance: enter: drain_node transitions: diff --git a/docs/plugins.md b/docs/plugins.md index 6fc5b2d..25871b7 100644 --- a/docs/plugins.md +++ b/docs/plugins.md @@ -170,6 +170,7 @@ config: ### eviction Cordons, uncordons or drains a node. +Ensure to run an instance with the `cordon` action before running an instance with the `drain` action. ```yaml config: action: one of "cordon", "uncordon" or "drain", required diff --git a/plugin/impl/eviction.go b/plugin/impl/eviction.go index 029530c..f68a1c2 100644 --- a/plugin/impl/eviction.go +++ b/plugin/impl/eviction.go @@ -80,13 +80,21 @@ func (e *Eviction) ID() string { func (e *Eviction) Trigger(params plugin.Parameters) error { switch e.Action { case Cordon: - return common.EnsureSchedulable(params.Ctx, params.Client, params.Node, false) + params.Node.Spec.Unschedulable = true + return nil case Uncordon: - return common.EnsureSchedulable(params.Ctx, params.Client, params.Node, true) + params.Node.Spec.Unschedulable = false + return nil case Drain: - if err := common.EnsureSchedulable(params.Ctx, params.Client, params.Node, false); err != nil { - return err - } + // The implementation below is technically wrong. + // Just assigning true to Unschedulable does not patch the node object on the api server. + // This done at the end of the reconciliation loop. + // Actively patching within this plugin increases the resource version to increase. + // The increment causes the patch at then end of the reconciliation loop to fail, + // which consistently drops state. + // Assigning true to unschedulable here is a sanity action. + // The user should configure to run the cordon action before running the drain action. + params.Node.Spec.Unschedulable = true return common.EnsureDrain(params.Ctx, params.Node, params.Log, common.DrainParameters{ AwaitDeletion: common.WaitParameters{ Period: defaultPeriod,