Skip to content

Commit

Permalink
Remove PATCH node calls in eviction plugin
Browse files Browse the repository at this point in the history
These calls caused the PATCH node call at the end of the reconciliation loop
to fail, which may drop other state changes.
  • Loading branch information
Nuckal777 committed Oct 4, 2024
1 parent 9106c4f commit b45e973
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 20 deletions.
18 changes: 3 additions & 15 deletions controllers/node_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1181,11 +1181,7 @@ var _ = Describe("The eviction plugin", func() {
eviction := impl.Eviction{Action: impl.Cordon}
err := eviction.Trigger(plugin.Parameters{Ctx: ctx, Client: k8sClient, Node: node})
Expect(err).To(Succeed())
Eventually(func(g Gomega) bool {
err := k8sClient.Get(context.Background(), client.ObjectKeyFromObject(node), node)
g.Expect(err).To(Succeed())
return node.Spec.Unschedulable
}).Should(BeTrue())
Expect(node.Spec.Unschedulable).To(BeTrue())
})

It("should mark a node as schedulable with uncordon action", func(ctx SpecContext) {
Expand All @@ -1196,23 +1192,15 @@ var _ = Describe("The eviction plugin", func() {
eviction := impl.Eviction{Action: impl.Uncordon}
err := eviction.Trigger(plugin.Parameters{Ctx: ctx, Client: k8sClient, Node: node})
Expect(err).To(Succeed())
Eventually(func(g Gomega) bool {
err := k8sClient.Get(context.Background(), client.ObjectKeyFromObject(node), node)
g.Expect(err).To(Succeed())
return node.Spec.Unschedulable
}).Should(BeFalse())
Expect(node.Spec.Unschedulable).To(BeFalse())
})

It("should evict pods with the drain action", func(ctx SpecContext) {
eviction := impl.Eviction{Action: impl.Drain, DeletionTimeout: time.Second, EvictionTimeout: time.Minute}
params := plugin.Parameters{Ctx: ctx, Client: k8sClient, Clientset: k8sClientset, Node: node, Log: GinkgoLogr}
err := eviction.Trigger(params)
Expect(err).To(HaveOccurred()) // awaiting the pod deletions fails because there is no kubelet running
Eventually(func(g Gomega) bool {
err := k8sClient.Get(context.Background(), client.ObjectKeyFromObject(node), node)
g.Expect(err).To(Succeed())
return node.Spec.Unschedulable
}).Should(BeTrue())
Expect(node.Spec.Unschedulable).To(BeTrue())
Eventually(func(g Gomega) *metav1.Time {
err := k8sClient.Get(context.Background(), client.ObjectKeyFromObject(pod), pod)
g.Expect(err).To(Succeed())
Expand Down
1 change: 1 addition & 0 deletions docs/concepts.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ maintenance-required:
transitions:
- check: nodes_in_maintenance
next: in-maintenance
trigger: cordon_node
in-maintenance:
enter: drain_node
transitions:
Expand Down
1 change: 1 addition & 0 deletions docs/plugins.md
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ config:

### eviction
Cordons, uncordons or drains a node.
Ensure to run an instance with the `cordon` action before running an instance with the `drain` action.
```yaml
config:
action: one of "cordon", "uncordon" or "drain", required
Expand Down
18 changes: 13 additions & 5 deletions plugin/impl/eviction.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,13 +80,21 @@ func (e *Eviction) ID() string {
func (e *Eviction) Trigger(params plugin.Parameters) error {
switch e.Action {
case Cordon:
return common.EnsureSchedulable(params.Ctx, params.Client, params.Node, false)
params.Node.Spec.Unschedulable = true
return nil
case Uncordon:
return common.EnsureSchedulable(params.Ctx, params.Client, params.Node, true)
params.Node.Spec.Unschedulable = false
return nil
case Drain:
if err := common.EnsureSchedulable(params.Ctx, params.Client, params.Node, false); err != nil {
return err
}
// The implementation below is technically wrong.
// Just assigning true to Unschedulable does not patch the node object on the api server.
// This done at the end of the reconciliation loop.
// Actively patching within this plugin increases the resource version to increase.
// The increment causes the patch at then end of the reconciliation loop to fail,
// which consistently drops state.
// Assigning true to unschedulable here is a sanity action.
// The user should configure to run the cordon action before running the drain action.
params.Node.Spec.Unschedulable = true
return common.EnsureDrain(params.Ctx, params.Node, params.Log, common.DrainParameters{
AwaitDeletion: common.WaitParameters{
Period: defaultPeriod,
Expand Down

0 comments on commit b45e973

Please sign in to comment.