Skip to content

Commit

Permalink
[FSM] fix metrics deletion when auto-create enabled (#10)
Browse files Browse the repository at this point in the history
  • Loading branch information
harveyxia authored Dec 11, 2024
1 parent 37ff145 commit 9daa7e4
Show file tree
Hide file tree
Showing 4 changed files with 157 additions and 40 deletions.
19 changes: 10 additions & 9 deletions pkg/fsm/internal/reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,17 +159,18 @@ func (r *fsmReconciler[T, Obj]) reconcile(

if r.reconcilerOptions.CreateIfNotFound {
obj := r.reconcilerOptions.CreateFunc(req)
// NOTE: nil obj signals that the object should not be created. This is primarily used by callers to prevent
// stale requests in the event queue from triggering unneeded object creations.
if obj == nil {
// Create the object supplied by the caller if not nil.
if obj != nil {
// already exists error can occur if the CreateFunc sets the object name to something other than req.Name
if err := r.client.Create(ctx, obj); client.IgnoreAlreadyExists(err) != nil {
return nil, nil, types.ErrorResult(fmt.Errorf("creating object %s: %w", req.NamespacedName, err))
}
// NOTE: wait for next reconcile before updating status to reduce "object does not exist, cannot update its status" errors
return nil, nil, types.DoneResult()
}
// already exists error can occur if the CreateFunc sets the object name to something other than req.Name
if err := r.client.Create(ctx, obj); client.IgnoreAlreadyExists(err) != nil {
return nil, nil, types.ErrorResult(fmt.Errorf("creating object %s: %w", req.NamespacedName, err))
}
// NOTE: wait for next reconcile before updating status to reduce "object does not exist, cannot update its status" errors
return nil, nil, types.DoneResult()

// If obj is nil, the caller signals that the object should not be created. This is primarily used by callers to prevent
// stale requests in the event queue from triggering unneeded object creations.
}

// deregister metrics for deleted objects (to keep metrics cardinality count from monotonically increasing over an application's lifetime)
Expand Down
5 changes: 4 additions & 1 deletion pkg/fsm/internal/test/core/controller_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package core
import (
"context"
"path/filepath"
"sync/atomic"
"testing"
"time"

Expand Down Expand Up @@ -35,6 +36,8 @@ var (
c client.Client
log *zap.SugaredLogger
reg *prometheus.Registry

disableAutoCreate = new(atomic.Bool)
)

var _ = BeforeSuite(func() {
Expand Down Expand Up @@ -68,7 +71,7 @@ var _ = BeforeSuite(func() {
Client: mgr.GetClient(),
Applicator: io.NewAPIPatchingApplicator(mgr.GetClient()),
}
return SetupController(log, mgr, rl, clientApplicator, metrics)
return SetupController(log, mgr, rl, clientApplicator, metrics, disableAutoCreate)
},
).
Start()
Expand Down
138 changes: 109 additions & 29 deletions pkg/fsm/internal/test/core/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -313,13 +313,13 @@ var _ = Describe("Controller", Ordered, func() {
Eventually(func(g Gomega) {
// get metric
metric, err := getMetric("achilles_object_suspended", suspendMetricLabelsMap)
Expect(err).ToNot(HaveOccurred())
g.Expect(err).ToNot(HaveOccurred())

// validate value
value, err := getGaugeMetricValue(metric)
Expect(err).ToNot(HaveOccurred())
g.Expect(err).ToNot(HaveOccurred())

Expect(value).To(Equal(float64(1)))
g.Expect(value).To(Equal(float64(1)))
}).Should(Succeed())

_, err = controllerutil.CreateOrPatch(ctx, c, claim, func() error {
Expand All @@ -337,13 +337,13 @@ var _ = Describe("Controller", Ordered, func() {
Eventually(func(g Gomega) {
// get metric
metric, err := getMetric("achilles_object_suspended", suspendMetricLabelsMap)
Expect(err).ToNot(HaveOccurred())
g.Expect(err).ToNot(HaveOccurred())

// validate value
value, err := getGaugeMetricValue(metric)
Expect(err).ToNot(HaveOccurred())
g.Expect(err).ToNot(HaveOccurred())

Expect(value).To(Equal(float64(0)))
g.Expect(value).To(Equal(float64(0)))
}).Should(Succeed())
})

Expand All @@ -352,45 +352,45 @@ var _ = Describe("Controller", Ordered, func() {
rgTrueCondLabelMap := statusConditionLabels(client.ObjectKeyFromObject(testClaim), api.TypeReady, string(metav1.ConditionTrue))
Eventually(func(g Gomega) {
metric, err := getMetric("achilles_resource_readiness", rgTrueCondLabelMap)
Expect(err).ToNot(HaveOccurred())
g.Expect(err).ToNot(HaveOccurred())

value, err := getGaugeMetricValue(metric)
Expect(err).ToNot(HaveOccurred())
g.Expect(err).ToNot(HaveOccurred())

Expect(value).To(Equal(float64(1)))
g.Expect(value).To(Equal(float64(1)))
}).Should(Succeed())

rgFalseCondLabelMap := statusConditionLabels(client.ObjectKeyFromObject(testClaim), api.TypeReady, string(metav1.ConditionFalse))
Eventually(func(g Gomega) {
metric, err := getMetric("achilles_resource_readiness", rgFalseCondLabelMap)
Expect(err).ToNot(HaveOccurred())
g.Expect(err).ToNot(HaveOccurred())

value, err := getGaugeMetricValue(metric)
Expect(err).ToNot(HaveOccurred())
g.Expect(err).ToNot(HaveOccurred())

Expect(value).To(Equal(float64(0)))
g.Expect(value).To(Equal(float64(0)))
}).Should(Succeed())

rgUnknownCondLabelMap := statusConditionLabels(client.ObjectKeyFromObject(testClaim), api.TypeReady, string(metav1.ConditionUnknown))
Eventually(func(g Gomega) {
metric, err := getMetric("achilles_resource_readiness", rgUnknownCondLabelMap)
Expect(err).ToNot(HaveOccurred())
g.Expect(err).ToNot(HaveOccurred())

value, err := getGaugeMetricValue(metric)
Expect(err).ToNot(HaveOccurred())
g.Expect(err).ToNot(HaveOccurred())

Expect(value).To(Equal(float64(0)))
g.Expect(value).To(Equal(float64(0)))
}).Should(Succeed())

rgDeletedCondLabelMap := statusConditionLabels(client.ObjectKeyFromObject(testClaim), api.TypeReady, "Deleted")
Eventually(func(g Gomega) {
metric, err := getMetric("achilles_resource_readiness", rgDeletedCondLabelMap)
Expect(err).ToNot(HaveOccurred())
g.Expect(err).ToNot(HaveOccurred())

value, err := getGaugeMetricValue(metric)
Expect(err).ToNot(HaveOccurred())
g.Expect(err).ToNot(HaveOccurred())

Expect(value).To(Equal(float64(0)))
g.Expect(value).To(Equal(float64(0)))
}).Should(Succeed())

rgUnsupportedCondLabelMap := statusConditionLabels(client.ObjectKeyFromObject(testClaim), api.TypeReady, "Unsupported")
Expand All @@ -409,12 +409,12 @@ var _ = Describe("Controller", Ordered, func() {
Eventually(func(g Gomega) {
// if state is specified, duration histogram value should not be zero, as there is one metric per state in the test reconciler
metric, err := getMetric("achilles_state_duration_seconds", sdCmProvLabelMap)
Expect(err).ToNot(HaveOccurred())
g.Expect(err).ToNot(HaveOccurred())

value, err := getHistogramMetricSampleCount(metric)
Expect(err).ToNot(HaveOccurred())
g.Expect(err).ToNot(HaveOccurred())

Expect(value).ToNot(Equal(uint64(0)))
g.Expect(value).ToNot(Equal(uint64(0)))
}).Should(Succeed())

sdInitialStateLabelMap := map[string]string{
Expand All @@ -425,25 +425,25 @@ var _ = Describe("Controller", Ordered, func() {
}
Eventually(func(g Gomega) {
metric, err := getMetric("achilles_state_duration_seconds", sdInitialStateLabelMap)
Expect(err).ToNot(HaveOccurred())
g.Expect(err).ToNot(HaveOccurred())

value, err := getHistogramMetricSampleCount(metric)
Expect(err).ToNot(HaveOccurred())
g.Expect(err).ToNot(HaveOccurred())

Expect(value).ToNot(Equal(uint64(0)))
g.Expect(value).ToNot(Equal(uint64(0)))
}).Should(Succeed())
})

It("should collect status condition metrics for custom types", func() {
initialStateMetricLabels := statusConditionLabels(client.ObjectKeyFromObject(testClaim), InitialStateConditionType, string(metav1.ConditionTrue))
Eventually(func(g Gomega) {
metric, err := getMetric("achilles_resource_readiness", initialStateMetricLabels)
Expect(err).ToNot(HaveOccurred())
g.Expect(err).ToNot(HaveOccurred())

value, err := getGaugeMetricValue(metric)
Expect(err).ToNot(HaveOccurred())
g.Expect(err).ToNot(HaveOccurred())

Expect(value).To(Equal(float64(1)))
g.Expect(value).To(Equal(float64(1)))
}).Should(Succeed())
})

Expand Down Expand Up @@ -649,7 +649,7 @@ var _ = Describe("Controller", Ordered, func() {
Eventually(func(g Gomega) {
_, err := getMetric("achilles_resource_readiness", rgTrueCondLabelMap)
// expect error as the metric should have been deleted since TestClaim is the only object that has associated achilles_resource_readiness metric
Expect(err).To(MatchError("achilles_resource_readiness metric does not exist"))
g.Expect(err).To(MatchError("achilles_resource_readiness metric does not exist"))
}).Should(Succeed())
}

Expand All @@ -659,7 +659,87 @@ var _ = Describe("Controller", Ordered, func() {
"reqNamespace": testClaim.Namespace,
"controller": "test-claim",
})
Expect(err).To(MatchError("achilles_trigger metric does not exist"))
g.Expect(err).To(MatchError("achilles_trigger metric does not exist"))
}).Should(Succeed())
})

It("should handle automatic creation of objects when enabled", func() {
autoCreatedClaimKey := client.ObjectKey{Name: "test-create-func", Namespace: "default"}

By("triggering auto creation")

Expect(c.Create(ctx, &corev1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Name: "test-create-func",
Namespace: "default",
},
})).To(Succeed())

Eventually(func(g Gomega) {
// assert managed object creation
actual := &testv1alpha1.TestClaim{}
g.Expect(c.Get(ctx, autoCreatedClaimKey, actual)).To(Succeed())
}).Should(Succeed())

// assert that metrics exist for object
rgTrueCondLabelMap := statusConditionLabels(autoCreatedClaimKey, api.TypeReady, string(metav1.ConditionTrue))
Eventually(func(g Gomega) {
_, err := getMetric("achilles_resource_readiness", rgTrueCondLabelMap)
g.Expect(err).ToNot(HaveOccurred())
}).Should(Succeed())

// delete the auto created object
Expect(c.Delete(ctx, &testv1alpha1.TestClaim{
ObjectMeta: metav1.ObjectMeta{
Name: "test-create-func",
Namespace: "default",
},
})).To(Succeed())

// assert that it's recreated
Eventually(func(g Gomega) {
// assert managed object creation
actual := &testv1alpha1.TestClaim{}
g.Expect(c.Get(ctx, autoCreatedClaimKey, actual)).To(Succeed())
}).Should(Succeed())

By("disabling auto creation")

disableAutoCreate.Store(true)

// delete the auto created object and assert that it's NOT recreated
Eventually(func(g Gomega) {
testClaim := &testv1alpha1.TestClaim{
ObjectMeta: metav1.ObjectMeta{
Name: "test-create-func",
Namespace: "default",
},
}

g.Expect(c.Delete(ctx, testClaim)).To(Succeed())

// manually remove finalizer
_, err := controllerutil.CreateOrPatch(ctx, c, testClaim, func() error {
testClaim.SetFinalizers([]string{})
return nil
})
Expect(err).ToNot(HaveOccurred())

actual := &testv1alpha1.TestClaim{}
g.Expect(errors.IsNotFound(c.Get(ctx, autoCreatedClaimKey, actual))).To(BeTrue())
}).Should(Succeed())

Consistently(func(g Gomega) {
// assert managed object creation
actual := &testv1alpha1.TestClaim{}
g.Expect(errors.IsNotFound(c.Get(ctx, autoCreatedClaimKey, actual))).To(BeTrue())
}).WithTimeout(3 * time.Second).Should(Succeed())

By("handling cleanup of metrics")
// assert that metrics DO NOT exist for object
Eventually(func(g Gomega) {
_, err := getMetric("achilles_resource_readiness", rgTrueCondLabelMap)
g.Expect(err.Error()).To(ContainSubstring("metric does not exist"))
}).Should(Succeed())
})
})
Expand Down
35 changes: 34 additions & 1 deletion pkg/fsm/internal/test/core/test_fsm_reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"errors"
"fmt"
"sync/atomic"
"time"

"go.uber.org/zap"
Expand All @@ -14,9 +15,12 @@ import (
"k8s.io/utils/ptr"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
ctrlhandler "sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/reconcile"

achapi "github.com/reddit/achilles-sdk-api/api"
"github.com/reddit/achilles-sdk/pkg/fsm"
"github.com/reddit/achilles-sdk/pkg/fsm/handler"
"github.com/reddit/achilles-sdk/pkg/fsm/metrics"
fsmtypes "github.com/reddit/achilles-sdk/pkg/fsm/types"
testv1alpha1 "github.com/reddit/achilles-sdk/pkg/internal/tests/api/test/v1alpha1"
Expand All @@ -43,6 +47,7 @@ func SetupController(
rl workqueue.RateLimiter,
c *io.ClientApplicator,
metrics *metrics.Metrics,
disableAutoCreate *atomic.Bool,
) error {
r := &reconciler{
log: log,
Expand All @@ -67,8 +72,36 @@ func SetupController(
InitialStateConditionType,
},
},
// exercise automatic creation feature
CreateIfNotFound: true,
CreateFunc: func(req ctrl.Request) *testv1alpha1.TestClaim {
// only create the resource if it's named "test-create-func"
if req.Name != "test-create-func" {
return nil
}
// don't recreate if disabled (for exercising proper cleanup)
if disableAutoCreate.Load() {
return nil
}

return &testv1alpha1.TestClaim{
ObjectMeta: metav1.ObjectMeta{
Name: req.Name,
Namespace: req.Namespace,
},
}
},
},
).
Watches(&corev1.ConfigMap{},
// trigger auto creation of `test-create-func` TestClaim iff a ConfigMap of name `test-create-func` is created
ctrlhandler.EnqueueRequestsFromMapFunc(func(ctx context.Context, o client.Object) []reconcile.Request {
if o.GetName() == "test-create-func" {
return []reconcile.Request{{NamespacedName: client.ObjectKeyFromObject(o)}}
}
return nil
},
)
), handler.TriggerTypeRelative)

return builder.Build()(mgr, log, rl, metrics)
}
Expand Down

0 comments on commit 9daa7e4

Please sign in to comment.