From 1b6b9040f75fc74ffb86b0c555934aa4f9d71366 Mon Sep 17 00:00:00 2001 From: Lukasz Mierzwa Date: Wed, 18 Oct 2023 15:25:38 +0100 Subject: [PATCH] More strict alerts/template checks --- docs/changelog.md | 12 ++++++++++ internal/checks/alerts_comparison.go | 7 ++++++ internal/checks/alerts_template.go | 27 ++++++++++++++++++++++ internal/checks/alerts_template_test.go | 30 +++++++++++++++++++++++++ internal/checks/base_test.go | 2 +- 5 files changed, 77 insertions(+), 1 deletion(-) diff --git a/docs/changelog.md b/docs/changelog.md index 987c6eb5..dc7a808b 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -22,6 +22,18 @@ - `alerts/comparison` check can now warn if alerting rules use a query with `foo > 0 OR vector(1)`, which would always fire. +- `alerts/template` check will now look check `on(...)` clause on binary + expressions. When `on(...)` is set only labels listed there will appear + on result metrics. + For example `app_type` here cannot appear on query results, even if it's + present on `foo` time series. + + ```yaml + - alert: ... + expr: foo / on (instance, app_name) bar + annotations: + summary: ... {{ $labels.app_type }} ... + ``` ## v0.46.0 diff --git a/internal/checks/alerts_comparison.go b/internal/checks/alerts_comparison.go index bec9ea51..d16a98dd 100644 --- a/internal/checks/alerts_comparison.go +++ b/internal/checks/alerts_comparison.go @@ -106,6 +106,13 @@ func isAbsent(node promParser.Node) bool { if node, ok := node.(*promParser.Call); ok && (node.Func.Name == "absent") { return true } + + for _, child := range promParser.Children(node) { + if isAbsent(child) { + return true + } + } + return false } diff --git a/internal/checks/alerts_template.go b/internal/checks/alerts_template.go index 21b22b36..de5d8cc0 100644 --- a/internal/checks/alerts_template.go +++ b/internal/checks/alerts_template.go @@ -28,6 +28,7 @@ const ( msgAggregation = "template is using %q label but the query removes it" msgAbsent = "template is using %q label but absent() is not passing it" + msgOn = "template is using %q label but the query uses on(...) without it being set there, this label will be missing from the query result" ) var ( @@ -101,11 +102,13 @@ func (c TemplateCheck) Check(ctx context.Context, _ string, rule parser.Rule, _ aggrs := utils.HasOuterAggregation(rule.AlertingRule.Expr.Query) absentCalls := utils.HasOuterAbsent(rule.AlertingRule.Expr.Query) + binExpr := utils.HasOuterBinaryExpr(rule.AlertingRule.Expr.Query) vectors := utils.HasVectorSelector(rule.AlertingRule.Expr.Query) var safeLabels []string for _, be := range binaryExprs(rule.AlertingRule.Expr.Query) { if be.VectorMatching != nil { + safeLabels = append(safeLabels, be.VectorMatching.MatchingLabels...) safeLabels = append(safeLabels, be.VectorMatching.Include...) } } @@ -181,6 +184,18 @@ func (c TemplateCheck) Check(ctx context.Context, _ string, rule parser.Rule, _ } } + if binExpr != nil && binExpr.VectorMatching != nil && binExpr.VectorMatching.Card == promParser.CardOneToOne && binExpr.VectorMatching.On && len(binExpr.VectorMatching.MatchingLabels) > 0 { + for _, msg := range checkMetricLabels(msgOn, label.Key.Value, label.Value.Value, binExpr.VectorMatching.MatchingLabels, false, safeLabels) { + problems = append(problems, Problem{ + Fragment: fmt.Sprintf("%s: %s", label.Key.Value, label.Value.Value), + Lines: mergeLines(label.Lines(), rule.AlertingRule.Expr.Lines()), + Reporter: c.Reporter(), + Text: msg, + Severity: Bug, + }) + } + } + labelNames := getTemplateLabels(label.Key.Value, label.Value.Value) if len(labelNames) > 0 && len(vectors) == 0 { for _, name := range labelNames { @@ -255,6 +270,18 @@ func (c TemplateCheck) Check(ctx context.Context, _ string, rule parser.Rule, _ } } + if binExpr != nil && binExpr.VectorMatching != nil && binExpr.VectorMatching.Card == promParser.CardOneToOne && binExpr.VectorMatching.On && len(binExpr.VectorMatching.MatchingLabels) > 0 { + for _, msg := range checkMetricLabels(msgOn, annotation.Key.Value, annotation.Value.Value, binExpr.VectorMatching.MatchingLabels, false, safeLabels) { + problems = append(problems, Problem{ + Fragment: fmt.Sprintf("%s: %s", annotation.Key.Value, annotation.Value.Value), + Lines: mergeLines(annotation.Lines(), rule.AlertingRule.Expr.Lines()), + Reporter: c.Reporter(), + Text: msg, + Severity: Bug, + }) + } + } + if hasValue(annotation.Key.Value, annotation.Value.Value) && !hasHumanize(annotation.Key.Value, annotation.Value.Value) { for _, problem := range c.checkHumanizeIsNeeded(rule.AlertingRule.Expr.Query) { problems = append(problems, Problem{ diff --git a/internal/checks/alerts_template_test.go b/internal/checks/alerts_template_test.go index 6611f67a..e9ca46c1 100644 --- a/internal/checks/alerts_template_test.go +++ b/internal/checks/alerts_template_test.go @@ -1075,6 +1075,36 @@ func TestTemplateCheck(t *testing.T) { } }, }, + { + description: "foo / on(...) bar", + content: `- alert: Foo + expr: container_file_descriptors / on (instance, app_name) container_ulimits_soft{ulimit="max_open_files"} + annotations: + summary: "{{ $labels.app_type }} is using {{ $value }} fds." + labels: + job: "{{ $labels.job_name }}" +`, + checker: newTemplateCheck, + prometheus: noProm, + problems: func(uri string) []checks.Problem { + return []checks.Problem{ + { + Fragment: `job: {{ $labels.job_name }}`, + Lines: []int{2, 6}, + Reporter: checks.TemplateCheckName, + Text: `template is using "job_name" label but the query uses on(...) without it being set there, this label will be missing from the query result`, + Severity: checks.Bug, + }, + { + Fragment: `summary: {{ $labels.app_type }} is using {{ $value }} fds.`, + Lines: []int{2, 4}, + Reporter: checks.TemplateCheckName, + Text: `template is using "app_type" label but the query uses on(...) without it being set there, this label will be missing from the query result`, + Severity: checks.Bug, + }, + } + }, + }, } runTests(t, testCases) } diff --git a/internal/checks/base_test.go b/internal/checks/base_test.go index 46e1c23e..c1f40955 100644 --- a/internal/checks/base_test.go +++ b/internal/checks/base_test.go @@ -104,7 +104,7 @@ type checkTest struct { } func runTests(t *testing.T, testCases []checkTest) { - zerolog.SetGlobalLevel(zerolog.DebugLevel) + zerolog.SetGlobalLevel(zerolog.FatalLevel) for _, tc := range testCases { // original test t.Run(tc.description, func(t *testing.T) {