From 3e1e672e38f3f2665733b3e8577390d4c32a8530 Mon Sep 17 00:00:00 2001 From: Ashok Siyani Date: Thu, 21 Mar 2024 10:55:18 +0000 Subject: [PATCH] reduce state message size when terraform run fails --- api/v1beta1/module_types.go | 14 ++++++++++++++ controllers/module_controller.go | 12 ++++++------ runner/runner.go | 8 ++++---- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/api/v1beta1/module_types.go b/api/v1beta1/module_types.go index 15a14bf5..74f7b942 100644 --- a/api/v1beta1/module_types.go +++ b/api/v1beta1/module_types.go @@ -17,6 +17,8 @@ limitations under the License. package v1beta1 import ( + "strings" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -47,6 +49,8 @@ const ( ReasonPlanedDriftDetected = "PlanedDriftDetected" ReasonPlanedNoDriftDetected = "PlanedNoDriftDetected" ReasonApplied = "Applied" + + stateMsgCharLimit = 1024 ) const ( @@ -315,3 +319,13 @@ func GetRunReason(runType string) string { } return ReasonRunTriggered } + +func NormaliseStateMsg(msg string) string { + msg = strings.TrimSpace(msg) + + r := []rune(msg) + if len(r) > stateMsgCharLimit { + return strings.TrimSpace(string(r[:stateMsgCharLimit])) + "..." + } + return msg +} diff --git a/controllers/module_controller.go b/controllers/module_controller.go index 69d14b47..1413c69e 100644 --- a/controllers/module_controller.go +++ b/controllers/module_controller.go @@ -93,7 +93,7 @@ func (r *ModuleReconciler) Reconcile(ctx context.Context, req reconcile.Request) if module.Spec.RepoURL == "" { msg := fmt.Sprintf("repoURL is required, please add repoURL instead of repoName:%s", module.Spec.RepoName) log.Error(msg) - r.setFailedStatus(req, &module, tfaplv1beta1.ReasonSpecsParsingFailure, msg, r.Clock.Now()) + r.setFailedStatus(req, &module, tfaplv1beta1.ReasonSpecsParsingFailure, msg) // we don't really care about requeuing until we get an update that // fixes the repoURL, so don't return an error return ctrl.Result{}, nil @@ -115,7 +115,7 @@ func (r *ModuleReconciler) Reconcile(ctx context.Context, req reconcile.Request) // module is not currently running so change status and continue msg := "wrong status found, module is not currently running" log.Error(msg) - r.setFailedStatus(req, &module, tfaplv1beta1.ReasonUnknown, msg, r.Clock.Now()) + r.setFailedStatus(req, &module, tfaplv1beta1.ReasonUnknown, msg) } var isPlanOnly bool @@ -135,7 +135,7 @@ func (r *ModuleReconciler) Reconcile(ctx context.Context, req reconcile.Request) if err != nil { msg := fmt.Sprintf("unable to get current hash of the repo err:%s", err) log.Error(msg) - r.setFailedStatus(req, &module, tfaplv1beta1.ReasonGitFailure, msg, r.Clock.Now()) + r.setFailedStatus(req, &module, tfaplv1beta1.ReasonGitFailure, msg) // since issue is not related to module specs, requeue again in case its fixed return ctrl.Result{RequeueAfter: pollIntervalDuration}, nil } @@ -157,7 +157,7 @@ func (r *ModuleReconciler) Reconcile(ctx context.Context, req reconcile.Request) if err != nil { msg := fmt.Sprintf("unable to figure out CronJob schedule: err:%s", err) log.Error(msg) - r.setFailedStatus(req, &module, tfaplv1beta1.ReasonSpecsParsingFailure, msg, r.Clock.Now()) + r.setFailedStatus(req, &module, tfaplv1beta1.ReasonSpecsParsingFailure, msg) // we don't really care about requeuing until we get an update that // fixes the schedule, so don't return an error return ctrl.Result{}, nil @@ -251,10 +251,10 @@ func NextSchedule(module *tfaplv1beta1.Module, now time.Time, minIntervalBetween return numOfMissedRuns, sched.Next(now), nil } -func (r *ModuleReconciler) setFailedStatus(req ctrl.Request, module *tfaplv1beta1.Module, reason, msg string, now time.Time) { +func (r *ModuleReconciler) setFailedStatus(req ctrl.Request, module *tfaplv1beta1.Module, reason, msg string) { module.Status.CurrentState = string(tfaplv1beta1.StatusErrored) - module.Status.StateMessage = msg + module.Status.StateMessage = tfaplv1beta1.NormaliseStateMsg(msg) module.Status.StateReason = reason module.Status.RunStartedAt = nil module.Status.RunDuration = nil diff --git a/runner/runner.go b/runner/runner.go index 19385f2a..74fb9a63 100644 --- a/runner/runner.go +++ b/runner/runner.go @@ -368,7 +368,7 @@ func (r *Runner) runTF( func (r *Runner) SetProgressingStatus(objectKey types.NamespacedName, m *tfaplv1beta1.Module, msg string) error { m.Status.CurrentState = string(tfaplv1beta1.StatusRunning) - m.Status.StateMessage = msg + m.Status.StateMessage = tfaplv1beta1.NormaliseStateMsg(msg) return r.patchStatus(context.Background(), objectKey, m.Status) } @@ -382,7 +382,7 @@ func (r *Runner) SetRunStartedStatus(req Request, m *tfaplv1beta1.Module, msg, c m.Status.RunCommitHash = commitHash m.Status.RunCommitMsg = commitMsg m.Status.RemoteURL = remoteURL - m.Status.StateMessage = msg + m.Status.StateMessage = tfaplv1beta1.NormaliseStateMsg(msg) m.Status.StateReason = tfaplv1beta1.GetRunReason(req.Type) r.Recorder.Eventf(m, corev1.EventTypeNormal, tfaplv1beta1.GetRunReason(req.Type), "%s: type:%s, commit:%s", msg, req.Type, commitHash) @@ -393,7 +393,7 @@ func (r *Runner) SetRunStartedStatus(req Request, m *tfaplv1beta1.Module, msg, c func (r *Runner) SetRunFinishedStatus(objectKey types.NamespacedName, m *tfaplv1beta1.Module, reason, msg string, now time.Time) error { m.Status.CurrentState = string(tfaplv1beta1.StatusReady) m.Status.RunDuration = &metav1.Duration{Duration: now.Sub(m.Status.RunStartedAt.Time).Round(time.Second)} - m.Status.StateMessage = msg + m.Status.StateMessage = tfaplv1beta1.NormaliseStateMsg(msg) m.Status.StateReason = reason r.Recorder.Event(m, corev1.EventTypeNormal, reason, msg) @@ -405,7 +405,7 @@ func (r *Runner) setFailedStatus(req Request, module *tfaplv1beta1.Module, reaso module.Status.CurrentState = string(tfaplv1beta1.StatusErrored) module.Status.RunDuration = &metav1.Duration{Duration: now.Sub(module.Status.RunStartedAt.Time).Round(time.Second)} - module.Status.StateMessage = msg + module.Status.StateMessage = tfaplv1beta1.NormaliseStateMsg(msg) module.Status.StateReason = reason r.Recorder.Event(module, corev1.EventTypeWarning, reason, fmt.Sprintf("%q", msg))