From eca6917c6ab197b43ef6df47dfeda63d8a628f81 Mon Sep 17 00:00:00 2001 From: Yusuke Kuoka Date: Fri, 3 Jul 2020 09:05:46 +0900 Subject: [PATCH 1/2] feat: Organizational RunnerDeployment Autoscaling Enhances #57 to add support for organizational runners. As GitHub Actions does not have an appropriate API for this, this is the spec you need: ``` apiVersion: actions.summerwind.dev/v1alpha1 kind: RunnerDeployment metadata: name: myrunners spec: minReplicas: 1 maxReplicas: 3 autoscaling: metrics: - type: TotalNumberOfQueuedAndProgressingWorkflowRuns repositories: # Assumes that you have `github.com/myorg/myrepo1` repo - myrepo1 - myrepo2 template: spec: organization: myorg ``` It works by collecting "in_progress" and "queued" workflow runs for the repositories `myrepo1` and `myrepo2` to autoscale the number of replicas, assuming you have this organizational runner deployment only for those two repositories. For example, if `myrepo1` had 1 `in_progress` and 2 `queued` workflow runs, and `myrepo2` had 4 `in_progress` and 8 `queued` workflow runs at the time of running the reconcilation loop on the runner deployment, it will scale replicas to 1 + 2 + 4 + 8 = 15. Perhaps we might be better add a kind of "ratio" setting so that you can configure the controller to create e.g. 2x runners than demanded. But that's another story. Ref #10 --- api/v1alpha1/runnerdeployment_types.go | 23 +++ api/v1alpha1/zz_generated.deepcopy.go | 43 +++++ ...ions.summerwind.dev_runnerdeployments.yaml | 21 +++ controllers/autoscaling.go | 82 ++++---- controllers/autoscaling_test.go | 178 +++++++++++++++++- controllers/runnerdeployment_controller.go | 4 +- 6 files changed, 311 insertions(+), 40 deletions(-) diff --git a/api/v1alpha1/runnerdeployment_types.go b/api/v1alpha1/runnerdeployment_types.go index 6f9f83da17..39556ed4d2 100644 --- a/api/v1alpha1/runnerdeployment_types.go +++ b/api/v1alpha1/runnerdeployment_types.go @@ -20,6 +20,10 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) +const ( + AutoscalingMetricTypeTotalNumberOfQueuedAndProgressingWorkflowRuns = "TotalNumberOfQueuedAndProgressingWorkflowRuns" +) + // RunnerReplicaSetSpec defines the desired state of RunnerDeployment type RunnerDeploymentSpec struct { // +optional @@ -38,9 +42,28 @@ type RunnerDeploymentSpec struct { // +optional ScaleDownDelaySecondsAfterScaleUp *int `json:"scaleDownDelaySecondsAfterScaleOut,omitempty"` + // Autoscaling is set various configuration options for autoscaling this runner deployment. + // +optional + Autoscaling AutoscalingSpec `json:"autoscaling,omitempty"` + Template RunnerTemplate `json:"template"` } +type AutoscalingSpec struct { + Metrics []MetricSpec `json:"metrics,omitempty"` +} + +type MetricSpec struct { + // Type is the type of metric to be used for autoscaling. + // The only supported Type is TotalNumberOfQueuedAndProgressingWorkflowRuns + Type string `json:"type,omitempty"` + + // RepositoryNames is the list of repository names to be used for calculating the metric. + // For example, a repository name is the REPO part of `github.com/USER/REPO`. + // +optional + RepositoryNames []string `json:"repositoryNames,omitempty"` +} + type RunnerDeploymentStatus struct { AvailableReplicas int `json:"availableReplicas"` ReadyReplicas int `json:"readyReplicas"` diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 5fe3fb59b6..cf10a6e6d8 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -25,6 +25,48 @@ import ( "k8s.io/apimachinery/pkg/runtime" ) +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *AutoscalingSpec) DeepCopyInto(out *AutoscalingSpec) { + *out = *in + if in.Metrics != nil { + in, out := &in.Metrics, &out.Metrics + *out = make([]MetricSpec, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AutoscalingSpec. +func (in *AutoscalingSpec) DeepCopy() *AutoscalingSpec { + if in == nil { + return nil + } + out := new(AutoscalingSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MetricSpec) DeepCopyInto(out *MetricSpec) { + *out = *in + if in.RepositoryNames != nil { + in, out := &in.RepositoryNames, &out.RepositoryNames + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetricSpec. +func (in *MetricSpec) DeepCopy() *MetricSpec { + if in == nil { + return nil + } + out := new(MetricSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Runner) DeepCopyInto(out *Runner) { *out = *in @@ -134,6 +176,7 @@ func (in *RunnerDeploymentSpec) DeepCopyInto(out *RunnerDeploymentSpec) { *out = new(int) **out = **in } + in.Autoscaling.DeepCopyInto(&out.Autoscaling) in.Template.DeepCopyInto(&out.Template) } diff --git a/config/crd/bases/actions.summerwind.dev_runnerdeployments.yaml b/config/crd/bases/actions.summerwind.dev_runnerdeployments.yaml index 329cc35ebd..e396520150 100644 --- a/config/crd/bases/actions.summerwind.dev_runnerdeployments.yaml +++ b/config/crd/bases/actions.summerwind.dev_runnerdeployments.yaml @@ -46,6 +46,27 @@ spec: spec: description: RunnerReplicaSetSpec defines the desired state of RunnerDeployment properties: + autoscaling: + description: Autoscaling is set various configuration options for autoscaling + this runner deployment. + properties: + metrics: + items: + properties: + repositoryNames: + description: RepositoryNames is the list of repository names + to be used for calculating the metric. For example, a repository + name is the REPO part of `github.com/USER/REPO`. + items: + type: string + type: array + type: + description: Type is the type of metric to be used for autoscaling. + The only supported Type is TotalNumberOfQueuedAndProgressingWorkflowRuns + type: string + type: object + type: array + type: object maxReplicas: description: MinReplicas is the maximum number of replicas the deployment is allowed to scale diff --git a/controllers/autoscaling.go b/controllers/autoscaling.go index e4b3bf2c47..a1ce5d26df 100644 --- a/controllers/autoscaling.go +++ b/controllers/autoscaling.go @@ -2,20 +2,12 @@ package controllers import ( "context" + "errors" "fmt" "github.com/summerwind/actions-runner-controller/api/v1alpha1" "strings" ) -type NotSupported struct { -} - -var _ error = NotSupported{} - -func (e NotSupported) Error() string { - return "Autoscaling is currently supported only when spec.repository is set" -} - func (r *RunnerDeploymentReconciler) determineDesiredReplicas(rd v1alpha1.RunnerDeployment) (*int, error) { if rd.Spec.Replicas != nil { return nil, fmt.Errorf("bug: determineDesiredReplicas should not be called for deplomeny with specific replicas") @@ -25,38 +17,60 @@ func (r *RunnerDeploymentReconciler) determineDesiredReplicas(rd v1alpha1.Runner return nil, fmt.Errorf("runnerdeployment %s/%s is missing maxReplicas", rd.Namespace, rd.Name) } - var replicas int + var repos [][]string repoID := rd.Spec.Template.Spec.Repository if repoID == "" { - return nil, NotSupported{} - } + orgName := rd.Spec.Template.Spec.Organization + if orgName == "" { + return nil, fmt.Errorf("asserting runner deployment spec to detect bug: spec.template.organization should not be empty on this code path") + } + + metrics := rd.Spec.Autoscaling.Metrics - repo := strings.Split(repoID, "/") - user, repoName := repo[0], repo[1] - list, _, err := r.GitHubClient.Actions.ListRepositoryWorkflowRuns(context.TODO(), user, repoName, nil) - if err != nil { - return nil, err + if len(metrics) == 0 { + return nil, fmt.Errorf("validating autoscaling metrics: one or more metrics is required") + } else if tpe := metrics[0].Type; tpe != v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndProgressingWorkflowRuns { + return nil, fmt.Errorf("validting autoscaling metrics: unsupported metric type %q: only supported value is %s", tpe, v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndProgressingWorkflowRuns) + } else if len(metrics[0].RepositoryNames) == 0 { + return nil, errors.New("validating autoscaling metrics: spec.autoscaling.metrics[].repositoryNames is required and must have one more more entries for organizational runner deployment") + } + + for _, repoName := range metrics[0].RepositoryNames { + repos = append(repos, []string{orgName, repoName}) + } + } else { + repo := strings.Split(repoID, "/") + + repos = append(repos, repo) } var total, inProgress, queued, completed, unknown int - for _, r := range list.WorkflowRuns { - total++ - - // In May 2020, there are only 3 statuses. - // Follow the below links for more details: - // - https://developer.github.com/v3/actions/workflow-runs/#list-repository-workflow-runs - // - https://developer.github.com/v3/checks/runs/#create-a-check-run - switch r.GetStatus() { - case "completed": - completed++ - case "in_progress": - inProgress++ - case "queued": - queued++ - default: - unknown++ + for _, repo := range repos { + user, repoName := repo[0], repo[1] + list, _, err := r.GitHubClient.Actions.ListRepositoryWorkflowRuns(context.TODO(), user, repoName, nil) + if err != nil { + return nil, err + } + + for _, r := range list.WorkflowRuns { + total++ + + // In May 2020, there are only 3 statuses. + // Follow the below links for more details: + // - https://developer.github.com/v3/actions/workflow-runs/#list-repository-workflow-runs + // - https://developer.github.com/v3/checks/runs/#create-a-check-run + switch r.GetStatus() { + case "completed": + completed++ + case "in_progress": + inProgress++ + case "queued": + queued++ + default: + unknown++ + } } } @@ -75,7 +89,7 @@ func (r *RunnerDeploymentReconciler) determineDesiredReplicas(rd v1alpha1.Runner } rd.Status.Replicas = &desiredReplicas - replicas = desiredReplicas + replicas := desiredReplicas r.Log.V(1).Info( "Calculated desired replicas", diff --git a/controllers/autoscaling_test.go b/controllers/autoscaling_test.go index 9d3e6a82c8..8889e3ebfa 100644 --- a/controllers/autoscaling_test.go +++ b/controllers/autoscaling_test.go @@ -119,19 +119,183 @@ func TestDetermineDesiredReplicas_RepositoryRunner(t *testing.T) { fixed: intPtr(3), want: 3, }, + } + + for i := range testcases { + tc := testcases[i] + + log := zap.New(func(o *zap.Options) { + o.Development = true + }) + + scheme := runtime.NewScheme() + _ = clientgoscheme.AddToScheme(scheme) + _ = v1alpha1.AddToScheme(scheme) + + t.Run(fmt.Sprintf("case %d", i), func(t *testing.T) { + server := fake.NewServer(fake.WithListRepositoryWorkflowRunsResponse(200, tc.workflowRuns)) + defer server.Close() + client := newGithubClient(server) + + r := &RunnerDeploymentReconciler{ + Log: log, + GitHubClient: client, + Scheme: scheme, + } + + rd := v1alpha1.RunnerDeployment{ + TypeMeta: metav1.TypeMeta{}, + Spec: v1alpha1.RunnerDeploymentSpec{ + Template: v1alpha1.RunnerTemplate{ + Spec: v1alpha1.RunnerSpec{ + Repository: tc.repo, + }, + }, + Replicas: tc.fixed, + MaxReplicas: tc.max, + MinReplicas: tc.min, + }, + Status: v1alpha1.RunnerDeploymentStatus{ + Replicas: tc.sReplicas, + LastSuccessfulScaleOutTime: tc.sTime, + }, + } + + rs, err := r.newRunnerReplicaSetWithAutoscaling(rd) + if err != nil { + if tc.err == "" { + t.Fatalf("unexpected error: expected none, got %v", err) + } else if err.Error() != tc.err { + t.Fatalf("unexpected error: expected %v, got %v", tc.err, err) + } + return + } + + got := rs.Spec.Replicas + + if got == nil { + t.Fatalf("unexpected value of rs.Spec.Replicas: nil") + } + + if *got != tc.want { + t.Errorf("%d: incorrect desired replicas: want %d, got %d", i, tc.want, *got) + } + }) + } +} + +func TestDetermineDesiredReplicas_OrganizationalRunner(t *testing.T) { + intPtr := func(v int) *int { + return &v + } + + metav1Now := metav1.Now() + testcases := []struct { + repos []string + org string + fixed *int + max *int + min *int + sReplicas *int + sTime *metav1.Time + workflowRuns string + want int + err string + }{ + // 3 demanded, max at 3 + { + org: "test", + repos: []string{"valid"}, + min: intPtr(2), + max: intPtr(3), + workflowRuns: `{"total_count": 4, "workflow_runs":[{"status":"queued"}, {"status":"in_progress"}, {"status":"in_progress"}, {"status":"completed"}]}"`, + want: 3, + }, + // 2 demanded, max at 3, currently 3, delay scaling down due to grace period + { + org: "test", + repos: []string{"valid"}, + min: intPtr(2), + max: intPtr(3), + sReplicas: intPtr(3), + sTime: &metav1Now, + workflowRuns: `{"total_count": 4, "workflow_runs":[{"status":"queued"}, {"status":"in_progress"}, {"status":"completed"}]}"`, + want: 3, + }, + // 3 demanded, max at 2 + { + org: "test", + repos: []string{"valid"}, + min: intPtr(2), + max: intPtr(2), + workflowRuns: `{"total_count": 4, "workflow_runs":[{"status":"queued"}, {"status":"in_progress"}, {"status":"in_progress"}, {"status":"completed"}]}"`, + want: 2, + }, + // 2 demanded, min at 2 + { + org: "test", + repos: []string{"valid"}, + min: intPtr(2), + max: intPtr(3), + workflowRuns: `{"total_count": 3, "workflow_runs":[{"status":"queued"}, {"status":"in_progress"}, {"status":"completed"}]}"`, + want: 2, + }, + // 1 demanded, min at 2 + { + org: "test", + repos: []string{"valid"}, + min: intPtr(2), + max: intPtr(3), + workflowRuns: `{"total_count": 2, "workflow_runs":[{"status":"queued"}, {"status":"completed"}]}"`, + want: 2, + }, + // 1 demanded, min at 2 + { + org: "test", + repos: []string{"valid"}, + min: intPtr(2), + max: intPtr(3), + workflowRuns: `{"total_count": 2, "workflow_runs":[{"status":"in_progress"}, {"status":"completed"}]}"`, + want: 2, + }, + // 1 demanded, min at 1 + { + org: "test", + repos: []string{"valid"}, + min: intPtr(1), + max: intPtr(3), + workflowRuns: `{"total_count": 2, "workflow_runs":[{"status":"queued"}, {"status":"completed"}]}"`, + want: 1, + }, + // 1 demanded, min at 1 + { + org: "test", + repos: []string{"valid"}, + min: intPtr(1), + max: intPtr(3), + workflowRuns: `{"total_count": 2, "workflow_runs":[{"status":"in_progress"}, {"status":"completed"}]}"`, + want: 1, + }, + // fixed at 3 + { + org: "test", + repos: []string{"valid"}, + fixed: intPtr(3), + want: 3, + }, // org runner, fixed at 3 { org: "test", fixed: intPtr(3), want: 3, }, - // org runner, 1 demanded, min at 1 + // org runner, 1 demanded, min at 1, no repos { org: "test", min: intPtr(1), max: intPtr(3), workflowRuns: `{"total_count": 2, "workflow_runs":[{"status":"in_progress"}, {"status":"completed"}]}"`, - err: "Autoscaling is currently supported only when spec.repository is set", + err: "validating autoscaling metrics: spec.autoscaling.metrics[].repositoryNames is required and must have one more more entries for organizational runner deployment", }, } @@ -162,7 +326,15 @@ func TestDetermineDesiredReplicas_RepositoryRunner(t *testing.T) { Spec: v1alpha1.RunnerDeploymentSpec{ Template: v1alpha1.RunnerTemplate{ Spec: v1alpha1.RunnerSpec{ - Repository: tc.repo, + Organization: tc.org, + }, + }, + Autoscaling: v1alpha1.AutoscalingSpec{ + Metrics: []v1alpha1.MetricSpec{ + { + Type: v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndProgressingWorkflowRuns, + RepositoryNames: tc.repos, + }, }, }, Replicas: tc.fixed, diff --git a/controllers/runnerdeployment_controller.go b/controllers/runnerdeployment_controller.go index 56fcc25fc1..c1a7c0d177 100644 --- a/controllers/runnerdeployment_controller.go +++ b/controllers/runnerdeployment_controller.go @@ -99,9 +99,7 @@ func (r *RunnerDeploymentReconciler) Reconcile(req ctrl.Request) (ctrl.Result, e desiredRS, err := r.newRunnerReplicaSetWithAutoscaling(rd) if err != nil { - if _, ok := err.(NotSupported); ok { - r.Recorder.Event(&rd, corev1.EventTypeNormal, "RunnerReplicaSetAutoScaleNotSupported", err.Error()) - } + r.Recorder.Event(&rd, corev1.EventTypeNormal, "RunnerAutoscalingFailure", err.Error()) log.Error(err, "Could not create runnerreplicaset") From ae30648985f576981a205674d4bf5adecd46b68e Mon Sep 17 00:00:00 2001 From: Yusuke Kuoka Date: Sun, 19 Jul 2020 18:42:06 +0900 Subject: [PATCH 2/2] feat: Use HorizontalRunnerAutoscaler for autoscaling --- README.md | 38 +++- .../horizontalrunnerautoscaler_types.go | 102 ++++++++++ api/v1alpha1/runnerdeployment_types.go | 37 +--- api/v1alpha1/zz_generated.deepcopy.go | 142 +++++++++++--- ...rwind.dev_horizontalrunnerautoscalers.yaml | 118 +++++++++++ ...ions.summerwind.dev_runnerdeployments.yaml | 37 ---- config/rbac/role.yaml | 20 ++ controllers/autoscaling.go | 22 +-- controllers/autoscaling_test.go | 96 +++++---- .../horizontalrunnerautoscaler_controller.go | 184 ++++++++++++++++++ controllers/runnerdeployment_controller.go | 55 +----- main.go | 17 +- 12 files changed, 665 insertions(+), 203 deletions(-) create mode 100644 api/v1alpha1/horizontalrunnerautoscaler_types.go create mode 100644 config/crd/bases/actions.summerwind.dev_horizontalrunnerautoscalers.yaml create mode 100644 controllers/horizontalrunnerautoscaler_controller.go diff --git a/README.md b/README.md index 60d211ddc1..1eebae4256 100644 --- a/README.md +++ b/README.md @@ -199,13 +199,25 @@ In the below example, `actions-runner` checks for pending workflow runs for each apiVersion: actions.summerwind.dev/v1alpha1 kind: RunnerDeployment metadata: - name: summerwind-actions-runner-controller + name: example-runner-deployment spec: - minReplicas: 1 - maxReplicas: 3 template: spec: repository: summerwind/actions-runner-controller +--- +apiVersion: actions.summerwind.dev/v1alpha1 +kind: HorizontalRunnerAutoscaler +metadata: + name: example-runner-deployment-autoscaler +spec: + scaleTargetRef: + name: example-runner-deployment + minReplicas: 1 + maxReplicas: 3 + metrics: + - type: TotalNumberOfQueuedAndInProgressWorkflowRuns + repositoryNames: + - summerwind/actions-runner-controller ``` Please also note that the sync period is set to 10 minutes by default and it's configurable via `--sync-period` flag. @@ -217,14 +229,26 @@ By default, it doesn't scale down until the grace period of 10 minutes passes af apiVersion: actions.summerwind.dev/v1alpha1 kind: RunnerDeployment metadata: - name: summerwind-actions-runner-controller + name: example-runner-deployment spec: - minReplicas: 1 - maxReplicas: 3 - scaleDownDelaySecondsAfterScaleUp: 1m template: spec: repository: summerwind/actions-runner-controller +--- +apiVersion: actions.summerwind.dev/v1alpha1 +kind: HorizontalRunnerAutoscaler +metadata: + name: example-runner-deployment-autoscaler +spec: + scaleTargetRef: + name: example-runner-deployment + minReplicas: 1 + maxReplicas: 3 + scaleDownDelaySecondsAfterScaleOut: 60 + metrics: + - type: TotalNumberOfQueuedAndInProgressWorkflowRuns + repositoryNames: + - summerwind/actions-runner-controller ``` ## Additional tweaks diff --git a/api/v1alpha1/horizontalrunnerautoscaler_types.go b/api/v1alpha1/horizontalrunnerautoscaler_types.go new file mode 100644 index 0000000000..b45e0ccfb3 --- /dev/null +++ b/api/v1alpha1/horizontalrunnerautoscaler_types.go @@ -0,0 +1,102 @@ +/* +Copyright 2020 The actions-runner-controller authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// HorizontalRunnerAutoscalerSpec defines the desired state of HorizontalRunnerAutoscaler +type HorizontalRunnerAutoscalerSpec struct { + // ScaleTargetRef sis the reference to scaled resource like RunnerDeployment + ScaleTargetRef ScaleTargetRef `json:"scaleTargetRef,omitempty"` + + // MinReplicas is the minimum number of replicas the deployment is allowed to scale + // +optional + MinReplicas *int `json:"minReplicas,omitempty"` + + // MinReplicas is the maximum number of replicas the deployment is allowed to scale + // +optional + MaxReplicas *int `json:"maxReplicas,omitempty"` + + // ScaleDownDelaySecondsAfterScaleUp is the approximate delay for a scale down followed by a scale up + // Used to prevent flapping (down->up->down->... loop) + // +optional + ScaleDownDelaySecondsAfterScaleUp *int `json:"scaleDownDelaySecondsAfterScaleOut,omitempty"` + + // Metrics is the collection of various metric targets to calculate desired number of runners + // +optional + Metrics []MetricSpec `json:"metrics,omitempty"` +} + +type ScaleTargetRef struct { + Name string `json:"name,omitempty"` +} + +type MetricSpec struct { + // Type is the type of metric to be used for autoscaling. + // The only supported Type is TotalNumberOfQueuedAndInProgressWorkflowRuns + Type string `json:"type,omitempty"` + + // RepositoryNames is the list of repository names to be used for calculating the metric. + // For example, a repository name is the REPO part of `github.com/USER/REPO`. + // +optional + RepositoryNames []string `json:"repositoryNames,omitempty"` +} + +type HorizontalRunnerAutoscalerStatus struct { + // ObservedGeneration is the most recent generation observed for the target. It corresponds to e.g. + // RunnerDeployment's generation, which is updated on mutation by the API Server. + // +optional + ObservedGeneration int64 `json:"observedGeneration,omitempty"` + + // DesiredReplicas is the total number of desired, non-terminated and latest pods to be set for the primary RunnerSet + // This doesn't include outdated pods while upgrading the deployment and replacing the runnerset. + // +optional + DesiredReplicas *int `json:"desiredReplicas,omitempty"` + + // +optional + LastSuccessfulScaleOutTime *metav1.Time `json:"lastSuccessfulScaleOutTime,omitempty"` +} + +// +kubebuilder:object:root=true +// +kubebuilder:subresource:status +// +kubebuilder:printcolumn:JSONPath=".spec.minReplicas",name=Min,type=number +// +kubebuilder:printcolumn:JSONPath=".spec.maxReplicas",name=Max,type=number +// +kubebuilder:printcolumn:JSONPath=".status.desiredReplicas",name=Desired,type=number + +// HorizontalRunnerAutoscaler is the Schema for the horizontalrunnerautoscaler API +type HorizontalRunnerAutoscaler struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec HorizontalRunnerAutoscalerSpec `json:"spec,omitempty"` + Status HorizontalRunnerAutoscalerStatus `json:"status,omitempty"` +} + +// +kubebuilder:object:root=true + +// HorizontalRunnerAutoscalerList contains a list of HorizontalRunnerAutoscaler +type HorizontalRunnerAutoscalerList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []HorizontalRunnerAutoscaler `json:"items"` +} + +func init() { + SchemeBuilder.Register(&HorizontalRunnerAutoscaler{}, &HorizontalRunnerAutoscalerList{}) +} diff --git a/api/v1alpha1/runnerdeployment_types.go b/api/v1alpha1/runnerdeployment_types.go index 39556ed4d2..f6b68a2fbc 100644 --- a/api/v1alpha1/runnerdeployment_types.go +++ b/api/v1alpha1/runnerdeployment_types.go @@ -21,7 +21,7 @@ import ( ) const ( - AutoscalingMetricTypeTotalNumberOfQueuedAndProgressingWorkflowRuns = "TotalNumberOfQueuedAndProgressingWorkflowRuns" + AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns = "TotalNumberOfQueuedAndInProgressWorkflowRuns" ) // RunnerReplicaSetSpec defines the desired state of RunnerDeployment @@ -29,41 +29,9 @@ type RunnerDeploymentSpec struct { // +optional Replicas *int `json:"replicas,omitempty"` - // MinReplicas is the minimum number of replicas the deployment is allowed to scale - // +optional - MinReplicas *int `json:"minReplicas,omitempty"` - - // MinReplicas is the maximum number of replicas the deployment is allowed to scale - // +optional - MaxReplicas *int `json:"maxReplicas,omitempty"` - - // ScaleDownDelaySecondsAfterScaleUp is the approximate delay for a scale down followed by a scale up - // Used to prevent flapping (down->up->down->... loop) - // +optional - ScaleDownDelaySecondsAfterScaleUp *int `json:"scaleDownDelaySecondsAfterScaleOut,omitempty"` - - // Autoscaling is set various configuration options for autoscaling this runner deployment. - // +optional - Autoscaling AutoscalingSpec `json:"autoscaling,omitempty"` - Template RunnerTemplate `json:"template"` } -type AutoscalingSpec struct { - Metrics []MetricSpec `json:"metrics,omitempty"` -} - -type MetricSpec struct { - // Type is the type of metric to be used for autoscaling. - // The only supported Type is TotalNumberOfQueuedAndProgressingWorkflowRuns - Type string `json:"type,omitempty"` - - // RepositoryNames is the list of repository names to be used for calculating the metric. - // For example, a repository name is the REPO part of `github.com/USER/REPO`. - // +optional - RepositoryNames []string `json:"repositoryNames,omitempty"` -} - type RunnerDeploymentStatus struct { AvailableReplicas int `json:"availableReplicas"` ReadyReplicas int `json:"readyReplicas"` @@ -72,9 +40,6 @@ type RunnerDeploymentStatus struct { // This doesn't include outdated pods while upgrading the deployment and replacing the runnerset. // +optional Replicas *int `json:"desiredReplicas,omitempty"` - - // +optional - LastSuccessfulScaleOutTime *metav1.Time `json:"lastSuccessfulScaleOutTime,omitempty"` } // +kubebuilder:object:root=true diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index cf10a6e6d8..bef95f06b2 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -26,8 +26,83 @@ import ( ) // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *AutoscalingSpec) DeepCopyInto(out *AutoscalingSpec) { +func (in *HorizontalRunnerAutoscaler) DeepCopyInto(out *HorizontalRunnerAutoscaler) { *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HorizontalRunnerAutoscaler. +func (in *HorizontalRunnerAutoscaler) DeepCopy() *HorizontalRunnerAutoscaler { + if in == nil { + return nil + } + out := new(HorizontalRunnerAutoscaler) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *HorizontalRunnerAutoscaler) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *HorizontalRunnerAutoscalerList) DeepCopyInto(out *HorizontalRunnerAutoscalerList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]HorizontalRunnerAutoscaler, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HorizontalRunnerAutoscalerList. +func (in *HorizontalRunnerAutoscalerList) DeepCopy() *HorizontalRunnerAutoscalerList { + if in == nil { + return nil + } + out := new(HorizontalRunnerAutoscalerList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *HorizontalRunnerAutoscalerList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *HorizontalRunnerAutoscalerSpec) DeepCopyInto(out *HorizontalRunnerAutoscalerSpec) { + *out = *in + out.ScaleTargetRef = in.ScaleTargetRef + if in.MinReplicas != nil { + in, out := &in.MinReplicas, &out.MinReplicas + *out = new(int) + **out = **in + } + if in.MaxReplicas != nil { + in, out := &in.MaxReplicas, &out.MaxReplicas + *out = new(int) + **out = **in + } + if in.ScaleDownDelaySecondsAfterScaleUp != nil { + in, out := &in.ScaleDownDelaySecondsAfterScaleUp, &out.ScaleDownDelaySecondsAfterScaleUp + *out = new(int) + **out = **in + } if in.Metrics != nil { in, out := &in.Metrics, &out.Metrics *out = make([]MetricSpec, len(*in)) @@ -37,12 +112,36 @@ func (in *AutoscalingSpec) DeepCopyInto(out *AutoscalingSpec) { } } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AutoscalingSpec. -func (in *AutoscalingSpec) DeepCopy() *AutoscalingSpec { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HorizontalRunnerAutoscalerSpec. +func (in *HorizontalRunnerAutoscalerSpec) DeepCopy() *HorizontalRunnerAutoscalerSpec { + if in == nil { + return nil + } + out := new(HorizontalRunnerAutoscalerSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *HorizontalRunnerAutoscalerStatus) DeepCopyInto(out *HorizontalRunnerAutoscalerStatus) { + *out = *in + if in.DesiredReplicas != nil { + in, out := &in.DesiredReplicas, &out.DesiredReplicas + *out = new(int) + **out = **in + } + if in.LastSuccessfulScaleOutTime != nil { + in, out := &in.LastSuccessfulScaleOutTime, &out.LastSuccessfulScaleOutTime + *out = (*in).DeepCopy() + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HorizontalRunnerAutoscalerStatus. +func (in *HorizontalRunnerAutoscalerStatus) DeepCopy() *HorizontalRunnerAutoscalerStatus { if in == nil { return nil } - out := new(AutoscalingSpec) + out := new(HorizontalRunnerAutoscalerStatus) in.DeepCopyInto(out) return out } @@ -161,22 +260,6 @@ func (in *RunnerDeploymentSpec) DeepCopyInto(out *RunnerDeploymentSpec) { *out = new(int) **out = **in } - if in.MinReplicas != nil { - in, out := &in.MinReplicas, &out.MinReplicas - *out = new(int) - **out = **in - } - if in.MaxReplicas != nil { - in, out := &in.MaxReplicas, &out.MaxReplicas - *out = new(int) - **out = **in - } - if in.ScaleDownDelaySecondsAfterScaleUp != nil { - in, out := &in.ScaleDownDelaySecondsAfterScaleUp, &out.ScaleDownDelaySecondsAfterScaleUp - *out = new(int) - **out = **in - } - in.Autoscaling.DeepCopyInto(&out.Autoscaling) in.Template.DeepCopyInto(&out.Template) } @@ -198,10 +281,6 @@ func (in *RunnerDeploymentStatus) DeepCopyInto(out *RunnerDeploymentStatus) { *out = new(int) **out = **in } - if in.LastSuccessfulScaleOutTime != nil { - in, out := &in.LastSuccessfulScaleOutTime, &out.LastSuccessfulScaleOutTime - *out = (*in).DeepCopy() - } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RunnerDeploymentStatus. @@ -510,3 +589,18 @@ func (in *RunnerTemplate) DeepCopy() *RunnerTemplate { in.DeepCopyInto(out) return out } + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ScaleTargetRef) DeepCopyInto(out *ScaleTargetRef) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ScaleTargetRef. +func (in *ScaleTargetRef) DeepCopy() *ScaleTargetRef { + if in == nil { + return nil + } + out := new(ScaleTargetRef) + in.DeepCopyInto(out) + return out +} diff --git a/config/crd/bases/actions.summerwind.dev_horizontalrunnerautoscalers.yaml b/config/crd/bases/actions.summerwind.dev_horizontalrunnerautoscalers.yaml new file mode 100644 index 0000000000..9459475bfc --- /dev/null +++ b/config/crd/bases/actions.summerwind.dev_horizontalrunnerautoscalers.yaml @@ -0,0 +1,118 @@ + +--- +apiVersion: apiextensions.k8s.io/v1beta1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.2.4 + creationTimestamp: null + name: horizontalrunnerautoscalers.actions.summerwind.dev +spec: + additionalPrinterColumns: + - JSONPath: .spec.minReplicas + name: Min + type: number + - JSONPath: .spec.maxReplicas + name: Max + type: number + - JSONPath: .status.desiredReplicas + name: Desired + type: number + group: actions.summerwind.dev + names: + kind: HorizontalRunnerAutoscaler + listKind: HorizontalRunnerAutoscalerList + plural: horizontalrunnerautoscalers + singular: horizontalrunnerautoscaler + scope: Namespaced + subresources: + status: {} + validation: + openAPIV3Schema: + description: HorizontalRunnerAutoscaler is the Schema for the horizontalrunnerautoscaler + API + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: HorizontalRunnerAutoscalerSpec defines the desired state of + HorizontalRunnerAutoscaler + properties: + maxReplicas: + description: MinReplicas is the maximum number of replicas the deployment + is allowed to scale + type: integer + metrics: + description: Metrics is the collection of various metric targets to + calculate desired number of runners + items: + properties: + repositoryNames: + description: RepositoryNames is the list of repository names to + be used for calculating the metric. For example, a repository + name is the REPO part of `github.com/USER/REPO`. + items: + type: string + type: array + type: + description: Type is the type of metric to be used for autoscaling. + The only supported Type is TotalNumberOfQueuedAndInProgressWorkflowRuns + type: string + type: object + type: array + minReplicas: + description: MinReplicas is the minimum number of replicas the deployment + is allowed to scale + type: integer + scaleDownDelaySecondsAfterScaleOut: + description: ScaleDownDelaySecondsAfterScaleUp is the approximate delay + for a scale down followed by a scale up Used to prevent flapping (down->up->down->... + loop) + type: integer + scaleTargetRef: + description: ScaleTargetRef sis the reference to scaled resource like + RunnerDeployment + properties: + name: + type: string + type: object + type: object + status: + properties: + desiredReplicas: + description: DesiredReplicas is the total number of desired, non-terminated + and latest pods to be set for the primary RunnerSet This doesn't include + outdated pods while upgrading the deployment and replacing the runnerset. + type: integer + lastSuccessfulScaleOutTime: + format: date-time + type: string + observedGeneration: + description: ObservedGeneration is the most recent generation observed + for the target. It corresponds to e.g. RunnerDeployment's generation, + which is updated on mutation by the API Server. + format: int64 + type: integer + type: object + type: object + version: v1alpha1 + versions: + - name: v1alpha1 + served: true + storage: true +status: + acceptedNames: + kind: "" + plural: "" + conditions: [] + storedVersions: [] diff --git a/config/crd/bases/actions.summerwind.dev_runnerdeployments.yaml b/config/crd/bases/actions.summerwind.dev_runnerdeployments.yaml index e396520150..043c831d6a 100644 --- a/config/crd/bases/actions.summerwind.dev_runnerdeployments.yaml +++ b/config/crd/bases/actions.summerwind.dev_runnerdeployments.yaml @@ -46,42 +46,8 @@ spec: spec: description: RunnerReplicaSetSpec defines the desired state of RunnerDeployment properties: - autoscaling: - description: Autoscaling is set various configuration options for autoscaling - this runner deployment. - properties: - metrics: - items: - properties: - repositoryNames: - description: RepositoryNames is the list of repository names - to be used for calculating the metric. For example, a repository - name is the REPO part of `github.com/USER/REPO`. - items: - type: string - type: array - type: - description: Type is the type of metric to be used for autoscaling. - The only supported Type is TotalNumberOfQueuedAndProgressingWorkflowRuns - type: string - type: object - type: array - type: object - maxReplicas: - description: MinReplicas is the maximum number of replicas the deployment - is allowed to scale - type: integer - minReplicas: - description: MinReplicas is the minimum number of replicas the deployment - is allowed to scale - type: integer replicas: type: integer - scaleDownDelaySecondsAfterScaleOut: - description: ScaleDownDelaySecondsAfterScaleUp is the approximate delay - for a scale down followed by a scale up Used to prevent flapping (down->up->down->... - loop) - type: integer template: properties: metadata: @@ -6762,9 +6728,6 @@ spec: and latest pods to be set for the primary RunnerSet This doesn't include outdated pods while upgrading the deployment and replacing the runnerset. type: integer - lastSuccessfulScaleOutTime: - format: date-time - type: string readyReplicas: type: integer required: diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 87241a5948..483ab3d841 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -6,6 +6,26 @@ metadata: creationTimestamp: null name: manager-role rules: +- apiGroups: + - actions.summerwind.dev + resources: + - horizontalrunnerautoscalers + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - actions.summerwind.dev + resources: + - horizontalrunnerautoscalers/status + verbs: + - get + - patch + - update - apiGroups: - actions.summerwind.dev resources: diff --git a/controllers/autoscaling.go b/controllers/autoscaling.go index a1ce5d26df..661534dff3 100644 --- a/controllers/autoscaling.go +++ b/controllers/autoscaling.go @@ -8,13 +8,11 @@ import ( "strings" ) -func (r *RunnerDeploymentReconciler) determineDesiredReplicas(rd v1alpha1.RunnerDeployment) (*int, error) { - if rd.Spec.Replicas != nil { - return nil, fmt.Errorf("bug: determineDesiredReplicas should not be called for deplomeny with specific replicas") - } else if rd.Spec.MinReplicas == nil { - return nil, fmt.Errorf("runnerdeployment %s/%s is missing minReplicas", rd.Namespace, rd.Name) - } else if rd.Spec.MaxReplicas == nil { - return nil, fmt.Errorf("runnerdeployment %s/%s is missing maxReplicas", rd.Namespace, rd.Name) +func (r *HorizontalRunnerAutoscalerReconciler) determineDesiredReplicas(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) { + if hra.Spec.MinReplicas == nil { + return nil, fmt.Errorf("horizontalrunnerautoscaler %s/%s is missing minReplicas", hra.Namespace, hra.Name) + } else if hra.Spec.MaxReplicas == nil { + return nil, fmt.Errorf("horizontalrunnerautoscaler %s/%s is missing maxReplicas", hra.Namespace, hra.Name) } var repos [][]string @@ -26,12 +24,12 @@ func (r *RunnerDeploymentReconciler) determineDesiredReplicas(rd v1alpha1.Runner return nil, fmt.Errorf("asserting runner deployment spec to detect bug: spec.template.organization should not be empty on this code path") } - metrics := rd.Spec.Autoscaling.Metrics + metrics := hra.Spec.Metrics if len(metrics) == 0 { return nil, fmt.Errorf("validating autoscaling metrics: one or more metrics is required") - } else if tpe := metrics[0].Type; tpe != v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndProgressingWorkflowRuns { - return nil, fmt.Errorf("validting autoscaling metrics: unsupported metric type %q: only supported value is %s", tpe, v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndProgressingWorkflowRuns) + } else if tpe := metrics[0].Type; tpe != v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns { + return nil, fmt.Errorf("validting autoscaling metrics: unsupported metric type %q: only supported value is %s", tpe, v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns) } else if len(metrics[0].RepositoryNames) == 0 { return nil, errors.New("validating autoscaling metrics: spec.autoscaling.metrics[].repositoryNames is required and must have one more more entries for organizational runner deployment") } @@ -74,8 +72,8 @@ func (r *RunnerDeploymentReconciler) determineDesiredReplicas(rd v1alpha1.Runner } } - minReplicas := *rd.Spec.MinReplicas - maxReplicas := *rd.Spec.MaxReplicas + minReplicas := *hra.Spec.MinReplicas + maxReplicas := *hra.Spec.MaxReplicas necessaryReplicas := queued + inProgress var desiredReplicas int diff --git a/controllers/autoscaling_test.go b/controllers/autoscaling_test.go index 8889e3ebfa..56c65b16b1 100644 --- a/controllers/autoscaling_test.go +++ b/controllers/autoscaling_test.go @@ -115,9 +115,12 @@ func TestDetermineDesiredReplicas_RepositoryRunner(t *testing.T) { }, // fixed at 3 { - repo: "test/valid", - fixed: intPtr(3), - want: 3, + repo: "test/valid", + min: intPtr(1), + max: intPtr(3), + fixed: intPtr(3), + workflowRuns: `{"total_count": 4, "workflow_runs":[{"status":"in_progress"}, {"status":"in_progress"}, {"status":"in_progress"}, {"status":"completed"}]}"`, + want: 3, }, } @@ -137,7 +140,7 @@ func TestDetermineDesiredReplicas_RepositoryRunner(t *testing.T) { defer server.Close() client := newGithubClient(server) - r := &RunnerDeploymentReconciler{ + h := &HorizontalRunnerAutoscalerReconciler{ Log: log, GitHubClient: client, Scheme: scheme, @@ -145,23 +148,34 @@ func TestDetermineDesiredReplicas_RepositoryRunner(t *testing.T) { rd := v1alpha1.RunnerDeployment{ TypeMeta: metav1.TypeMeta{}, + ObjectMeta: metav1.ObjectMeta{ + Name: "testrd", + }, Spec: v1alpha1.RunnerDeploymentSpec{ Template: v1alpha1.RunnerTemplate{ Spec: v1alpha1.RunnerSpec{ Repository: tc.repo, }, }, - Replicas: tc.fixed, + Replicas: tc.fixed, + }, + Status: v1alpha1.RunnerDeploymentStatus{ + Replicas: tc.sReplicas, + }, + } + + hra := v1alpha1.HorizontalRunnerAutoscaler{ + Spec: v1alpha1.HorizontalRunnerAutoscalerSpec{ MaxReplicas: tc.max, MinReplicas: tc.min, }, - Status: v1alpha1.RunnerDeploymentStatus{ - Replicas: tc.sReplicas, + Status: v1alpha1.HorizontalRunnerAutoscalerStatus{ + DesiredReplicas: tc.sReplicas, LastSuccessfulScaleOutTime: tc.sTime, }, } - rs, err := r.newRunnerReplicaSetWithAutoscaling(rd) + got, err := h.computeReplicas(rd, hra) if err != nil { if tc.err == "" { t.Fatalf("unexpected error: expected none, got %v", err) @@ -171,8 +185,6 @@ func TestDetermineDesiredReplicas_RepositoryRunner(t *testing.T) { return } - got := rs.Spec.Replicas - if got == nil { t.Fatalf("unexpected value of rs.Spec.Replicas: nil") } @@ -278,16 +290,23 @@ func TestDetermineDesiredReplicas_OrganizationalRunner(t *testing.T) { }, // fixed at 3 { - org: "test", - repos: []string{"valid"}, - fixed: intPtr(3), - want: 3, + org: "test", + repos: []string{"valid"}, + fixed: intPtr(1), + min: intPtr(1), + max: intPtr(3), + workflowRuns: `{"total_count": 2, "workflow_runs":[{"status":"in_progress"}, {"status":"in_progress"}, {"status":"in_progress"}, {"status":"completed"}]}"`, + want: 3, }, // org runner, fixed at 3 { - org: "test", - fixed: intPtr(3), - want: 3, + org: "test", + repos: []string{"valid"}, + fixed: intPtr(1), + min: intPtr(1), + max: intPtr(3), + workflowRuns: `{"total_count": 2, "workflow_runs":[{"status":"in_progress"}, {"status":"in_progress"}, {"status":"in_progress"}, {"status":"completed"}]}"`, + want: 3, }, // org runner, 1 demanded, min at 1, no repos { @@ -315,39 +334,50 @@ func TestDetermineDesiredReplicas_OrganizationalRunner(t *testing.T) { defer server.Close() client := newGithubClient(server) - r := &RunnerDeploymentReconciler{ + h := &HorizontalRunnerAutoscalerReconciler{ Log: log, - GitHubClient: client, Scheme: scheme, + GitHubClient: client, } rd := v1alpha1.RunnerDeployment{ - TypeMeta: metav1.TypeMeta{}, + ObjectMeta: metav1.ObjectMeta{ + Name: "testrd", + }, Spec: v1alpha1.RunnerDeploymentSpec{ Template: v1alpha1.RunnerTemplate{ Spec: v1alpha1.RunnerSpec{ Organization: tc.org, }, }, - Autoscaling: v1alpha1.AutoscalingSpec{ - Metrics: []v1alpha1.MetricSpec{ - { - Type: v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndProgressingWorkflowRuns, - RepositoryNames: tc.repos, - }, - }, + Replicas: tc.fixed, + }, + Status: v1alpha1.RunnerDeploymentStatus{ + Replicas: tc.sReplicas, + }, + } + + hra := v1alpha1.HorizontalRunnerAutoscaler{ + Spec: v1alpha1.HorizontalRunnerAutoscalerSpec{ + ScaleTargetRef: v1alpha1.ScaleTargetRef{ + Name: "testrd", }, - Replicas: tc.fixed, MaxReplicas: tc.max, MinReplicas: tc.min, + Metrics: []v1alpha1.MetricSpec{ + { + Type: v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns, + RepositoryNames: tc.repos, + }, + }, }, - Status: v1alpha1.RunnerDeploymentStatus{ - Replicas: tc.sReplicas, + Status: v1alpha1.HorizontalRunnerAutoscalerStatus{ + DesiredReplicas: tc.sReplicas, LastSuccessfulScaleOutTime: tc.sTime, }, } - rs, err := r.newRunnerReplicaSetWithAutoscaling(rd) + got, err := h.computeReplicas(rd, hra) if err != nil { if tc.err == "" { t.Fatalf("unexpected error: expected none, got %v", err) @@ -357,10 +387,8 @@ func TestDetermineDesiredReplicas_OrganizationalRunner(t *testing.T) { return } - got := rs.Spec.Replicas - if got == nil { - t.Fatalf("unexpected value of rs.Spec.Replicas: nil") + t.Fatalf("unexpected value of rs.Spec.Replicas: nil, wanted %v", tc.want) } if *got != tc.want { diff --git a/controllers/horizontalrunnerautoscaler_controller.go b/controllers/horizontalrunnerautoscaler_controller.go new file mode 100644 index 0000000000..5de8575e28 --- /dev/null +++ b/controllers/horizontalrunnerautoscaler_controller.go @@ -0,0 +1,184 @@ +/* +Copyright 2020 The actions-runner-controller authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controllers + +import ( + "context" + "time" + + "github.com/summerwind/actions-runner-controller/github" + "k8s.io/apimachinery/pkg/types" + + "github.com/go-logr/logr" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/tools/record" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/summerwind/actions-runner-controller/api/v1alpha1" +) + +const ( + DefaultScaleDownDelay = 10 * time.Minute +) + +// HorizontalRunnerAutoscalerReconciler reconciles a HorizontalRunnerAutoscaler object +type HorizontalRunnerAutoscalerReconciler struct { + client.Client + GitHubClient *github.Client + Log logr.Logger + Recorder record.EventRecorder + Scheme *runtime.Scheme +} + +// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnerdeployments,verbs=get;list;watch;update;patch +// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=horizontalrunnerautoscalers,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=horizontalrunnerautoscalers/status,verbs=get;update;patch +// +kubebuilder:rbac:groups=core,resources=events,verbs=create;patch + +func (r *HorizontalRunnerAutoscalerReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) { + ctx := context.Background() + log := r.Log.WithValues("horizontalrunnerautoscaler", req.NamespacedName) + + var hra v1alpha1.HorizontalRunnerAutoscaler + if err := r.Get(ctx, req.NamespacedName, &hra); err != nil { + return ctrl.Result{}, client.IgnoreNotFound(err) + } + + if !hra.ObjectMeta.DeletionTimestamp.IsZero() { + return ctrl.Result{}, nil + } + + var rd v1alpha1.RunnerDeployment + if err := r.Get(ctx, types.NamespacedName{ + Namespace: req.Namespace, + Name: hra.Spec.ScaleTargetRef.Name, + }, &rd); err != nil { + return ctrl.Result{}, client.IgnoreNotFound(err) + } + + if !rd.ObjectMeta.DeletionTimestamp.IsZero() { + return ctrl.Result{}, nil + } + + replicas, err := r.computeReplicas(rd, hra) + if err != nil { + r.Recorder.Event(&hra, corev1.EventTypeNormal, "RunnerAutoscalingFailure", err.Error()) + + log.Error(err, "Could not compute replicas") + + return ctrl.Result{}, err + } + + const defaultReplicas = 1 + + currentDesiredReplicas := getIntOrDefault(rd.Spec.Replicas, defaultReplicas) + newDesiredReplicas := getIntOrDefault(replicas, defaultReplicas) + + // Please add more conditions that we can in-place update the newest runnerreplicaset without disruption + if currentDesiredReplicas != newDesiredReplicas { + copy := rd.DeepCopy() + copy.Spec.Replicas = &newDesiredReplicas + + if err := r.Client.Update(ctx, copy); err != nil { + log.Error(err, "Failed to update runnerderployment resource") + + return ctrl.Result{}, err + } + + return ctrl.Result{}, err + } + + if hra.Status.DesiredReplicas == nil || *hra.Status.DesiredReplicas != *replicas { + updated := hra.DeepCopy() + + if (hra.Status.DesiredReplicas == nil && *replicas > 1) || + (hra.Status.DesiredReplicas != nil && *replicas > *hra.Status.DesiredReplicas) { + + updated.Status.LastSuccessfulScaleOutTime = &metav1.Time{Time: time.Now()} + } + + updated.Status.DesiredReplicas = replicas + + if err := r.Status().Update(ctx, updated); err != nil { + log.Error(err, "Failed to update horizontalrunnerautoscaler status") + + return ctrl.Result{}, err + } + } + + return ctrl.Result{}, nil +} + +func (r *HorizontalRunnerAutoscalerReconciler) SetupWithManager(mgr ctrl.Manager) error { + r.Recorder = mgr.GetEventRecorderFor("runnerdeployment-controller") + + if err := mgr.GetFieldIndexer().IndexField(&v1alpha1.RunnerReplicaSet{}, runnerSetOwnerKey, func(rawObj runtime.Object) []string { + runnerSet := rawObj.(*v1alpha1.RunnerReplicaSet) + owner := metav1.GetControllerOf(runnerSet) + if owner == nil { + return nil + } + + if owner.APIVersion != v1alpha1.GroupVersion.String() || owner.Kind != "RunnerDeployment" { + return nil + } + + return []string{owner.Name} + }); err != nil { + return err + } + + return ctrl.NewControllerManagedBy(mgr). + For(&v1alpha1.RunnerDeployment{}). + Owns(&v1alpha1.RunnerReplicaSet{}). + Complete(r) +} + +func (r *HorizontalRunnerAutoscalerReconciler) computeReplicas(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) { + var computedReplicas *int + + replicas, err := r.determineDesiredReplicas(rd, hra) + if err != nil { + return nil, err + } + + var scaleDownDelay time.Duration + + if hra.Spec.ScaleDownDelaySecondsAfterScaleUp != nil { + scaleDownDelay = time.Duration(*hra.Spec.ScaleDownDelaySecondsAfterScaleUp) * time.Second + } else { + scaleDownDelay = DefaultScaleDownDelay + } + + now := time.Now() + + if hra.Status.DesiredReplicas == nil || + *hra.Status.DesiredReplicas < *replicas || + hra.Status.LastSuccessfulScaleOutTime == nil || + hra.Status.LastSuccessfulScaleOutTime.Add(scaleDownDelay).Before(now) { + + computedReplicas = replicas + } else { + computedReplicas = hra.Status.DesiredReplicas + } + + return computedReplicas, nil +} diff --git a/controllers/runnerdeployment_controller.go b/controllers/runnerdeployment_controller.go index c1a7c0d177..2848f9e9ea 100644 --- a/controllers/runnerdeployment_controller.go +++ b/controllers/runnerdeployment_controller.go @@ -23,7 +23,6 @@ import ( "sort" "time" - "github.com/summerwind/actions-runner-controller/github" "k8s.io/apimachinery/pkg/types" "github.com/davecgh/go-spew/spew" @@ -49,10 +48,9 @@ const ( // RunnerDeploymentReconciler reconciles a Runner object type RunnerDeploymentReconciler struct { client.Client - GitHubClient *github.Client - Log logr.Logger - Recorder record.EventRecorder - Scheme *runtime.Scheme + Log logr.Logger + Recorder record.EventRecorder + Scheme *runtime.Scheme } // +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnerdeployments,verbs=get;list;watch;create;update;patch;delete @@ -97,7 +95,7 @@ func (r *RunnerDeploymentReconciler) Reconcile(req ctrl.Request) (ctrl.Result, e oldSets = myRunnerReplicaSets[1:] } - desiredRS, err := r.newRunnerReplicaSetWithAutoscaling(rd) + desiredRS, err := r.newRunnerReplicaSet(rd) if err != nil { r.Recorder.Event(&rd, corev1.EventTypeNormal, "RunnerAutoscalingFailure", err.Error()) @@ -193,12 +191,6 @@ func (r *RunnerDeploymentReconciler) Reconcile(req ctrl.Request) (ctrl.Result, e updated := rd.DeepCopy() updated.Status.Replicas = desiredRS.Spec.Replicas - if (rd.Status.Replicas == nil && *desiredRS.Spec.Replicas > 1) || - (rd.Status.Replicas != nil && *desiredRS.Spec.Replicas > *rd.Status.Replicas) { - - updated.Status.LastSuccessfulScaleOutTime = &metav1.Time{Time: time.Now()} - } - if err := r.Status().Update(ctx, updated); err != nil { log.Error(err, "Failed to update runnerdeployment status") @@ -263,7 +255,7 @@ func CloneAndAddLabel(labels map[string]string, labelKey, labelValue string) map return newLabels } -func (r *RunnerDeploymentReconciler) newRunnerReplicaSet(rd v1alpha1.RunnerDeployment, computedReplicas *int) (*v1alpha1.RunnerReplicaSet, error) { +func (r *RunnerDeploymentReconciler) newRunnerReplicaSet(rd v1alpha1.RunnerDeployment) (*v1alpha1.RunnerReplicaSet, error) { newRSTemplate := *rd.Spec.Template.DeepCopy() templateHash := ComputeHash(&newRSTemplate) // Add template hash label to selector. @@ -284,10 +276,6 @@ func (r *RunnerDeploymentReconciler) newRunnerReplicaSet(rd v1alpha1.RunnerDeplo }, } - if computedReplicas != nil { - rs.Spec.Replicas = computedReplicas - } - if err := ctrl.SetControllerReference(&rd, &rs, r.Scheme); err != nil { return &rs, err } @@ -319,36 +307,3 @@ func (r *RunnerDeploymentReconciler) SetupWithManager(mgr ctrl.Manager) error { Owns(&v1alpha1.RunnerReplicaSet{}). Complete(r) } - -func (r *RunnerDeploymentReconciler) newRunnerReplicaSetWithAutoscaling(rd v1alpha1.RunnerDeployment) (*v1alpha1.RunnerReplicaSet, error) { - var computedReplicas *int - - if rd.Spec.Replicas == nil { - replicas, err := r.determineDesiredReplicas(rd) - if err != nil { - return nil, err - } - - var scaleDownDelay time.Duration - - if rd.Spec.ScaleDownDelaySecondsAfterScaleUp != nil { - scaleDownDelay = time.Duration(*rd.Spec.ScaleDownDelaySecondsAfterScaleUp) * time.Second - } else { - scaleDownDelay = 10 * time.Minute - } - - now := time.Now() - - if rd.Status.Replicas == nil || - *rd.Status.Replicas < *replicas || - rd.Status.LastSuccessfulScaleOutTime == nil || - rd.Status.LastSuccessfulScaleOutTime.Add(scaleDownDelay).Before(now) { - - computedReplicas = replicas - } else { - computedReplicas = rd.Status.Replicas - } - } - - return r.newRunnerReplicaSet(rd, computedReplicas) -} diff --git a/main.go b/main.go index 22545cbc17..026396ba21 100644 --- a/main.go +++ b/main.go @@ -169,14 +169,25 @@ func main() { } runnerDeploymentReconciler := &controllers.RunnerDeploymentReconciler{ + Client: mgr.GetClient(), + Log: ctrl.Log.WithName("controllers").WithName("RunnerDeployment"), + Scheme: mgr.GetScheme(), + } + + if err = runnerDeploymentReconciler.SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "RunnerDeployment") + os.Exit(1) + } + + horizontalRunnerAutoscaler := &controllers.HorizontalRunnerAutoscalerReconciler{ Client: mgr.GetClient(), - Log: ctrl.Log.WithName("controllers").WithName("RunnerDeployment"), + Log: ctrl.Log.WithName("controllers").WithName("HorizontalRunnerAutoscaler"), Scheme: mgr.GetScheme(), GitHubClient: ghClient, } - if err = runnerDeploymentReconciler.SetupWithManager(mgr); err != nil { - setupLog.Error(err, "unable to create controller", "controller", "RunnerDeployment") + if err = horizontalRunnerAutoscaler.SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "HorizontalRunnerAutoscaler") os.Exit(1) }