diff --git a/README.md b/README.md index 058ff3be13..7520bdf002 100644 --- a/README.md +++ b/README.md @@ -205,7 +205,9 @@ example-runnerdeploy2475h595fr mumoshu/actions-runner-controller-ci Running example-runnerdeploy2475ht2qbr mumoshu/actions-runner-controller-ci Running ``` -#### Autoscaling +### Autoscaling + +#### Repository runners Autoscaling `RunnerDeployment` can scale the number of runners between `minReplicas` and `maxReplicas` fields, depending on pending workflow runs. @@ -241,6 +243,8 @@ The scale out performance is controlled via the manager containers startup `--sy Additionally, the autoscaling feature has an anti-flapping option that prevents periodic loop of scaling up and down. By default, it doesn't scale down until the grace period of 10 minutes passes after a scale up. The grace period can be configured by setting `scaleDownDelaySecondsAfterScaleUp`: +Please note that if your `RunnerDeployment` has the `Repository` key set, then do not use the `<owner/repository_name>` notation, only provide the `<repository_name>`. + ```yaml apiVersion: actions.summerwind.dev/v1alpha1 kind: RunnerDeployment @@ -267,6 +271,36 @@ spec: - summerwind/actions-runner-controller ``` +#### Organization runners Autoscaling +To autoscale on an organizational level, you need to remove the `repositoryNames` mapping or leave it empty. The GitHub Actions API doesn’t offer an endpoint to check the currently queued workflows on an organizational level. The controller works around this by selecting, after each `sync-period`, the repositories with the latest `pushed` time and checking the Actions workflow queue of those repositories. At the moment, the controller checks the 10 most recently pushed repositories. 
+ +Please note that to autoscale your organization runners you must adjust your GitHub organization permissions accordingly; for instance, if you are using an organization PAT (Personal Access Token), update the permissions of your PAT to allow the controller to list all the repositories under your organization. + +An example of scaling your organization runners is shown below: + +```yaml +apiVersion: actions.summerwind.dev/v1alpha1 +kind: RunnerDeployment +metadata: + name: example-runnerdeploy +spec: + template: + spec: + organization: "your-organization-name" +--- +apiVersion: actions.summerwind.dev/v1alpha1 +kind: HorizontalRunnerAutoscaler +metadata: + name: example-runnerdeploy-autoscaler +spec: + scaleTargetRef: + name: example-runnerdeploy + minReplicas: 1 + maxReplicas: 3 + metrics: + - type: TotalNumberOfQueuedAndInProgressWorkflowRuns +``` + ## Runner with DinD When using default runner, runner pod starts up 2 containers: runner and DinD (Docker-in-Docker). This might create issues if there's `LimitRange` set to namespace. @@ -321,7 +355,7 @@ spec: requests: cpu: "2.0" memory: "4Gi" - # If set to false, there are no privileged container and you cannot use docker. + # If set to false, there are no privileged container and you cannot use docker. dockerEnabled: false # If set to true, runner pod container only 1 container that's expected to be able to run docker, too. 
# image summerwind/actions-runner-dind or custom one should be used with true -value diff --git a/controllers/autoscaling.go b/controllers/autoscaling.go index 95e9dfedf4..eb8a63cb93 100644 --- a/controllers/autoscaling.go +++ b/controllers/autoscaling.go @@ -2,10 +2,11 @@ package controllers import ( "context" - "errors" "fmt" + "log" "strings" + "github.com/google/go-github/v32/github" "github.com/summerwind/actions-runner-controller/api/v1alpha1" ) @@ -18,30 +19,58 @@ func (r *HorizontalRunnerAutoscalerReconciler) determineDesiredReplicas(rd v1alp var repos [][]string - repoID := rd.Spec.Template.Spec.Repository - if repoID == "" { - orgName := rd.Spec.Template.Spec.Organization - if orgName == "" { - return nil, fmt.Errorf("asserting runner deployment spec to detect bug: spec.template.organization should not be empty on this code path") - } + orgName := rd.Spec.Template.Spec.Organization + if orgName == "" { + return nil, fmt.Errorf("asserting runner deployment spec to detect bug: spec.template.organization should not be empty on this code path") + } + + metrics := hra.Spec.Metrics + if len(metrics) == 0 { + return nil, fmt.Errorf("validating autoscaling metrics: one or more metrics is required") + } else if tpe := metrics[0].Type; tpe != v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns { + return nil, fmt.Errorf("validting autoscaling metrics: unsupported metric type %q: only supported value is %s", tpe, v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns) + } - metrics := hra.Spec.Metrics + if len(metrics[0].RepositoryNames) == 0 { + options := &github.RepositoryListByOrgOptions{ + Type: "private", + Sort: "pushed", + } + orgRepos, _, err := r.GitHubClient.Repositories.ListByOrg(context.Background(), orgName, options) + if err != nil { + return nil, fmt.Errorf("[ERROR] error fetching a list of repositories for the %s organization with error message: %s", orgName, err) + } - if len(metrics) == 0 { - return nil, 
fmt.Errorf("validating autoscaling metrics: one or more metrics is required") - } else if tpe := metrics[0].Type; tpe != v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns { - return nil, fmt.Errorf("validting autoscaling metrics: unsupported metric type %q: only supported value is %s", tpe, v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns) - } else if len(metrics[0].RepositoryNames) == 0 { - return nil, errors.New("validating autoscaling metrics: spec.autoscaling.metrics[].repositoryNames is required and must have one more more entries for organizational runner deployment") + if len(orgRepos) < 1 { + return nil, fmt.Errorf("[ERROR] ListByOrg returned empty slice! Does your PAT have enough access and is it authorized to list the organizational repositories?") } - for _, repoName := range metrics[0].RepositoryNames { + for _, v := range orgRepos { + repoName := fmt.Sprint(*v.Name) + + // We kind of already make sure that we don't use these repo's by using the `ListByOrgOptions` field, this is just an extra safeguard. + if *v.Archived || *v.Disabled { + continue + } + + // Some organizations have hundreds to thousands of repositories; we only need the X most recent ones. + if len(repos) >= 10 { + log.Printf("[INFO] Reached the limit of repos, performing check on these repositories: %s", repos) + break + } repos = append(repos, []string{orgName, repoName}) } + log.Printf("[INFO] watching the following organizational repositories: %s", repos) } else { - repo := strings.Split(repoID, "/") - - repos = append(repos, repo) + repoID := rd.Spec.Template.Spec.Repository + if repoID == "" { + for _, repoName := range metrics[0].RepositoryNames { + repos = append(repos, []string{orgName, repoName}) + } + } else { + repo := strings.Split(repoID, "/") + repos = append(repos, repo) + } } var total, inProgress, queued, completed, unknown int