feat: pull the Sidero configuration as clusterctl variables
This makes it possible to skip patching any components on initial install, and if
the variables are preserved, the settings are preserved across upgrades.

Signed-off-by: Andrey Smirnov <[email protected]>
smira authored and talos-bot committed Apr 23, 2021
1 parent 23c75e4 commit 1e33dcd
Showing 11 changed files with 63 additions and 171 deletions.
2 changes: 0 additions & 2 deletions app/cluster-api-provider-sidero/config/manager/manager.yaml
@@ -18,8 +18,6 @@ spec:
containers:
- command:
- /manager
args:
- --enable-leader-election
image: controller:latest
imagePullPolicy: Always
name: manager
2 changes: 1 addition & 1 deletion app/cluster-api-provider-sidero/main.go
@@ -49,7 +49,7 @@ func main() {
)

flag.StringVar(&metricsAddr, "metrics-addr", ":8080", "The address the metric endpoint binds to.")
flag.BoolVar(&enableLeaderElection, "enable-leader-election", false,
flag.BoolVar(&enableLeaderElection, "enable-leader-election", true,
"Enable leader election for controller manager. Enabling this will ensure there is only one active controller manager.")
flag.IntVar(&webhookPort, "webhook-port", 0, "Webhook Server port, disabled by default. When enabled, the manager will only work as webhook server, no reconcilers are installed.")
flag.Parse()
10 changes: 9 additions & 1 deletion app/metal-controller-manager/config/manager/manager.yaml
@@ -54,11 +54,19 @@ spec:
labels:
control-plane: metal-controller-manager
spec:
hostNetwork: ${SIDERO_CONTROLLER_MANAGER_HOST_NETWORK:=false}
containers:
- command:
- /manager
args:
- --enable-leader-election=false
- --metrics-addr=127.0.0.1:8080
- --api-endpoint=${SIDERO_CONTROLLER_MANAGER_API_ENDPOINT:=-}
- --extra-agent-kernel-args=${SIDERO_CONTROLLER_MANAGER_EXTRA_AGENT_KERNEL_ARGS:=-}
- --auto-accept-servers=${SIDERO_CONTROLLER_MANAGER_AUTO_ACCEPT_SERVERS:=false}
- --insecure-wipe=${SIDERO_CONTROLLER_MANAGER_INSECURE_WIPE:=true}
- --server-reboot-timeout=${SIDERO_CONTROLLER_MANAGER_SERVER_REBOOT_TIMEOUT:=20m}
- --test-power-simulated-explicit-failure-prob=${SIDERO_CONTROLLER_MANAGER_TEST_POWER_EXPLICIT_FAILURE:=0}
- --test-power-simulated-silent-failure-prob=${SIDERO_CONTROLLER_MANAGER_TEST_POWER_SILENT_FAILURE:=0}
image: controller:latest
imagePullPolicy: Always
name: manager
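For readers unfamiliar with the `${VAR:=default}` syntax introduced above: these are `clusterctl` variable expressions; at `clusterctl init` time each value is taken from the environment (or the `clusterctl` configuration), and the literal after `:=` is used when the variable is unset. A minimal sketch of overriding one of these defaults (the choice of variable is illustrative):

```bash
# Discovered servers are registered as accepted without a manual patch of
# .spec.accepted; every other variable keeps its := default.
SIDERO_CONTROLLER_MANAGER_AUTO_ACCEPT_SERVERS=true \
  clusterctl init -b talos -c talos -i sidero
```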
@@ -19,7 +19,3 @@ spec:
ports:
- containerPort: 8443
name: https
- name: manager
args:
- "--metrics-addr=127.0.0.1:8080"
- "--enable-leader-election"
11 changes: 10 additions & 1 deletion app/metal-controller-manager/main.go
@@ -67,7 +67,7 @@ func main() {
flag.StringVar(&apiEndpoint, "api-endpoint", "", "The endpoint used by the discovery environment.")
flag.StringVar(&metricsAddr, "metrics-addr", ":8081", "The address the metric endpoint binds to.")
flag.StringVar(&extraAgentKernelArgs, "extra-agent-kernel-args", "", "A comma delimited list of key-value pairs to be added to the agent environment kernel parameters.")
flag.BoolVar(&enableLeaderElection, "enable-leader-election", false, "Enable leader election for controller manager. Enabling this will ensure there is only one active controller manager.")
flag.BoolVar(&enableLeaderElection, "enable-leader-election", true, "Enable leader election for controller manager. Enabling this will ensure there is only one active controller manager.")
flag.BoolVar(&autoAcceptServers, "auto-accept-servers", false, "Add servers as 'accepted' when they register with Sidero API.")
flag.BoolVar(&insecureWipe, "insecure-wipe", true, "Wipe head of the disk only (if false, wipe whole disk).")
flag.DurationVar(&serverRebootTimeout, "server-reboot-timeout", constants.DefaultServerRebootTimeout, "Timeout to wait for the server to restart and start wipe.")
@@ -76,6 +76,15 @@ func main() {

flag.Parse()

// workaround for clusterctl not accepting empty value as default value
if extraAgentKernelArgs == "-" {
extraAgentKernelArgs = ""
}

if apiEndpoint == "-" {
apiEndpoint = ""
}

// only for testing, doesn't affect production, default values simulate no failures
api.DefaultDice = api.NewFailureDice(testPowerSimulatedExplicitFailureProb, testPowerSimulatedSilentFailureProb)

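To make the `-` workaround above concrete: `clusterctl` does not accept an empty string as a `${VAR:=default}` default, so the manifest defaults `--api-endpoint` and `--extra-agent-kernel-args` to `-`, and the manager translates `-` back to the empty value at startup. A sketch of the resulting behavior (the IP address is illustrative):

```bash
# Variable unset: the manifest renders "--api-endpoint=-", which the manager
# treats as empty and falls back to the node IP.
clusterctl init -b talos -c talos -i sidero

# Variable set: the manifest renders "--api-endpoint=192.168.1.150".
SIDERO_CONTROLLER_MANAGER_API_ENDPOINT=192.168.1.150 \
  clusterctl init -b talos -c talos -i sidero
```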
5 changes: 4 additions & 1 deletion app/metal-metadata-server/config/server/server.yaml
@@ -28,12 +28,15 @@ spec:
labels:
control-plane: metal-metadata-server
spec:
hostNetwork: ${SIDERO_METADATA_SERVER_HOST_NETWORK:=false}
containers:
- image: server:latest
imagePullPolicy: Always
args:
- --port=${SIDERO_METADATA_SERVER_PORT:=8080}
name: server
ports:
- containerPort: 8080
- containerPort: ${SIDERO_METADATA_SERVER_PORT:=8080}
name: http
protocol: TCP
resources:
14 changes: 13 additions & 1 deletion docs/website/content/docs/v0.3/Getting Started/installation.md
@@ -5,10 +5,22 @@ weight: 2

# Installation

As of Cluster API version 0.3.9, Sidero is included as a default infrastructure provider in clusterctl.
As of Cluster API version 0.3.9, Sidero is included as a default infrastructure provider in `clusterctl`.

To install Sidero and the other Talos providers, simply issue:

```bash
clusterctl init -b talos -c talos -i sidero
```

Sidero supports several variables to configure the installation; these variables can be set either as environment
variables or as variables in the `clusterctl` configuration (see the example after this list):

* `SIDERO_CONTROLLER_MANAGER_HOST_NETWORK` (`false`): run `sidero-controller-manager` on host network
* `SIDERO_CONTROLLER_MANAGER_API_ENDPOINT` (empty): specifies the IP address the controller manager can be reached on; defaults to the node IP
* `SIDERO_CONTROLLER_MANAGER_EXTRA_AGENT_KERNEL_ARGS` (empty): specifies additional Linux kernel arguments for the Sidero agent (for example, different console settings)
* `SIDERO_CONTROLLER_MANAGER_AUTO_ACCEPT_SERVERS` (`false`): automatically accept discovered servers; by default, `.spec.accepted` must be changed to `true` to accept the server
* `SIDERO_CONTROLLER_MANAGER_INSECURE_WIPE` (`true`): wipe only the first megabyte of each disk on the server; otherwise, wipe the full disk
* `SIDERO_CONTROLLER_MANAGER_SERVER_REBOOT_TIMEOUT` (`20m`): timeout for the server reboot (how long the server might take to reboot before Sidero retries the IPMI reboot operation)
* `SIDERO_METADATA_SERVER_HOST_NETWORK` (`false`): run `sidero-metadata-server` on host network
* `SIDERO_METADATA_SERVER_PORT` (`8080`): port to use for the metadata server
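For example, to run both components on the host network and pin the controller manager API endpoint, a minimal sketch (the IP address and port are illustrative):

```bash
export SIDERO_CONTROLLER_MANAGER_HOST_NETWORK=true
export SIDERO_CONTROLLER_MANAGER_API_ENDPOINT=192.168.1.150   # an IP the servers can reach
export SIDERO_METADATA_SERVER_HOST_NETWORK=true
export SIDERO_METADATA_SERVER_PORT=9091

clusterctl init -b talos -c talos -i sidero
```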
8 changes: 4 additions & 4 deletions docs/website/content/docs/v0.3/Getting Started/resources.md
@@ -91,21 +91,21 @@ These define a desired deployment environment for Talos, including things like w
Sidero allows you to define a default environment, as well as other environments that may be specific to a subset of nodes.
Users can override the environment at the `ServerClass` or `Server` level, if you have requirements for different kernels or kernel parameters.

See the [Environments](/docs/v0.1/configuration/environments/) section of our Configuration docs for examples and more detail.
See the [Environments](/docs/v0.3/configuration/environments/) section of our Configuration docs for examples and more detail.

#### `Servers`

These represent physical machines as resources in the management plane.
These `Servers` are created when the physical machine PXE boots and completes a "discovery" process in which it registers with the management plane and provides SMBIOS information such as the CPU manufacturer and version, and memory information.

See the [Servers](/docs/v0.1/configuration/servers/) section of our Configuration docs for examples and more detail.
See the [Servers](/docs/v0.3/configuration/servers/) section of our Configuration docs for examples and more detail.

#### `ServerClasses`

`ServerClasses` are a grouping of the `Servers` mentioned above, grouped to create classes of servers based on Memory, CPU or other attributes.
These can be used to compose a bank of `Servers` that are eligible for provisioning.

See the [ServerClasses](/docs/v0.1/configuration/serverclasses/) section of our Configuration docs for examples and more detail.
See the [ServerClasses](/docs/v0.3/configuration/serverclasses/) section of our Configuration docs for examples and more detail.

### Metal Metadata Server

@@ -116,4 +116,4 @@ While the metadata server does not present unique CRDs within Kubernetes, it's i
The metadata server may be familiar to you if you have used cloud environments previously.
Using Talos machine configurations created by the Talos Cluster API bootstrap provider, along with patches specified by editing `Server`/`ServerClass` resources or `TalosConfig`/`TalosControlPlane` resources, metadata is returned to servers who query the metadata server at boot time.

See the [Metadata](/docs/v0.1/configuration/metadata/) section of our Configuration docs for examples and more detail.
See the [Metadata](/docs/v0.3/configuration/metadata/) section of our Configuration docs for examples and more detail.
42 changes: 9 additions & 33 deletions docs/website/content/docs/v0.3/Guides/bootstrapping.md
@@ -123,38 +123,14 @@ As of Cluster API version 0.3.9, Sidero is included as a default infrastructure
To install Sidero and the other Talos providers, simply issue:
```bash
clusterctl init -b talos -c talos -i sidero
SIDERO_METADATA_SERVER_HOST_NETWORK=true \
SIDERO_METADATA_SERVER_PORT=9091 \
SIDERO_CONTROLLER_MANAGER_HOST_NETWORK=true \
SIDERO_CONTROLLER_MANAGER_API_ENDPOINT=$PUBLIC_IP \
clusterctl init -b talos -c talos -i sidero
```
## Patch Components
We will now want to ensure that the Sidero services that got created are publicly accessible across our subnet.
This will allow the metal machines to speak to these services later.
### Patch the Metadata Server
Update the metadata server component with the following patches:
```bash
## Update args to use 9091 for port
kubectl patch deploy -n sidero-system sidero-metadata-server --type='json' -p='[{"op": "add", "path": "/spec/template/spec/containers/0/args", "value": ["--port=9091"]}]'
## Tweak container port to match
kubectl patch deploy -n sidero-system sidero-metadata-server --type='json' -p='[{"op": "replace", "path": "/spec/template/spec/containers/0/ports", "value": [{"containerPort": 9091,"name": "http"}]}]'
## Use host networking
kubectl patch deploy -n sidero-system sidero-metadata-server --type='json' -p='[{"op": "add", "path": "/spec/template/spec/hostNetwork", "value": true}]'
```
### Patch the Metal Controller Manager
```bash
## Update args to specify the api endpoint to use for registration
kubectl patch deploy -n sidero-system sidero-controller-manager --type='json' -p='[{"op": "add", "path": "/spec/template/spec/containers/1/args", "value": ["--api-endpoint='$PUBLIC_IP'","--metrics-addr=127.0.0.1:8080","--enable-leader-election"]}]'
## Use host networking
kubectl patch deploy -n sidero-system sidero-controller-manager --type='json' -p='[{"op": "add", "path": "/spec/template/spec/hostNetwork", "value": true}]'
```
The variables above will allow the metal machines to speak to these services later.
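As an alternative to prefixing `clusterctl init` with environment variables, the same settings can be kept in the `clusterctl` configuration file (by default `$HOME/.cluster-api/clusterctl.yaml`), which also preserves them for later upgrades; a sketch, assuming the default configuration location:
```bash
# PUBLIC_IP is the routable IP chosen earlier in this guide.
mkdir -p "$HOME/.cluster-api"
cat >> "$HOME/.cluster-api/clusterctl.yaml" <<EOF
SIDERO_METADATA_SERVER_HOST_NETWORK: "true"
SIDERO_METADATA_SERVER_PORT: "9091"
SIDERO_CONTROLLER_MANAGER_HOST_NETWORK: "true"
SIDERO_CONTROLLER_MANAGER_API_ENDPOINT: "$PUBLIC_IP"
EOF

clusterctl init -b talos -c talos -i sidero
```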
## Register the Servers
@@ -179,7 +155,7 @@ Servers can be accepted by issuing a patch command like:
kubectl patch server 00000000-0000-0000-0000-d05099d33360 --type='json' -p='[{"op": "replace", "path": "/spec/accepted", "value": true}]'
```
For more information on server acceptance, see the [server docs](/docs/v0.1/configuration/servers).
For more information on server acceptance, see the [server docs](/docs/v0.3/configuration/servers).
## Create the Default Environment
@@ -227,7 +203,7 @@ EOF
We must now create a server class to wrap our servers we registered.
This is necessary for using the Talos control plane provider for Cluster API.
The qualifiers needed for your server class will differ based on the data provided by your registration flow.
See the [server class docs](/docs/v0.1/configuration/serverclasses) for more info on how these work.
See the [server class docs](/docs/v0.3/configuration/serverclasses) for more info on how these work.
Here is an example of how to apply the server class once you have the proper info:
@@ -250,7 +226,7 @@ In order to fetch hardware information, you can use
kubectl get server -o yaml
```
Note that for bare-metal setup, you would need to specify an installation disk. See the [Installation Disk](/docs/v0.1/configuration/servers/#installation-disk)
Note that for bare-metal setup, you would need to specify an installation disk. See the [Installation Disk](/docs/v0.3/configuration/servers/#installation-disk)
Once created, you should see the servers that make up your server class appear as "available":
4 changes: 2 additions & 2 deletions docs/website/content/docs/v0.3/Guides/first-cluster.md
@@ -17,7 +17,7 @@ There will be two main steps in this guide: reconfiguring the Sidero components
In this guide, we will convert the metadata service to a NodePort service and the other services to use host networking.
This is also necessary because some protocols like TFTP don't allow for port configuration.
Along with some nodeSelectors and a scale up of the metal controller manager deployment, creating the services this way allows for the creation of DNS names that point to all management plane nodes and provide an HA experience if desired.
It should also be noted, however, that there are many options for acheiving this functionality.
It should also be noted, however, that there are many options for achieving this functionality.
Users can look into projects like MetalLB or KubeRouter with BGP and ECMP if they desire something else.

Metal Controller Manager:
@@ -104,7 +104,7 @@ Servers can be accepted by issuing a patch command like:
kubectl patch server 00000000-0000-0000-0000-d05099d33360 --type='json' -p='[{"op": "replace", "path": "/spec/accepted", "value": true}]'
```
For more information on server acceptance, see the [server docs](/docs/v0.1/configuration/servers).
For more information on server acceptance, see the [server docs](/docs/v0.3/configuration/servers).
## Create the Cluster
132 changes: 11 additions & 121 deletions sfyra/pkg/capi/capi.go
@@ -7,17 +7,11 @@ package capi

import (
"context"
"encoding/json"
"fmt"
"io/ioutil"
"strings"
"time"
"os"

appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/strategicpatch"
"k8s.io/client-go/kubernetes"
"sigs.k8s.io/cluster-api/cmd/clusterctl/client"
runtimeclient "sigs.k8s.io/controller-runtime/pkg/client"
@@ -125,6 +119,16 @@ func (clusterAPI *Manager) Install(ctx context.Context) error {
return err
}

// set template environment variables
os.Setenv("SIDERO_METADATA_SERVER_PORT", "9091")
os.Setenv("SIDERO_METADATA_SERVER_HOST_NETWORK", "true")

os.Setenv("SIDERO_CONTROLLER_MANAGER_HOST_NETWORK", "true")
os.Setenv("SIDERO_CONTROLLER_MANAGER_API_ENDPOINT", clusterAPI.cluster.SideroComponentsIP().String())
os.Setenv("SIDERO_CONTROLLER_MANAGER_SERVER_REBOOT_TIMEOUT", "30s") // wiping/reboot is fast in the test environment
os.Setenv("SIDERO_CONTROLLER_MANAGER_TEST_POWER_EXPLICIT_FAILURE", fmt.Sprintf("%f", clusterAPI.options.PowerSimulatedExplicitFailureProb))
os.Setenv("SIDERO_CONTROLLER_MANAGER_TEST_POWER_SILENT_FAILURE", fmt.Sprintf("%f", clusterAPI.options.PowerSimulatedSilentFailureProb))

options := client.InitOptions{
Kubeconfig: kubeconfig,
CoreProvider: "",
@@ -139,120 +143,6 @@ func (clusterAPI *Manager) Install(ctx context.Context) error {
_, err = clusterAPI.clientset.CoreV1().Namespaces().Get(ctx, "sidero-system", metav1.GetOptions{})
if err != nil {
_, err = clusterAPI.client.Init(options)
if err != nil {
return err
}
}

return clusterAPI.patch(ctx)
}

func (clusterAPI *Manager) patch(ctx context.Context) error {
const (
sideroNamespace = "sidero-system"
sideroMetadataServer = "sidero-metadata-server"
sideroControllerManager = "sidero-controller-manager"
)

// sidero-metadata-server
deployment, err := clusterAPI.clientset.AppsV1().Deployments(sideroNamespace).Get(ctx, sideroMetadataServer, metav1.GetOptions{})
if err != nil {
return err
}

oldDeployment, err := json.Marshal(deployment)
if err != nil {
return err
}

argsPatched := false

for _, arg := range deployment.Spec.Template.Spec.Containers[0].Args {
if arg == "--port=9091" {
argsPatched = true
}
}

if !argsPatched {
deployment.Spec.Template.Spec.Containers[0].Args = append(deployment.Spec.Template.Spec.Containers[0].Args, "--port=9091")
}

deployment.Spec.Template.Spec.Containers[0].Ports = []corev1.ContainerPort{
{
ContainerPort: 9091,
HostPort: 9091,
Name: "http",
Protocol: corev1.ProtocolTCP,
},
}
deployment.Spec.Template.Spec.HostNetwork = true
deployment.Spec.Strategy.RollingUpdate = nil
deployment.Spec.Strategy.Type = appsv1.RecreateDeploymentStrategyType

newDeployment, err := json.Marshal(deployment)
if err != nil {
return err
}

patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldDeployment, newDeployment, appsv1.Deployment{})
if err != nil {
return fmt.Errorf("failed to create two way merge patch: %w", err)
}

_, err = clusterAPI.clientset.AppsV1().Deployments(sideroNamespace).Patch(ctx, deployment.Name, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{
FieldManager: "sfyra",
})
if err != nil {
return err
}

// sidero-controller-manager
deployment, err = clusterAPI.clientset.AppsV1().Deployments(sideroNamespace).Get(ctx, sideroControllerManager, metav1.GetOptions{})
if err != nil {
return err
}

oldDeployment, err = json.Marshal(deployment)
if err != nil {
return err
}

apiPatch := false

for _, arg := range deployment.Spec.Template.Spec.Containers[1].Args {
if strings.HasPrefix(arg, "--api-endpoint") {
apiPatch = true
}
}

if !apiPatch {
deployment.Spec.Template.Spec.Containers[1].Args = append(
deployment.Spec.Template.Spec.Containers[1].Args,
fmt.Sprintf("--api-endpoint=%s", clusterAPI.cluster.SideroComponentsIP()),
fmt.Sprintf("--server-reboot-timeout=%s", 30*time.Second), // wiping/reboot is fast in the test environment
fmt.Sprintf("--test-power-simulated-explicit-failure-prob=%f", clusterAPI.options.PowerSimulatedExplicitFailureProb),
fmt.Sprintf("--test-power-simulated-silent-failure-prob=%f", clusterAPI.options.PowerSimulatedSilentFailureProb),
)
}

deployment.Spec.Template.Spec.HostNetwork = true
deployment.Spec.Strategy.RollingUpdate = nil
deployment.Spec.Strategy.Type = appsv1.RecreateDeploymentStrategyType

newDeployment, err = json.Marshal(deployment)
if err != nil {
return err
}

patchBytes, err = strategicpatch.CreateTwoWayMergePatch(oldDeployment, newDeployment, appsv1.Deployment{})
if err != nil {
return fmt.Errorf("failed to create two way merge patch: %w", err)
}

_, err = clusterAPI.clientset.AppsV1().Deployments(sideroNamespace).Patch(ctx, deployment.Name, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{
FieldManager: "sfyra",
})
if err != nil {
return err
}

