Skip to content

Commit

Permalink
Intermediate progress
Browse files Browse the repository at this point in the history
Signed-off-by: Tyler Gu <[email protected]>
  • Loading branch information
tylergu committed Jan 30, 2024
1 parent 7ba725a commit 84c1bd9
Show file tree
Hide file tree
Showing 3 changed files with 270 additions and 189 deletions.
51 changes: 31 additions & 20 deletions acto/kubernetes_engine/base.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import subprocess
import time
from abc import ABC, abstractmethod
from typing import Callable, Dict, List
from typing import Callable, Optional

import kubernetes

Expand All @@ -12,18 +12,24 @@


class KubernetesEngine(ABC):
"""Interface for KubernetesEngine"""

@abstractmethod
def __init__(self, acto_namespace: int,
posthooks: List[KubernetesEnginePostHookType] = None,
feature_gates: Dict[str, bool] = None) -> None: ...
'''Constructor for KubernetesEngine
Args:
acto_namespace: the namespace of the acto
posthooks: a list of posthooks to be executed after the cluster is created
feature_gates: a list of feature gates to be enabled
'''
def __init__(
self,
acto_namespace: int,
posthooks: Optional[list[KubernetesEnginePostHookType]] = None,
feature_gates: Optional[dict[str, bool]] = None,
num_nodes=1,
version="",
) -> None:
"""Constructor for KubernetesEngine
Args:
acto_namespace: the namespace of the acto
posthooks: a list of posthooks to be executed after the cluster is created
feature_gates: a list of feature gates to be enabled
"""

@abstractmethod
def configure_cluster(self, num_nodes: int, version: str):
Expand All @@ -42,38 +48,43 @@ def load_images(self, images_archive_path: str, name: str):
pass

@abstractmethod
def delete_cluster(self, name: str, kubeconfig: str, ):
def delete_cluster(
self,
name: str,
kubeconfig: str,
):
pass

def restart_cluster(self, name: str, kubeconfig: str):
logger = get_thread_logger(with_prefix=False)

retry_count = 3

while (retry_count > 0):
while retry_count > 0:

Check warning on line 63 in acto/kubernetes_engine/base.py

View workflow job for this annotation

GitHub Actions / coverage-report

Missing coverage

Missing coverage on line 63
try:
self.delete_cluster(name, kubeconfig)
time.sleep(1)
self.create_cluster(name, kubeconfig)
time.sleep(1)
logger.info('Created cluster')
logger.info("Created cluster")

Check warning on line 69 in acto/kubernetes_engine/base.py

View workflow job for this annotation

GitHub Actions / coverage-report

Missing coverage

Missing coverage on line 69
except Exception as e:
logger.warning(
"%s happened when restarting cluster, retrying...", e)
"%s happened when restarting cluster, retrying...", e
)
retry_count -= 1
if retry_count == 0:
raise e
continue
break

def get_node_list(self, name: str):
'''Fetch the name of worker nodes inside a cluster
"""Fetch the name of worker nodes inside a cluster
Args:
1. name: name of the cluster name
'''
"""
logger = get_thread_logger(with_prefix=False)

cmd = ['docker', 'ps', '--format', '{{.Names}}', '-f']
cmd = ["docker", "ps", "--format", "{{.Names}}", "-f"]

if name == None:
cmd.append(f"name={CONST.CLUSTER_NAME}")
Expand All @@ -82,7 +93,7 @@ def get_node_list(self, name: str):

p = subprocess.run(cmd, capture_output=True, text=True)

if p.stdout == None or p.stdout == '':
if p.stdout == None or p.stdout == "":
# no nodes can be found, returning an empty array
return []
return p.stdout.strip().split('\n')
return p.stdout.strip().split("\n")
146 changes: 83 additions & 63 deletions acto/kubernetes_engine/kind.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import os
import subprocess
import time
from typing import Dict, List
from typing import Any, Dict, List

import kubernetes
import yaml
Expand All @@ -14,97 +14,114 @@


class Kind(base.KubernetesEngine):

def __init__(
self, acto_namespace: int, posthooks: List[base.KubernetesEnginePostHookType] = None,
feature_gates: Dict[str, bool] = None):
self._config_path = os.path.join(CONST.CLUSTER_CONFIG_FOLDER, f'KIND-{acto_namespace}.yaml')
self,
acto_namespace: int,
posthooks: List[base.KubernetesEnginePostHookType] = None,
feature_gates: Dict[str, bool] = None,
):
self._config_path = os.path.join(
CONST.CLUSTER_CONFIG_FOLDER, f"KIND-{acto_namespace}.yaml"
)
self._posthooks = posthooks
self._feature_gates = feature_gates

def configure_cluster(self, num_nodes: int, version: str):
'''Create config file for kind'''
config_dict = {}
config_dict['kind'] = 'Cluster'
config_dict['apiVersion'] = 'kind.x-k8s.io/v1alpha4'
config_dict['nodes'] = []
"""Create config file for kind"""
config_dict: dict[str, Any] = {}
config_dict["kind"] = "Cluster"
config_dict["apiVersion"] = "kind.x-k8s.io/v1alpha4"
config_dict["nodes"] = []
extra_mounts = []
extra_mounts.append({'hostPath': 'profile/data', 'containerPath': '/tmp/profile'})
extra_mounts.append(
{"hostPath": "profile/data", "containerPath": "/tmp/profile"}
)
for _ in range(num_nodes - 1):
config_dict['nodes'].append({
'role': 'worker',
'extraMounts': [{
'hostPath': 'profile/data',
'containerPath': '/tmp/profile'
}]
})
config_dict["nodes"].append(
{
"role": "worker",
"extraMounts": [
{
"hostPath": "profile/data",
"containerPath": "/tmp/profile",
}
],
}
)
for _ in range(1):
config_dict['nodes'].append({
'role': 'control-plane',
'extraMounts': [{
'hostPath': 'profile/data',
'containerPath': '/tmp/profile'
}]
})
config_dict["nodes"].append(
{
"role": "control-plane",
"extraMounts": [
{
"hostPath": "profile/data",
"containerPath": "/tmp/profile",
}
],
}
)

if self._feature_gates:
config_dict['featureGates'] = {}
config_dict["featureGates"] = {}
for key, value in self._feature_gates.items():
config_dict['featureGates'][key] = value
config_dict["featureGates"][key] = value

Check warning on line 67 in acto/kubernetes_engine/kind.py

View workflow job for this annotation

GitHub Actions / coverage-report

Missing coverage

Missing coverage on lines 65-67

try:
os.mkdir(CONST.CLUSTER_CONFIG_FOLDER)
except FileExistsError:
pass

with open(self._config_path, 'w') as config_file:
with open(self._config_path, "w") as config_file:
yaml.dump(config_dict, config_file)

self._k8s_version = version

def get_context_name(self, cluster_name: str) -> str:
'''Returns the kubecontext based onthe cluster name
"""Returns the kubecontext based onthe cluster name
KIND always adds `kind` before the cluster name
'''
return f'kind-{cluster_name}'
"""
return f"kind-{cluster_name}"

def create_cluster(self, name: str, kubeconfig: str):
'''Use subprocess to create kind cluster
"""Use subprocess to create kind cluster
Args:
name: name of the kind cluster
config: path of the config file for cluster
version: k8s version
'''
print_event('Creating a Kind cluster...')
cmd = ['kind', 'create', 'cluster']
"""
print_event("Creating a Kind cluster...")
cmd = ["kind", "create", "cluster"]

if name:
cmd.extend(['--name', name])
cmd.extend(["--name", name])
else:
cmd.extend(['--name', CONST.CLUSTER_NAME])
cmd.extend(["--name", CONST.CLUSTER_NAME])

Check warning on line 98 in acto/kubernetes_engine/kind.py

View workflow job for this annotation

GitHub Actions / coverage-report

Missing coverage

Missing coverage on line 98

if kubeconfig:
logging.info(f'Kubeconfig: {kubeconfig}')
cmd.extend(['--kubeconfig', kubeconfig])
logging.info(f"Kubeconfig: {kubeconfig}")
cmd.extend(["--kubeconfig", kubeconfig])
else:
raise Exception('Missing kubeconfig for kind create')
raise Exception("Missing kubeconfig for kind create")

Check warning on line 104 in acto/kubernetes_engine/kind.py

View workflow job for this annotation

GitHub Actions / coverage-report

Missing coverage

Missing coverage on line 104

cmd.extend(['--config', self._config_path])
cmd.extend(["--config", self._config_path])

cmd.extend(['--image', f"kindest/node:{self._k8s_version}"])
cmd.extend(["--image", f"kindest/node:{self._k8s_version}"])

p = subprocess.run(cmd)
while p.returncode != 0:
# TODO: retry for three times
logging.error('Failed to create kind cluster, retrying')
logging.error("Failed to create kind cluster, retrying")

Check warning on line 113 in acto/kubernetes_engine/kind.py

View workflow job for this annotation

GitHub Actions / coverage-report

Missing coverage

Missing coverage on line 113
self.delete_cluster(name, kubeconfig)
time.sleep(5)
p = subprocess.run(cmd)

try:
kubernetes.config.load_kube_config(config_file=kubeconfig,
context=self.get_context_name(name))
apiclient = kubernetes_client(kubeconfig, self.get_context_name(name))
kubernetes.config.load_kube_config(
config_file=kubeconfig, context=self.get_context_name(name)
)
apiclient = kubernetes_client(
kubeconfig, self.get_context_name(name)
)
except Exception as e:
logging.debug("Incorrect kube config file:")
with open(kubeconfig) as f:
Expand All @@ -116,49 +133,52 @@ def create_cluster(self, name: str, kubeconfig: str):
posthook(apiclient=apiclient)

def load_images(self, images_archive_path: str, name: str):
logging.info('Loading preload images')
cmd = ['kind', 'load', 'image-archive']
logging.info("Loading preload images")
cmd = ["kind", "load", "image-archive"]
if images_archive_path == None:
logging.warning('No image to preload, we at least should have operator image')
logging.warning(
"No image to preload, we at least should have operator image"
)

if name != None:
cmd.extend(['--name', name])
cmd.extend(["--name", name])
else:
logging.error('Missing cluster name for kind load')
logging.error("Missing cluster name for kind load")

p = subprocess.run(cmd + [images_archive_path])
if p.returncode != 0:
logging.error('Failed to preload images archive')
logging.error("Failed to preload images archive")

Check warning on line 150 in acto/kubernetes_engine/kind.py

View workflow job for this annotation

GitHub Actions / coverage-report

Missing coverage

Missing coverage on lines 136-150

def delete_cluster(self, name: str, kubeconfig: str):
cmd = ['kind', 'delete', 'cluster']
cmd = ["kind", "delete", "cluster"]

if name:
cmd.extend(['--name', name])
cmd.extend(["--name", name])
else:
logging.error('Missing cluster name for kind delete')
logging.error("Missing cluster name for kind delete")

Check warning on line 158 in acto/kubernetes_engine/kind.py

View workflow job for this annotation

GitHub Actions / coverage-report

Missing coverage

Missing coverage on line 158

if kubeconfig:
cmd.extend(['--kubeconfig', kubeconfig])
cmd.extend(["--kubeconfig", kubeconfig])
else:
raise Exception('Missing kubeconfig for kind create')
raise Exception("Missing kubeconfig for kind create")

Check warning on line 163 in acto/kubernetes_engine/kind.py

View workflow job for this annotation

GitHub Actions / coverage-report

Missing coverage

Missing coverage on line 163

while subprocess.run(cmd).returncode != 0:
continue

def get_node_list(self, name: str):
'''Get agent containers list of a K3S cluster
"""Get agent containers list of a K3S cluster
Args:
1. Name of the cluster
'''
worker_name_template = '%s-worker'
control_plane_name_template = '%s-control-plane'
"""
worker_name_template = "%s-worker"
control_plane_name_template = "%s-control-plane"

if name == None:
name = CONST.CLUSTER_NAME

res = super().get_node_list(worker_name_template % name) + \
super().get_node_list(control_plane_name_template % name)
res = super().get_node_list(
worker_name_template % name
) + super().get_node_list(control_plane_name_template % name)

if len(res) == 0:
# no worker node can be found
Expand Down
Loading

0 comments on commit 84c1bd9

Please sign in to comment.