Skip to content
This repository has been archived by the owner on Jan 18, 2023. It is now read-only.

Commit

Permalink
Merge ssh://gitlab.devtools.intel.com:29418/OrchSW/CNO/containers_cpu…
Browse files Browse the repository at this point in the history
…_manager_for_kubernetes
  • Loading branch information
pbrownlow7 committed Jan 29, 2021
2 parents 5d9d30e + fe9ea32 commit 2295a29
Show file tree
Hide file tree
Showing 16 changed files with 192 additions and 97 deletions.
21 changes: 12 additions & 9 deletions cmk.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,14 @@
cmk init [--num-exclusive-cores=<num>]
[--num-shared-cores=<num>] [--socket-id=<num>]
[--shared-mode=<mode>] [--exclusive-mode=<mode>]
[--excl-non-isolcpus=<list>]
cmk discover [--no-taint]
[--excl-non-isolcpus=<list>] [--namespace=<name>]
cmk discover [--namespace=<name>] [--no-taint]
cmk describe
cmk reconcile [--publish] [--interval=<seconds>]
cmk reconcile [--publish] [--interval=<seconds>] [--namespace=<name>]
cmk isolate [--socket-id=<num>] --pool=<pool> <command>
[-- <args>...][--no-affinity]
[-- <args>...][--no-affinity] [--namespace=<name>]
cmk install [--install-dir=<dir>]
cmk node-report [--publish] [--interval=<seconds>]
cmk node-report [--publish] [--interval=<seconds>] [--namespace=<name>]
cmk uninstall [--install-dir=<dir>] [--conf-dir=<dir>] [--namespace=<name>]
cmk webhook [--conf-file=<file>] [--cafile=<file>] [--insecure=<bool>]
cmk reconfigure [--node-name=<name>] [--num-exclusive-cores=<num>]
Expand Down Expand Up @@ -139,10 +139,10 @@ def main():
int(args["--num-shared-cores"]),
args["--exclusive-mode"],
args["--shared-mode"],
args["--excl-non-isolcpus"])
args["--excl-non-isolcpus"], args["--namespace"])
return
if args["discover"]:
discover.discover(args["--no-taint"])
discover.discover(args["--namespace"], args["--no-taint"])
return
if args["describe"]:
describe.describe()
Expand All @@ -152,11 +152,13 @@ def main():
args["--no-affinity"],
args["<command>"],
args["<args>"],
args["--namespace"],
args["--socket-id"])
return
if args["reconcile"]:
reconcile.reconcile(int(args["--interval"]),
args["--publish"])
args["--publish"],
args["--namespace"])
return
if args["install"]:
install.install(args["--install-dir"])
Expand All @@ -168,7 +170,8 @@ def main():
return
if args["node-report"]:
nodereport.nodereport(int(args["--interval"]),
args["--publish"])
args["--publish"],
args["--namespace"])
return
if args["webhook"]:
webhook.webhook(args["--conf-file"], args["--cafile"],
Expand Down
12 changes: 7 additions & 5 deletions intel/clusterinit.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,9 +148,9 @@ def run_cmd_pods(cmd_list, cmd_init_list, cmk_img, cmk_img_pol,
for cmd in cmd_list:
args = ""
if cmd == "reconcile":
args = "/cmk/cmk.py isolate --pool=infra /cmk/cmk.py -- reconcile --interval=5 --publish" # noqa: E501
args = ("/cmk/cmk.py isolate --pool=infra --namespace={} /cmk/cmk.py -- reconcile --interval=5 --publish --namespace={}").format(namespace, namespace) # noqa: E501
elif cmd == "nodereport":
args = "/cmk/cmk.py isolate --pool=infra /cmk/cmk.py -- node-report --interval=5 --publish" # noqa: E501
args = ("/cmk/cmk.py isolate --pool=infra --namespace={} /cmk/cmk.py -- node-report --interval=5 --publish --namespace={}").format(namespace, namespace) # noqa: E501

update_pod_with_container(pod, cmd, cmk_img, cmk_img_pol, args)
elif cmd_init_list:
Expand All @@ -160,9 +160,10 @@ def run_cmd_pods(cmd_list, cmd_init_list, cmk_img, cmk_img_pol,
if cmd == "init":
args = ("/cmk/cmk.py init --num-exclusive-cores={} "
"--num-shared-cores={} --shared-mode={} "
"--exclusive-mode={} --excl-non-isolcpus={}")\
"--exclusive-mode={} --excl-non-isolcpus={} "
"--namespace={}")\
.format(num_exclusive_cores, num_shared_cores, shared_mode,
exclusive_mode, excl_non_isolcpus)
exclusive_mode, excl_non_isolcpus, namespace)
# If init is the only cmd in cmd_init_list, it should be run
# as regular container as spec.containers is a required field.
# Otherwise, it should be run as init-container.
Expand All @@ -174,7 +175,8 @@ def run_cmd_pods(cmd_list, cmd_init_list, cmk_img, cmk_img_pol,
cmk_img_pol, args)
else:
if cmd == "discover":
args = "/cmk/cmk.py discover"
args = ("/cmk/cmk.py discover --namespace={}")\
.format(namespace)
if no_taint:
args = " ".join([args, "--no-taint"])
elif cmd == "install":
Expand Down
47 changes: 31 additions & 16 deletions intel/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,12 @@

class Config:

def __init__(self, cm_name, owner):
def __init__(self, cm_name, owner, cm_namespace):
self.c = None
self.c_data = None
self.cm_name = cm_name
self.owner = owner
self.cm_namespace = cm_namespace

def lock(self):
# The lock function is used to avoid two isolate
Expand All @@ -40,15 +41,16 @@ def lock(self):
time.sleep(random.random())
while True:
try:
c = k8s.get_config_map(None, self.cm_name, "default")
c = k8s.get_config_map(None, self.cm_name, self.cm_namespace)
owner = c.metadata.annotations["Owner"]
if owner != "":
time.sleep(1)
continue
else:
c.metadata.annotations["Owner"] = self.owner
try:
k8s.patch_config_map(None, self.cm_name, c, "default")
k8s.patch_config_map(None, self.cm_name,
c, self.cm_namespace)
except K8sApiException as err:
logging.error("Error while retreiving configmap {}"
.format(self.cm_name))
Expand All @@ -73,7 +75,8 @@ def unlock(self):
}
clusterinit.update_configmap(configmap, self.cm_name, data)
try:
k8s.patch_config_map(None, self.cm_name, configmap, "default")
k8s.patch_config_map(None, self.cm_name,
configmap, self.cm_namespace)
except K8sApiException as err:
logging.error("Error while retreiving configmap {}"
.format(self.cm_name))
Expand Down Expand Up @@ -232,7 +235,7 @@ def as_dict(self):
return result


def new(platform, excl_non_isolcpus, name):
def new(platform, excl_non_isolcpus, name, namespace):
# Creates the new CMK configuration for the node. It creates a
# configmap object and POSTs it to the K8s API Server

Expand All @@ -243,20 +246,32 @@ def new(platform, excl_non_isolcpus, name):
config = update_configmap_exclusive("exclusive-non-isolcpus",
platform, config)
config = update_configmap_shared("infra", platform, config)

configmap = k8sclient.V1ConfigMap()
data = {
"config": yaml.dump(config)
}
clusterinit.update_configmap(configmap, name, data)

try:
k8s.create_config_map(None, configmap, "default")
except K8sApiException as err:
logging.error("Exception when creating config map {}"
.format(name))
logging.error(err.reason)
sys.exit(1)

configmap = k8s.get_config_map(None, name, namespace)

if configmap is None:
configmap = k8sclient.V1ConfigMap()
clusterinit.update_configmap(configmap, name, data)

try:
k8s.create_config_map(None, configmap, namespace)
except K8sApiException as err:
logging.error("Exception when creating config map {}"
.format(name))
logging.error(err.reason)
sys.exit(1)
else:
clusterinit.update_configmap(configmap, name, data)
try:
k8s.patch_config_map(None, name, configmap, namespace)
except K8sApiException as err:
logging.error("Error while patching configmap {}"
.format(name))
logging.error(err.reason)
sys.exit(1)


def update_configmap_exclusive(pool_name, platform, config):
Expand Down
14 changes: 7 additions & 7 deletions intel/discover.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
# discover reads the CMK configuration file, patches kubernetes nodes with
# appropriate number of CMK Opaque Integer Resource (OIR) slots and applies
# the appropriate CMK node labels and taints.
def discover(no_taint=False):
def discover(namespace, no_taint=False):

version = util.parse_version(k8s.get_kube_version(None))
if version == util.parse_version("v1.8.0"):
Expand All @@ -38,11 +38,11 @@ def discover(no_taint=False):
if version >= util.parse_version("v1.8.1"):
# Patch the node with the appropriate CMK ER.
logging.debug("Patching the node with the appropriate CMK ER.")
add_node_er()
add_node_er(namespace)
else:
# Patch the node with the appropriate CMK OIR.
logging.debug("Patching the node with the appropriate CMK OIR.")
add_node_oir()
add_node_oir(namespace)

# Add appropriate CMK label to the node.
logging.debug("Adding appropriate CMK label to the node.")
Expand All @@ -55,11 +55,11 @@ def discover(no_taint=False):


# add_node_oir patches the node with the appropriate CMK OIR.
def add_node_oir():
def add_node_oir(namespace):
pod_name = os.environ["HOSTNAME"]
node_name = k8s.get_node_from_pod(None, pod_name)
configmap_name = "cmk-config-{}".format(node_name)
c = config.Config(configmap_name, pod_name)
c = config.Config(configmap_name, pod_name, namespace)
c.lock()

num_excl_non_isolcpus = None
Expand Down Expand Up @@ -106,11 +106,11 @@ def add_node_oir():


# add_node_er patches the node with the appropriate CMK extended resources.
def add_node_er():
def add_node_er(namespace):
pod_name = os.environ["HOSTNAME"]
node_name = k8s.get_node_from_pod(None, pod_name)
configmap_name = "cmk-config-{}".format(node_name)
c = config.Config(configmap_name, pod_name)
c = config.Config(configmap_name, pod_name, namespace)
c.lock()

num_excl_non_isolcpus = None
Expand Down
4 changes: 2 additions & 2 deletions intel/init.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

def init(num_exclusive_cores, num_shared_cores,
exclusive_allocation_mode, shared_allocation_mode,
excl_non_isolcpus):
excl_non_isolcpus, namespace):
check_hugepages()

logging.info("Requested exclusive cores = {}.".format(num_exclusive_cores))
Expand All @@ -38,7 +38,7 @@ def init(num_exclusive_cores, num_shared_cores,
pod_name = os.environ["HOSTNAME"]
node_name = k8s.get_node_from_pod(None, pod_name)
configmap_name = "cmk-config-{}".format(node_name)
config.new(platform, excl_non_isolcpus, configmap_name)
config.new(platform, excl_non_isolcpus, configmap_name, namespace)


def configure(num_exclusive_cores, num_shared_cores,
Expand Down
4 changes: 2 additions & 2 deletions intel/isolate.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,15 @@
ENV_NUM_CORES = "CMK_NUM_CORES"


def isolate(pool_name, no_affinity, command, args, socket_id=None):
def isolate(pool_name, no_affinity, command, args, namespace, socket_id=None):
pod_name = os.environ["HOSTNAME"]
if not isinstance(pod_name, str):
logging.error("Pod name is not a string, exiting...")
sys.exit(1)
node_name = k8s.get_node_from_pod(None, pod_name)
configmap_name = "cmk-config-{}".format(node_name)

c = config.Config(configmap_name, pod_name)
c = config.Config(configmap_name, pod_name, namespace)
try:
c.lock()
pools = c.c_data.pools
Expand Down
18 changes: 9 additions & 9 deletions intel/nodereport.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,15 @@
k8s, proc, third_party, topology, util, sst_bf as sst


def nodereport(seconds, publish):
def nodereport(seconds, publish, namespace):
if seconds is None:
seconds = 0
else:
seconds = int(seconds)
should_exit = (seconds <= 0)

while True:
report = generate_report()
report = generate_report(namespace)

print(report.json())

Expand Down Expand Up @@ -76,10 +76,10 @@ def nodereport(seconds, publish):
time.sleep(seconds)


def generate_report():
def generate_report(namespace):
report = NodeReport()
check_describe(report)
check_cmk_config(report)
check_describe(report, namespace)
check_cmk_config(report, namespace)
sst_bf = False
try:
sst_bf = bool(discover.get_node_label(sst.NFD_LABEL))
Expand All @@ -91,26 +91,26 @@ def generate_report():
return report


def check_describe(report):
def check_describe(report, namespace):
try:
pod_name = os.environ["HOSTNAME"]
node_name = k8s.get_node_from_pod(None, pod_name)
configmap_name = "cmk-config-{}".format(node_name)
c = config.get_config(configmap_name)
c = config.get_config(configmap_name, namespace)
report.add_description(c.as_dict())
except Exception:
pass


def check_cmk_config(report):
def check_cmk_config(report, namespace):
check_conf = report.add_check("configDirectory")

# Verify we can read the config directory
try:
pod_name = os.environ["HOSTNAME"]
node_name = k8s.get_node_from_pod(None, pod_name)
configmap_name = "cmk-config-{}".format(node_name)
c = config.get_config(configmap_name)
c = config.get_config(configmap_name, namespace)
except Exception:
check_conf.add_error("Unable to read CMK configmap")
return # Nothing more we can check for now
Expand Down
4 changes: 2 additions & 2 deletions intel/reconcile.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@
from . import config, proc, third_party, custom_resource, k8s, util


def reconcile(seconds, publish):
def reconcile(seconds, publish, namespace):
pod_name = os.environ["HOSTNAME"]
node_name = k8s.get_node_from_pod(None, pod_name)
configmap_name = "cmk-config-{}".format(node_name)
c = config.Config(configmap_name, pod_name)
c = config.Config(configmap_name, pod_name, namespace)
report = None

if seconds is None:
Expand Down
2 changes: 1 addition & 1 deletion intel/reconfigure.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def reconfigure(node_name, num_exclusive_cores, num_shared_cores,
node_name = k8s.get_node_from_pod(None, pod_name)
config_cm = "cmk-config-{}".format(node_name)

conf = config.Config(config_cm, pod_name)
conf = config.Config(config_cm, pod_name, namespace)
num_exclusive_cores = int(num_exclusive_cores)
num_shared_cores = int(num_shared_cores)

Expand Down
3 changes: 3 additions & 0 deletions intel/webhook.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,13 +154,16 @@ def mutate(admission_review, mutations_file):

# apply mutation to containers
for i in range(len(pod['spec']['containers'])):
# Need to reload mutations as they were getting changed
mutations = load_mutations(mutations_file)
container = pod['spec']['containers'][i]

pod['spec']['containers'][i] = apply_mutation(container, mutations)

# apply mutation to initContainers which may not exist
try:
for i in range(len(pod['spec']['initContainers'])):
mutations = load_mutations(mutations_file)
container = pod['spec']['initContainers'][i]

pod['spec']['initContainers'][i] = \
Expand Down
Loading

0 comments on commit 2295a29

Please sign in to comment.