Skip to content

Commit

Permalink
Add BaremetalHost annotation-based fencing
Browse files Browse the repository at this point in the history
This commit adds one more way of fencing a compute node,
using baremetal apis (metal3).

It works by adding a reboot annotation, see:
https://book.metal3.io/bmo/reboot_annotation.html

{"annotations":{"reboot.metal3.io/iha":"{\"mode\": \"hard\"}"}}

After the evacuation is completed, this annotation is removed and
the compute host is powered back on.

fencing.yaml should contain something like:

  FencingConfig:
    edpm-compute-1: [hostname as known by nova]
      agent: bmh
      namespace: openstack [namespace of the bmh]
      token: <long-token>
      host: edpm-compute-1 [name of the bmh resource]

The token is obtained with something like the following:

kubectl create serviceaccount k8sadmin -n kube-system
kubectl create clusterrolebinding k8sadmin --clusterrole=cluster-admin --serviceaccount=kube-system:k8sadmin
kubectl -n kube-system describe secret $(sudo kubectl -n kube-system get secret | (grep k8sadmin || echo "$_") | awk '{print $1}') | grep token: | awk '{print $2}'

Customers should create a serviceaccount that has just enough rights to
perform operations on the baremetalhost resources, instead of binding it
to the cluster-admin role.
  • Loading branch information
lmiccini committed Oct 12, 2024
1 parent 1da98a9 commit 3b9f188
Showing 1 changed file with 56 additions and 0 deletions.
56 changes: 56 additions & 0 deletions templates/instanceha/bin/instanceha.py
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,41 @@ def _redfish_reset(url, user, passwd, timeout, action):
r = requests.post(url, data=json.dumps(payload), headers=headers, auth=(user, passwd), verify=False, timeout=timeout)
return r

def _bmh_fence(token, namespace, host, action, timeout=30, poll_interval=3, poll_attempts=10, session=None):
    """Fence or unfence a compute node through its metal3 BareMetalHost.

    Fencing works by setting the ``reboot.metal3.io/iha`` annotation (hard
    mode) on the BareMetalHost resource; unfencing removes that annotation
    again.

    Args:
        token: service account bearer token used against the Kubernetes API.
        namespace: namespace of the BareMetalHost resource.
        host: name of the BareMetalHost resource.
        action: ``'off'`` sets the annotation and waits for the host to report
            powered off; any other value removes the annotation.
        timeout: per-request timeout in seconds, so an unresponsive API
            server cannot block the fencing agent indefinitely.
        poll_interval: seconds to sleep before each power-state check.
        poll_attempts: maximum number of power-state checks (the defaults
            give the historical ~30s wait).
        session: optional object exposing ``patch``/``get`` like a
            ``requests.Session``; the ``requests`` module is used when omitted.

    Returns:
        For ``action == 'off'``: ``True`` only if the API reported
        ``status.poweredOn`` as ``false`` within the polling window,
        ``False`` otherwise (annotation rejected, host still on, or state
        unknown).
        For any other action: the response object of the PATCH request, so
        the caller can inspect ``status_code``.

    Raises:
        Whatever the PATCH request raises (e.g. a requests exception); only
        errors during power-state polling are tolerated and retried.
    """
    http = requests if session is None else session

    resource_url = "https://kubernetes.default.svc/apis/metal3.io/v1alpha1/namespaces/%s/baremetalhosts/%s" % (namespace, host)
    patch_url = resource_url + "?fieldManager=kubectl-patch"
    get_headers = {'Authorization': 'Bearer ' + token}
    patch_headers = {'Authorization': 'Bearer ' + token, 'Content-Type': 'application/merge-patch+json'}
    cacert = '/var/run/secrets/kubernetes.io/serviceaccount/ca.crt'

    if action != 'off':
        # A null value in a JSON merge patch deletes the annotation.
        ann = {"metadata": {"annotations": {"reboot.metal3.io/iha": None}}}
        return http.patch(patch_url, headers=patch_headers, verify=cacert, data=json.dumps(ann), timeout=timeout)

    ann = {"metadata": {"annotations": {"reboot.metal3.io/iha": "{\"mode\": \"hard\"}"}}}
    r = http.patch(patch_url, headers=patch_headers, verify=cacert, data=json.dumps(ann), timeout=timeout)
    if r.status_code != 200:
        logging.warning('Setting the reboot annotation on %s/%s failed with HTTP %s', namespace, host, r.status_code)
        return False

    # Wait for the host to report powered off. A failed or incomplete reply
    # means "state unknown": keep polling rather than crash or, worse, report
    # a successful fence that was never confirmed.
    for _ in range(poll_attempts):
        time.sleep(poll_interval)
        try:
            s = http.get(resource_url, headers=get_headers, verify=cacert, timeout=timeout)
            body = json.loads(s.text) if s.status_code == 200 else None
        except (requests.exceptions.RequestException, ValueError) as e:
            logging.warning('Could not read power state of %s/%s: %s', namespace, host, e)
            continue

        status = body.get('status') if isinstance(body, dict) else None
        # Only an explicit false counts as powered off; a missing or null
        # value must not be mistaken for a fenced host.
        if isinstance(status, dict) and status.get('poweredOn') is False:
            return True

    return False

def _host_fence(host, action):
logging.info('Fencing host %s %s' % (host, action))
Expand Down Expand Up @@ -589,6 +624,27 @@ def _host_fence(host, action):
logging.warning('Could not power on %s' % host)
#return True

elif 'bmh' in fencing_data["agent"]:

token = str(fencing_data["token"])
host = str(fencing_data["host"])
namespace = str(fencing_data["namespace"])

if action == 'off':
r = _bmh_fence(token, namespace, host, "off")
if r:
logging.info('Power off of %s ok' % host)
return True
else:
logging.error('Could not power off %s' % host)
return False
else:
r = _bmh_fence(token, namespace, host, "on")
if r.status_code == 200:
logging.info('Power on of %s ok' % host)
else:
logging.warning('Could not power on %s' % host)

else:
logging.error('No valid fencing method detected for %s' % host)
return False
Expand Down

0 comments on commit 3b9f188

Please sign in to comment.