Skip to content

Commit

Permalink
Added LdomsLsCollector
Browse files Browse the repository at this point in the history
  • Loading branch information
n27051538 committed May 4, 2022
1 parent 72541f3 commit 3d61310
Show file tree
Hide file tree
Showing 4 changed files with 204 additions and 18 deletions.
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@ Also work on x86 platform, community-tested with Openindiana (x86) (OI-hipster-m
- 2020 Dec 17. Added PrtdiagCollector, MetaStatCollector, MetaDBCollector
- 2021 Jan 05. Added TextFileCollector, SVCSCollector now enabled for all zones (Thanks to Marcel Peter)
- 2021 Mar 01. [Fixed](https://github.com/n27051538/solaris_exporter/issues/4) psutil version to 5.7.0 (something changed in the newer versions; have had no time to look into it yet)
- 2022 Jan 24. Added support for Python 3. In testing.
- 2022 Jan 24. Added support for Python 3.
- 2022 Feb 04. Documentation update for support of Solaris 11.4.41.
- 2022 Feb 05. [Fixed](https://github.com/n27051538/solaris_exporter/issues/7) support of Python 2.7 for Solaris 11.4.41.

- 2022 May 04. Added LdomsLsCollector due to [discussion](https://github.com/n27051538/solaris_exporter/discussions/11).


## Provides info about:
- Solaris Zones CPU Usage with processor sets info (PerZoneCpuCollector);
- Solaris Zones Virtual Memory (SWAP) Resource Capping (PerZoneCapsCollector);
Expand Down
43 changes: 42 additions & 1 deletion output_example.txt
Original file line number Diff line number Diff line change
Expand Up @@ -596,4 +596,45 @@ solaris_exporter_per_zone_caps_processing 0.21715593338012695
# TYPE solaris_exporter_fc_paths_timeouts_total counter
solaris_exporter_fc_paths_timeouts_total 0.0
# TYPE solaris_exporter_fc_paths_timeouts_created gauge
solaris_exporter_fc_paths_timeouts_created 1.586252434625353e+09
solaris_exporter_fc_paths_timeouts_created 1.586252434625353e+09
# HELP solaris_exporter_ldoms ldoms counters
# TYPE solaris_exporter_ldoms gauge
solaris_exporter_ldoms{host="host01",ldom="primary",statistic="ncpu"} 32.0
solaris_exporter_ldoms{host="host01",ldom="primary",statistic="mem"} 3.4359738368e+010
solaris_exporter_ldoms{host="host01",ldom="primary",statistic="util"} 0.5
solaris_exporter_ldoms{host="host01",ldom="primary",statistic="uptime_seconds"} 659559.0
solaris_exporter_ldoms{host="host01",ldom="primary",statistic="norm_util"} 0.4
solaris_exporter_ldoms{host="host01",ldom="primary",statistic="state"} 0.0
solaris_exporter_ldoms{host="host01",ldom="primary",statistic="flags"} 10110.0
solaris_exporter_ldoms{host="host01",ldom="primary",statistic="console_port"} -1.0
solaris_exporter_ldoms{host="host01",ldom="dom02",statistic="ncpu"} 48.0
solaris_exporter_ldoms{host="host01",ldom="dom02",statistic="mem"} 2.147483648e+011
solaris_exporter_ldoms{host="host01",ldom="dom02",statistic="util"} 0.7
solaris_exporter_ldoms{host="host01",ldom="dom02",statistic="uptime_seconds"} 659576.0
solaris_exporter_ldoms{host="host01",ldom="dom02",statistic="norm_util"} 0.6
solaris_exporter_ldoms{host="host01",ldom="dom02",statistic="state"} 0.0
solaris_exporter_ldoms{host="host01",ldom="dom02",statistic="flags"} 10000.0
solaris_exporter_ldoms{host="host01",ldom="dom02",statistic="console_port"} 5002.0
solaris_exporter_ldoms{host="host01",ldom="dom01",statistic="ncpu"} 48.0
solaris_exporter_ldoms{host="host01",ldom="dom01",statistic="mem"} 2.147483648e+011
solaris_exporter_ldoms{host="host01",ldom="dom01",statistic="util"} -1.0
solaris_exporter_ldoms{host="host01",ldom="dom01",statistic="uptime_seconds"} -1.0
solaris_exporter_ldoms{host="host01",ldom="dom01",statistic="norm_util"} -1.0
solaris_exporter_ldoms{host="host01",ldom="dom01",statistic="state"} 2.0
solaris_exporter_ldoms{host="host01",ldom="dom01",statistic="flags"} 0.0
solaris_exporter_ldoms{host="host01",ldom="dom01",statistic="console_port"} 0.0
# HELP solaris_exporter_ldom_collector_timeouts_total Number of times when ldom collector ran more than 3 seconds
# TYPE solaris_exporter_ldom_collector_timeouts_total counter
solaris_exporter_ldom_collector_timeouts_total 0.0
# HELP solaris_exporter_ldom_collector_timeouts_created Number of times when ldom collector ran more than 3 seconds
# TYPE solaris_exporter_ldom_collector_timeouts_created gauge
solaris_exporter_ldom_collector_timeouts_created 1.651672718173066e+09
# HELP solaris_exporter_ldom_collector_errors_total Number of times when ldom collector ran with errors
# TYPE solaris_exporter_ldom_collector_errors_total counter
solaris_exporter_ldom_collector_errors_total 0.0
# HELP solaris_exporter_ldom_collector_errors_created Number of times when ldom collector ran with errors
# TYPE solaris_exporter_ldom_collector_errors_created gauge
solaris_exporter_ldom_collector_errors_created 1.651672718173104e+09
# HELP solaris_exporter_ldom_collector_processing Time spent processing request
# TYPE solaris_exporter_ldom_collector_processing gauge
solaris_exporter_ldom_collector_processing 0.10866618156433105
163 changes: 153 additions & 10 deletions solaris_exporter.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
#!/usr/bin/python
"""
solaris_exporter.py
version v2022Feb05
version v2022May04
2020 Jan 31. Initial
2020 Feb 04. Added UpTime in UpTimeCollector.
2020 Feb 09. Added DiskErrorCollector, ZpoolCollector, FmadmCollector, SVCSCollector, FCinfoCollector
2020 Dec 17. Added PrtdiagCollector, MetaStatCollector, MetaDBCollector
2021 Jan 05. Added TextFileCollector, SVCSCollector now enabled for all zones (Thanks to Marcel Peter)
2021 Mar 01. Fixed psutil version to 5.7.0 (something changed in the newer versions; have had no time to look into it yet)
2022 Jan 24. Added support for Python 3.7. In testing.
2022 Feb 04. Documentation update for support of Solaris 11.4.41. In testing.
2022 Feb 05. Fixed support of Python 2.7 for Solaris 11.4.41
(https://github.com/n27051538/solaris_exporter/issues/7).
2022 Jan 24. Added support for Python 3.7.
2022 Feb 04. Documentation update for support of Solaris 11.4.41.
2022 Feb 05. Fixed support of Python 2.7 for Solaris 11.4.41 (https://github.com/n27051538/solaris_exporter/issues/7).
2022 May 04. Added LdomsLsCollector (https://github.com/n27051538/solaris_exporter/discussions/11).
Written by Alexander Golikov for collecting SPARC Solaris metrics for Prometheus.
Tested on Solaris 11.3.25, 11.4.4, 11.4.41, 10u11(limited) SPARC.
Also work on x86 platform, community-tested with Openindiana (x86) (OI-hipster-minimal-20201031.iso) and Solaris10u11.
Also works on the x86 platform; community-tested with OpenIndiana (x86) (OI-hipster-minimal-20201031.iso).
This exporter provides info about:
- Solaris Zones CPU Usage with processor sets info (PerZoneCpuCollector);
Expand Down Expand Up @@ -1062,9 +1062,148 @@ def collect(self):
yield family
text_object.close

class LdomsLsCollector(object):
    """
    Read input from 'ldm list' command

    Runs '/usr/sbin/ldm list -p' and publishes one 'solaris_exporter_ldoms'
    gauge sample per domain and statistic, in this order:
    ncpu, mem, util, uptime_seconds, norm_util, state, flags, console_port.

    Encodings used for values that are not plain numbers:
      state          0 active, 1 bound, 2 inactive, 3 anything else
      console_port   0 when the field is empty, -1 when it is not a number
                     (for example 'UART'), otherwise the port number
      ncpu, mem      0 when the field is empty or not a number
      util, uptime_seconds, norm_util
                     -1 when the field is empty or not a number
      flags          one decimal digit per flag column, see _encode_flags()
    """
    # maximum number of seconds the 'ldm list' command is allowed to run
    max_time_to_run = 3
    ldom_collector_timeouts = Counter('solaris_exporter_ldom_collector_timeouts',
                                      'Number of times when ldom collector ran' +
                                      ' more than ' + str(max_time_to_run) + ' seconds')
    ldom_collector_errors = Counter('solaris_exporter_ldom_collector_errors',
                                    'Number of times when ldom collector ran with errors')
    ldom_collector_run_time = Gauge('solaris_exporter_ldom_collector_processing',
                                    'Time spent processing request')

    # numeric code per known domain state; every other state is reported as 3
    _STATE_CODES = {"active": 0, "bound": 1, "inactive": 2}
    _STATE_UNKNOWN = 3

    # radix of the flags code; must not be less than 4, the largest number
    # of variants a single flag column can take (including "not set")
    _FLAGS_CODE_BASE = 10

    # one entry per flag column, left to right; a character that is not
    # listed for its column (normally '-') contributes the digit 0
    _FLAG_COLUMNS = (
        # Column 1 - Starting or stopping domains:
        #   s starting or stopping
        {'s': 1},
        # Column 2 - Domain status:
        #   n normal
        #   t transition
        #   d degraded domain that cannot be started due to missing resources
        {'n': 1, 't': 2, 'd': 3},
        # Column 3 - Reconfiguration status:
        #   d delayed reconfiguration
        #   r memory dynamic reconfiguration
        {'d': 1, 'r': 2},
        # Column 4 - Control domain:
        #   c control domain
        {'c': 1},
        # Column 5 - Service domain:
        #   v virtual I/O service domain
        {'v': 1},
        # Column 6 - Migration status:
        #   s source domain in a migration
        #   t target domain in a migration
        #   e error occurred during a migration
        {'s': 1, 't': 2, 'e': 3},
    )

    @classmethod
    def _encode_flags(cls, flags):
        """
        Convert the flags string of a domain (e.g. '-n-cv-') to an integer.

        Each flag column becomes one digit in base _FLAGS_CODE_BASE, the first
        column being the most significant digit, so '-n-cv-' gives 10110 and
        '-n----' gives 10000. Columns missing from a short string count as
        "not set" instead of raising IndexError.
        """
        code = 0
        for position, digits in enumerate(cls._FLAG_COLUMNS):
            symbol = flags[position] if position < len(flags) else '-'
            code = code * cls._FLAGS_CODE_BASE + digits.get(symbol, 0)
        return code

    @staticmethod
    def _parse_fields(items):
        """
        Turn ['name=dom50', 'state=active', ...] into {'name': 'dom50', ...}.

        Items without '=' are ignored. Looking fields up by key keeps the
        collector working if a field is absent or the field order differs.
        """
        fields = {}
        for item in items:
            key, separator, value = item.partition("=")
            if separator:
                fields[key] = value
        return fields

    @staticmethod
    def _to_float(value, default):
        """
        Return value as float, or float(default) when value is missing,
        empty, or not a number.
        """
        if value is None or value == "":
            return float(default)
        try:
            return float(value)
        except ValueError:
            return float(default)

    def collect(self):
        """
        Yield the 'solaris_exporter_ldoms' metric family.

        The error counter is incremented when the command fails, times out or
        prints a DOMAIN record without a name; the timeout counter is
        incremented additionally on timeout. The family is always yielded,
        possibly without samples.
        """
        with self.ldom_collector_run_time.time():
            ldoms = GaugeMetricFamily("solaris_exporter_ldoms",
                                      'ldoms counters',
                                      labels=['ldom', 'statistic', 'host'])
            output, task_return_code, task_timeouted = run_shell_command(
                '/usr/sbin/ldm list -p', self.max_time_to_run)
            if task_return_code == 0 and task_timeouted is False:
                malformed_record_seen = False
                for line in output.splitlines():
                    keyvalue = line.split("|")
                    # Only DOMAIN records carry statistics; the 'VERSION x.y'
                    # header and any other record type are skipped.
                    # TODO: compatible version check, tested for VERSION 1.21
                    if keyvalue[0] != "DOMAIN":
                        continue

                    # DOMAIN|name=dom50|state=active|flags=-n----|cons=5001|ncpu=88|mem=182536110080|util=5.7|uptime=17930944|norm_util=5.7
                    fields = self._parse_fields(keyvalue[1:])
                    if "name" not in fields:
                        # nothing to label the samples with
                        malformed_record_seen = True
                        continue
                    ldom_name = fields["name"]

                    ldom_state = self._STATE_CODES.get(fields.get("state"),
                                                       self._STATE_UNKNOWN)
                    ldom_flags = self._encode_flags(fields.get("flags", ""))

                    # empty console -> 0; a console that is not a port number
                    # (e.g. 'UART') -> -1
                    ldom_cons = fields.get("cons", "")
                    if ldom_cons == "":
                        ldom_cons = 0.0
                    else:
                        ldom_cons = self._to_float(ldom_cons, -1)

                    statistics = (
                        ("ncpu", self._to_float(fields.get("ncpu"), 0)),
                        ("mem", self._to_float(fields.get("mem"), 0)),
                        ("util", self._to_float(fields.get("util"), -1)),
                        ("uptime_seconds", self._to_float(fields.get("uptime"), -1)),
                        ("norm_util", self._to_float(fields.get("norm_util"), -1)),
                        ("state", float(ldom_state)),
                        ("flags", float(ldom_flags)),
                        ("console_port", ldom_cons),
                    )
                    for statistic, value in statistics:
                        ldoms.add_metric([ldom_name, statistic, host_name], value)

                if malformed_record_seen:
                    # counted once per run, like the other error conditions
                    self.ldom_collector_errors.inc()
            else:
                self.ldom_collector_errors.inc()
                if task_timeouted:
                    self.ldom_collector_timeouts.inc()
        yield ldoms


# replace start_http_server() method to capture error messages in my_http_error_handler()
# remove this to revert to prometheus_client.start_http_server

try:
# Python 2.7
Expand Down Expand Up @@ -1096,8 +1235,6 @@ def my_http_error_handler(request, client_address):
t.start()


# end of replace start_http_server()


if __name__ == '__main__':
assert psutil.SUNOS, 'This program is for Solaris OS only. See installation doc in its header'
Expand Down Expand Up @@ -1131,6 +1268,12 @@ def my_http_error_handler(request, client_address):
if zone != "global":
nzones += 1

ldoms, rc, timeouted = run_shell_command('/usr/sbin/ldm list -p', 3)
if ldoms != "":
collectors.extend([
LdomsLsCollector(),
])

zonename, rc, timeouted = run_shell_command('/usr/bin/zonename', 3)
zonename = zonename.strip()
if zonename == "global":
Expand Down
10 changes: 5 additions & 5 deletions solaris_exporter_role.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,19 @@ user=monitor
profile_name=Prometheus

uid0_commands="
/usr/sbin/fcinfo
/usr/sbin/fmadm
/usr/sbin/nvmeadm
/usr/sbin/raidctl
/usr/sbin/zlogin
"
# Also add priv 'file_dac_search': df -h will be able to view zone filesystems
# Also add priv 'file_dac_search': 'df -h' will be able to view zone filesystems
# Also add auth 'solaris.ldoms.read': 'ldm list' will be able to view domain info

current_profiles=$(profiles ${user} | egrep -v "^${user}:$|^All$|^Basic Solaris User$|^${profile_name}$")
current_profiles=$(echo -n "${current_profiles}" | tr '\n' ',')

function RemoveRole {
usermod -K "defaultpriv-=file_dac_search" ${user} 2>/dev/null
usermod -K "defaultpriv-=file_dac_search,sys_config" -A "-solaris.ldoms.read" ${user} 2>/dev/null
usermod -K "defaultpriv+=basic" ${user} 2>/dev/null

for uid0_command in ${uid0_commands}; do
Expand Down Expand Up @@ -70,7 +70,7 @@ function InstallRole {
fi

usermod -P "${current_profiles}" ${user} 2>/dev/null
usermod -K "defaultpriv+=basic,file_dac_search" ${user} 2>/dev/null
usermod -K "defaultpriv+=basic,file_dac_search,sys_config" -A "+solaris.ldoms.read" ${user} 2>/dev/null
echo "Current profiles of ${user} user is: ${current_profiles}"
}

Expand All @@ -84,4 +84,4 @@ elif [[ "$1" == "remove" ]] ; then
else
echo "Usage: $0 {install|remove}"
exit 4
fi
fi

0 comments on commit 3d61310

Please sign in to comment.