From 482ffcda99e161e156f78b0ddf4ab577dfe2064e Mon Sep 17 00:00:00 2001
From: anima
Date: Sun, 23 Feb 2025 03:23:25 +0100
Subject: [PATCH] add check service states
---
checks/check_api_swarmpit.py | 95 +++++++++++++++++++++++++++++++++++-
1 file changed, 93 insertions(+), 2 deletions(-)
diff --git a/checks/check_api_swarmpit.py b/checks/check_api_swarmpit.py
index 84cdd1e..41f7b3d 100644
--- a/checks/check_api_swarmpit.py
+++ b/checks/check_api_swarmpit.py
@@ -6,7 +6,7 @@
- pip3 install requests
"""
-__version__ = '0.1.0'
+__version__ = '0.2.0'
__author__ = 'anima'
# imports
@@ -28,6 +28,16 @@ class SwarmpitAPI:
self.ssl = ssl
def __query(self, query:str, query_type:str = 'get', data:dict = None) -> dict | None:
+ """default query to swarmpit api
+
+ Args:
+ query (str): substring of path (behind /api/)
+ query_type (str, optional): specify type of query, post / get. Defaults to 'get'.
+ data (dict, optional): _description_. Defaults to None.
+
+ Returns:
+ dict | None: return json string as dict or None response
+ """
protocol = 'http'
if self.port is None and self.ssl:
self.port = 443
@@ -58,8 +68,13 @@ class SwarmpitAPI:
return None
def statistics(self):
+ """Cluster statistics - returns the result of the 'stats' api query"""
return self.__query('stats')
+ def tasks(self):
+ """Task list - aka running services; returns the result of the 'tasks' api query"""
+ return self.__query('tasks')
+
#
# Check Swarm Cores
@@ -81,6 +96,75 @@ class DockerSwarmCoresResource(nagiosplugin.Resource):
return nagiosplugin.Metric(name='load', value=cpu_usage_percent, uom='%', context='scalar_context')
+#
+# Check Swarm services
+#
+class DockerSwarmServiceStatesResource(nagiosplugin.Resource):
+    """Resource that turns every task reported by the api into one metric."""
+
+    def __init__(self, api) -> None:
+        """Store the api client.
+
+        Args:
+            api: client object providing a ``tasks()`` method.
+        """
+        self.api = api
+
+    def probe(self):
+        """check running services
+
+        Yields:
+            nagiosplugin.Metric: one ``docker_service`` metric per task. The
+            metric value is a dict with the keys ``name``, ``image``,
+            ``state``, ``desiredState`` and ``error``.
+
+        Raises:
+            nagiosplugin.CheckError: if the api delivered no task data.
+        """
+        response = self.api.tasks()
+        if response is None:
+            # The api query can come back empty-handed (None). Iterating over
+            # None would fail with an opaque TypeError, so report it clearly.
+            raise nagiosplugin.CheckError('no task data received from swarmpit api')
+
+        for service in response:
+            data = {
+                'name': service['serviceName'],
+                'image': service['repository']['image'],
+                'state': service['state'],
+                'desiredState': service['desiredState'],
+                'error': service['status']['error'],
+            }
+            yield nagiosplugin.Metric(name='docker_service', value=data, context='docker_service')
+
+
+class DockerSwarmServiceStatesContext(nagiosplugin.Context):
+    """Context that rates a ``docker_service`` metric by its state and error."""
+
+    def __init__(self, name):
+        """Set up the context under *name* with the service state result class."""
+        super().__init__(name, fmt_metric='{name} is', result_cls=DockerSwarmServiceStatesResult)
+
+    def evaluate(self, metric, resource):
+        """Rate a single service metric.
+
+        Args:
+            metric: metric whose value is a dict holding the keys ``state``,
+                ``desiredState`` and ``error``.
+            resource: not used by this context.
+
+        Returns:
+            An instance of ``self.result_cls``: Critical if the state differs
+            from the desired state, Warn if both match but an error is set,
+            Ok otherwise.
+        """
+        service = metric.value
+        if service['state'] != service['desiredState']:
+            return self.result_cls(nagiosplugin.Critical, "critical", metric)
+        if service['error'] is not None:
+            return self.result_cls(nagiosplugin.Warn, "warning", metric)
+        return self.result_cls(nagiosplugin.Ok, "ok", metric)
+
+
+class DockerSwarmServiceStatesResult(nagiosplugin.Result):
+    """Result whose text names the service, its state and a reported error."""
+
+    def __str__(self):
+        """Return a one-line description of the service state.
+
+        The desired state is only mentioned when it differs from the actual
+        state, and the error is only mentioned when one is set (so the text
+        never ends in "with error None").
+        """
+        service = self.metric.value
+        text = f'Service {service["name"]} ({service["image"]}) is {service["state"]}'
+        if service['state'] != service['desiredState']:
+            text += f' (desired: {service["desiredState"]})'
+        if service['error'] is not None:
+            text += f' with error {service["error"]}'
+        return text
+
+
+#
+# Docker Swarm general summary
+#
+class DockerSwarmSummary(nagiosplugin.Summary):
+    """Summary that lists every result in the verbose text only."""
+
+    def verbose(self, results):
+        """Return the string form of all results, one result per line."""
+        return ''.join(str(entry) + '\n' for entry in results)
+
+    def ok(self, results):
+        """Return None, i.e. provide no summary text for the ok case."""
+        return None
+
+    def problem(self, results):
+        """Return None, i.e. provide no summary text for the problem case."""
+        return None
+
+
## Args
def parse_args() -> argparse.Namespace:
"""evaluates given arguments
@@ -100,6 +184,7 @@ def parse_args() -> argparse.Namespace:
argp.add_argument('-m', '--check_mode',
choices=[
'load',
+ 'service_states'
],
help='check mode to run')
@@ -130,6 +215,7 @@ def main():
# dice which check will be run bases on check_mode
api = SwarmpitAPI(host=args.api_host, token=args.api_token, port=args.api_port, ssl=args.api_ssl)
+ api.tasks()
match args.check_mode:
case 'load':
check = nagiosplugin.Check(
@@ -137,7 +223,12 @@ def main():
nagiosplugin.ScalarContext(name='scalar_context', warning=args.warning, critical=args.critical),
nagiosplugin.Summary())
check.name = "swarm load"
-
+ case 'service_states':
+ check = nagiosplugin.Check(
+ DockerSwarmServiceStatesResource(api=api),
+ DockerSwarmServiceStatesContext(name='docker_service'),
+ DockerSwarmSummary())
+ check.name = "swarm service states"
case _:
raise nagiosplugin.CheckError(f'Unknown check mode: {args.check_mode}')