add check service states

This commit is contained in:
2025-02-23 03:23:25 +01:00
parent 6805182b17
commit 482ffcda99

View File

@@ -6,7 +6,7 @@
- pip3 install requests - pip3 install requests
""" """
__version__ = '0.1.0' __version__ = '0.2.0'
__author__ = 'anima' __author__ = 'anima'
# imports # imports
@@ -28,6 +28,16 @@ class SwarmpitAPI:
self.ssl = ssl self.ssl = ssl
def __query(self, query:str, query_type:str = 'get', data:dict = None) -> dict | None: def __query(self, query:str, query_type:str = 'get', data:dict = None) -> dict | None:
"""default query to swarmpit api
Args:
query (str): substring of path (behind /api/)
query_type (str, optional): specify type of query, post / get. Defaults to 'get'.
data (dict, optional): _description_. Defaults to None.
Returns:
dict | None: return json string as dict or None response
"""
protocol = 'http' protocol = 'http'
if self.port is None and self.ssl: if self.port is None and self.ssl:
self.port = 443 self.port = 443
@@ -58,8 +68,13 @@ class SwarmpitAPI:
return None return None
def statistics(self): def statistics(self):
"""Cluster statistics"""
return self.__query('stats') return self.__query('stats')
def tasks(self):
    """Fetch the task list (aka running services).

    Sends the default query for the ``tasks`` path and hands the result
    back unchanged.

    Returns:
        Whatever the default query helper returns for ``tasks``.
    """
    task_list = self.__query('tasks')
    return task_list
# #
# Check Swarm Cores # Check Swarm Cores
@@ -81,6 +96,75 @@ class DockerSwarmCoresResource(nagiosplugin.Resource):
return nagiosplugin.Metric(name='load', value=cpu_usage_percent, uom='%', context='scalar_context') return nagiosplugin.Metric(name='load', value=cpu_usage_percent, uom='%', context='scalar_context')
#
# Check Swarm services
#
class DockerSwarmServiceStatesResource(nagiosplugin.Resource):
    """Resource that turns every task reported by the API into a metric."""

    def __init__(self, api) -> None:
        """Remember the API client used by :meth:`probe`.

        Args:
            api: client object providing a ``tasks()`` method.
        """
        self.api = api

    def probe(self):
        """Check running services.

        Yields:
            nagiosplugin.Metric: one ``docker_service`` metric per task; its
            value is a dict with the keys ``name``, ``image``, ``state``,
            ``desiredState`` and ``error``.

        Raises:
            nagiosplugin.CheckError: if the API call returned no data.
        """
        response = self.api.tasks()
        # The API query may hand back None instead of data; fail with a
        # clear message rather than a TypeError from iterating None.
        if response is None:
            raise nagiosplugin.CheckError('no task data received from swarmpit api')

        for service in response:
            data = {
                'name': service['serviceName'],
                'image': service['repository']['image'],
                'state': service['state'],
                'desiredState': service['desiredState'],
                'error': service['status']['error'],
            }
            yield nagiosplugin.Metric(name='docker_service', value=data, context='docker_service')
class DockerSwarmServiceStatesContext(nagiosplugin.Context):
    """Context that rates a service metric by comparing state and desired state."""

    def __init__(self, name):
        """Create the context.

        Args:
            name: context name the metrics refer to.
        """
        super().__init__(name, fmt_metric='{name} is', result_cls=DockerSwarmServiceStatesResult)

    def evaluate(self, metric, resource):
        """Rate a single service metric.

        Args:
            metric: metric whose value is a dict with the keys ``state``,
                ``desiredState`` and ``error``.
            resource: the resource that produced the metric (unused).

        Returns:
            A ``result_cls`` instance: Critical when ``state`` differs from
            ``desiredState``, Warn when the states match but ``error`` is
            set, Ok otherwise.
        """
        value = metric.value
        # A state mismatch outranks everything else.
        if value['state'] != value['desiredState']:
            return self.result_cls(nagiosplugin.Critical, "critical", metric)
        if value['error'] is not None:
            return self.result_cls(nagiosplugin.Warn, "warning", metric)
        return self.result_cls(nagiosplugin.Ok, "ok", metric)
class DockerSwarmServiceStatesResult(nagiosplugin.Result):
    """Result whose text describes a service, its image and its state."""

    def __str__(self):
        """Build the human readable line for this result.

        Returns:
            str: ``Service <name> (<image>) is <state>``, extended by the
            desired state and the error when the states differ, or by the
            error alone when the states match but an error is set.
        """
        value = self.metric.value
        # Common prefix shared by all three variants of the message.
        text = f'Service {value["name"]} ({value["image"]}) is {value["state"]}'

        if value['state'] != value['desiredState']:
            return f'{text} (desired: {value["desiredState"]}) with error {value["error"]}'
        if value['error'] is not None:
            return f'{text} with error {value["error"]}'
        return text
#
# Docker Swarm general summary
#
class DockerSwarmSummary(nagiosplugin.Summary):
    """Summary that only provides verbose output, one line per result."""

    def verbose(self, results):
        """Render all results as text.

        Args:
            results: iterable of result objects.

        Returns:
            str: the ``str()`` of every result, each followed by a newline;
            an empty string when there are no results.
        """
        return ''.join(f'{str(result)}\n' for result in results)

    def ok(self, results):
        """Provide no summary text for the ok case (returns None)."""
        return None

    def problem(self, results):
        """Provide no summary text for the problem case (returns None)."""
        return None
## Args ## Args
def parse_args() -> argparse.Namespace: def parse_args() -> argparse.Namespace:
"""evaluates given arguments """evaluates given arguments
@@ -100,6 +184,7 @@ def parse_args() -> argparse.Namespace:
argp.add_argument('-m', '--check_mode', argp.add_argument('-m', '--check_mode',
choices=[ choices=[
'load', 'load',
'service_states'
], ],
help='check mode to run') help='check mode to run')
@@ -130,6 +215,7 @@ def main():
# decide which check will be run based on check_mode # decide which check will be run based on check_mode
api = SwarmpitAPI(host=args.api_host, token=args.api_token, port=args.api_port, ssl=args.api_ssl) api = SwarmpitAPI(host=args.api_host, token=args.api_token, port=args.api_port, ssl=args.api_ssl)
api.tasks()
match args.check_mode: match args.check_mode:
case 'load': case 'load':
check = nagiosplugin.Check( check = nagiosplugin.Check(
@@ -137,7 +223,12 @@ def main():
nagiosplugin.ScalarContext(name='scalar_context', warning=args.warning, critical=args.critical), nagiosplugin.ScalarContext(name='scalar_context', warning=args.warning, critical=args.critical),
nagiosplugin.Summary()) nagiosplugin.Summary())
check.name = "swarm load" check.name = "swarm load"
case 'service_states':
check = nagiosplugin.Check(
DockerSwarmServiceStatesResource(api=api),
DockerSwarmServiceStatesContext(name='docker_service'),
DockerSwarmSummary())
check.name = "swarm service states"
case _: case _:
raise nagiosplugin.CheckError(f'Unknown check mode: {args.check_mode}') raise nagiosplugin.CheckError(f'Unknown check mode: {args.check_mode}')