Files
icinga-plugins/checks/check_api_swarmpit.py

240 lines
8.1 KiB
Python

#!/usr/bin/env python3
"""Check docker via swarmpit python check"""
"""dependencys:
- pip3 install nagiosplugin
- argparse (part of the Python standard library, no pip3 install needed)
- pip3 install requests
"""
__version__ = '0.3.0'
__author__ = 'anima'
# imports
import logging
import argparse
import nagiosplugin
import requests
import json
# log settings
logging.basicConfig(format='[%(asctime)s] %(levelname)s %(message)s', level=logging.INFO)
## API
class SwarmpitAPI:
def __init__(self, host:str, token:str, port:int = 8080, ssl:bool = False) -> None:
self.host = host
self.port = port
self.token = token
self.ssl = ssl
def __query(self, query:str, query_type:str = 'get', data:dict = None) -> dict | None:
"""default query to swarmpit api
Args:
query (str): substring of path (behind /api/)
query_type (str, optional): specify type of query, post / get. Defaults to 'get'.
data (dict, optional): _description_. Defaults to None.
Returns:
dict | None: return json string as dict or None response
"""
protocol = 'http'
if self.port is None and self.ssl:
self.port = 443
elif self.port is None:
self.port = 80
if self.ssl:
protocol += 's'
headers = dict()
headers['Content-Type'] = 'application/json'
headers['Authorization'] = f'Bearer {self.token}'
match query_type:
case 'get':
response = requests.get(f'{protocol}://{self.host}:{self.port}/api/{query}', headers=headers, verify=False)
case 'post':
response = requests.post(f'{protocol}://{self.host}:{self.port}/api/{query}', headers=headers, verify=False)
if response.status_code == 200:
if 'application/json' in response.headers['Content-Type']:
json_response = json.loads(response.text)
return json_response
else:
return response.content
else:
logging.error(f'non successfull response {response.content} [{response.status_code}]')
logging.debug(f'{response.request.url=}')
logging.debug(f'{response.request.headers=}')
logging.debug(f'{response.request.body=}')
return None
def statistics(self):
"""Cluster statistics"""
return self.__query('stats')
def tasks(self):
"""Task list"""
return self.__query('tasks')
def services(self):
"""Service list"""
return self.__query('services')
#
# Check Swarm Cores
#
class DockerSwarmCoresResource(nagiosplugin.Resource):
    """Resource that reports swarm CPU usage as a percentage.

    Args:
        api: client object providing a ``statistics()`` method.
    """

    def __init__(self, api) -> None:
        self.api = api

    def probe(self) -> nagiosplugin.Metric:
        """check core usage of full swarm

        The value is ``cpu['usage'] / cpu['cores'] * 100`` taken from the
        statistics response, rounded to two decimals.

        Returns:
            nagiosplugin.Metric: single metric element (return)
        Raises:
            nagiosplugin.CheckError: if the API returned no usable statistics
                or reports zero cores.
        """
        data = self.api.statistics()
        # statistics() yields None on a failed request and raw bytes for a
        # non-JSON answer; neither can be evaluated.
        if not isinstance(data, dict):
            raise nagiosplugin.CheckError('no usable statistics received from Swarmpit API')
        cpu = data['cpu']
        if not cpu['cores']:
            # Avoid a ZeroDivisionError when no cores are reported.
            raise nagiosplugin.CheckError('Swarmpit API reported no CPU cores')
        cpu_usage_percent = round((cpu['usage'] / cpu['cores']) * 100, 2)
        return nagiosplugin.Metric(name='load', value=cpu_usage_percent, uom='%', context='scalar_context')
#
# Check Swarm services
#
class DockerSwarmServiceStatesResource(nagiosplugin.Resource):
    """Resource that emits one metric per service returned by the API.

    Args:
        api: client object providing a ``services()`` method.
    """

    def __init__(self, api) -> None:
        self.api = api

    def probe(self):
        """check running services

        Yields:
            nagiosplugin.Metric: one ``docker_service`` metric per service; its
            value is a dict with the keys ``name``, ``image``, ``state`` and
            ``status`` copied from the service entry.
        Raises:
            nagiosplugin.CheckError: if the API did not return a list of services.
        """
        response = self.api.services()
        # services() yields None on a failed request and raw bytes for a
        # non-JSON answer; iterating either would fail with a TypeError.
        if not isinstance(response, list):
            raise nagiosplugin.CheckError('no usable service list received from Swarmpit API')
        for service in response:
            data = {
                'name': service['serviceName'],
                'image': service['repository']['image'],
                'state': service['state'],
                'status': service['status']['tasks'],
            }
            yield nagiosplugin.Metric(name='docker_service', value=data, context='docker_service')
class DockerSwarmServiceStatesContext(nagiosplugin.Context):
    """Context that maps a service metric to a check state.

    Results are created with ``DockerSwarmServiceStatesResult``.
    """

    def __init__(self, name):
        super().__init__(name, fmt_metric='{name} is', result_cls=DockerSwarmServiceStatesResult)

    def evaluate(self, metric, resource):
        """Rate a single service metric.

        Args:
            metric: metric whose value is a dict with ``state`` and ``status``
                (``running`` / ``total`` counts).
            resource: resource that produced the metric (not used).
        Returns:
            Result with state Critical when the service state is not
            'running', Warn when fewer tasks run than the total, Ok otherwise.
        """
        service = metric.value
        if service['state'] != 'running':
            return self.result_cls(nagiosplugin.Critical, "critical", metric)
        tasks = service['status']
        if tasks['running'] < tasks['total']:
            return self.result_cls(nagiosplugin.Warn, "warning", metric)
        # Every path above returns, so the former trailing Unknown result was
        # unreachable and has been removed.
        return self.result_cls(nagiosplugin.Ok, "ok", metric)
class DockerSwarmServiceStatesResult(nagiosplugin.Result):
    """Result that renders a service metric as a human readable line."""

    def __str__(self):
        """Describe the service; add the replica count when tasks are missing.

        Returns:
            str: ``Service <name> (<image>) is <state>``, followed by
            `` with <running> of <total> replicas`` when the state is
            'running' and fewer tasks run than the total.
        """
        service = self.metric.value
        text = f'Service {service["name"]} ({service["image"]}) is {service["state"]}'
        if service['state'] == 'running':
            tasks = service['status']
            if tasks['running'] < tasks['total']:
                # Double quotes inside the f-string: reusing the outer quote
                # character is a SyntaxError on Python versions before 3.12.
                text += f' with {tasks["running"]} of {tasks["total"]} replicas'
        return text
#
# Docker Swarm general summary
#
class DockerSwarmSummary(nagiosplugin.Summary):
    """Summary that lists every result in verbose output only."""

    def verbose(self, results):
        """Return all results as text, one per line, each ending in a newline."""
        return ''.join(str(entry) + '\n' for entry in results)

    def ok(self, results):
        """Provide no summary text for the ok case."""
        return None

    def problem(self, results):
        """Provide no summary text for the problem case."""
        return None
## Args
def parse_args() -> argparse.Namespace:
    """evaluates given arguments

    ``--timeout`` and ``--api-port`` are converted to int, so they have the
    same type whether they come from the default or from the command line.

    Returns:
        argparse.Namespace: Namespace object holding all arguments
        (use: args.long_name_of_argument)
    """
    argp = argparse.ArgumentParser(description=__doc__)
    # Default args
    argp.add_argument('-v', '--verbose', action='count', default=0,
                      help='increase output verbosity (use up to 3 times)')
    argp.add_argument('-H', '--hostname', default='localhost',
                      help='IP address or hostname of device to query')
    argp.add_argument('-t', '--timeout', type=int, default=30,
                      help='abort execution after TIMEOUT seconds')
    # Extend the choices when a new check mode is added to main()
    argp.add_argument('-m', '--check_mode',
                      choices=[
                          'load',
                          'service_states',
                      ],
                      help='check mode to run')
    # Nagios args / see https://nagios-plugins.org/doc/guidelines.html#THRESHOLDFORMAT
    argp.add_argument('-w', '--warning', default=':80',
                      help='warning threshold')
    argp.add_argument('-c', '--critical', default=':90',
                      help='critical threshold')
    # API args
    argp.add_argument('-A', '--api-host',
                      help='Swarmpit API host')
    argp.add_argument('-P', '--api-port', type=int, default=8080,
                      help='Swarmpit API port')
    argp.add_argument('-S', '--api-ssl', action='store_true',
                      help='Must set if Swarmpit API use https')
    argp.add_argument('-T', '--api-token',
                      help='Swarmpit API Token')
    return argp.parse_args()
## run
def main():
args = parse_args()
if args.verbose >= 3:
logging.getLogger().setLevel(logging.DEBUG)
# dice which check will be run bases on check_mode
api = SwarmpitAPI(host=args.api_host, token=args.api_token, port=args.api_port, ssl=args.api_ssl)
match args.check_mode:
case 'load':
check = nagiosplugin.Check(
DockerSwarmCoresResource(api=api),
nagiosplugin.ScalarContext(name='scalar_context', warning=args.warning, critical=args.critical),
nagiosplugin.Summary())
check.name = "swarm load"
case 'service_states':
check = nagiosplugin.Check(
DockerSwarmServiceStatesResource(api=api),
DockerSwarmServiceStatesContext(name='docker_service'),
DockerSwarmSummary())
check.name = "swarm service states"
case _:
raise nagiosplugin.CheckError(f'Unknown check mode: {args.check_mode}')
check.main(args.verbose, args.timeout)
if __name__ == '__main__':
main()