344 lines
12 KiB
Python
344 lines
12 KiB
Python
#!/usr/bin/env python3
|
|
"""Check docker via swarmpit python check"""
|
|
"""Dependencies:
|
|
- pip3 install nagiosplugin
|
|
- pip3 install argparse
|
|
- pip3 install requests
|
|
"""
|
|
|
|
# Version string of this plugin.
__version__ = '0.5.0'
|
|
# Author of this plugin.
__author__ = 'anima'
|
|
|
|
# imports
|
|
import logging
|
|
import argparse
|
|
import nagiosplugin
|
|
import requests
|
|
import json
|
|
|
|
# log settings
|
|
# Root logger starts at INFO; main() switches it to DEBUG when -v is given three times.
logging.basicConfig(format='[%(asctime)s] %(levelname)s %(message)s', level=logging.INFO)
|
|
|
|
## API
|
|
class SwarmpitAPI:
|
|
def __init__(self, host:str, token:str, port:int = 8080, ssl:bool = False) -> None:
|
|
self.host = host
|
|
self.port = port
|
|
self.token = token
|
|
self.ssl = ssl
|
|
|
|
def __query(self, query:str, query_type:str = 'get', data:dict = None) -> dict | None:
|
|
"""default query to swarmpit api
|
|
|
|
Args:
|
|
query (str): substring of path (behind /api/)
|
|
query_type (str, optional): specify type of query, post / get. Defaults to 'get'.
|
|
data (dict, optional): _description_. Defaults to None.
|
|
|
|
Returns:
|
|
dict | None: return json string as dict or None response
|
|
"""
|
|
protocol = 'http'
|
|
if self.port is None and self.ssl:
|
|
self.port = 443
|
|
elif self.port is None:
|
|
self.port = 80
|
|
if self.ssl:
|
|
protocol += 's'
|
|
headers = dict()
|
|
headers['Content-Type'] = 'application/json'
|
|
headers['Authorization'] = f'Bearer {self.token}'
|
|
match query_type:
|
|
case 'get':
|
|
response = requests.get(f'{protocol}://{self.host}:{self.port}/api/{query}', headers=headers, verify=False)
|
|
case 'post':
|
|
response = requests.post(f'{protocol}://{self.host}:{self.port}/api/{query}', headers=headers, verify=False)
|
|
|
|
if response.status_code == 200:
|
|
if 'application/json' in response.headers['Content-Type']:
|
|
json_response = json.loads(response.text)
|
|
return json_response
|
|
else:
|
|
return response.content
|
|
else:
|
|
logging.error(f'non successfull response {response.content} [{response.status_code}]')
|
|
logging.debug(f'{response.request.url=}')
|
|
logging.debug(f'{response.request.headers=}')
|
|
logging.debug(f'{response.request.body=}')
|
|
return None
|
|
|
|
def statistics(self):
|
|
"""Cluster statistics"""
|
|
return self.__query('stats')
|
|
|
|
def tasks(self):
|
|
"""Task list"""
|
|
return self.__query('tasks')
|
|
|
|
def services(self):
|
|
"""Service list"""
|
|
return self.__query('services')
|
|
|
|
def stacks(self):
|
|
"""Stack list"""
|
|
return self.__query('stacks')
|
|
|
|
#
|
|
# Check Swarm Cores
|
|
#
|
|
class DockerSwarmCoresResource(nagiosplugin.Resource):
    """Resource that reports the CPU load of the whole swarm."""

    def __init__(self, api) -> None:
        """Keep the API client used to fetch the cluster statistics.

        Args:
            api: client object providing a ``statistics()`` method
        """
        self.api = api

    def probe(self) -> nagiosplugin.Metric:
        """check core usage of full swarm

        Raises:
            nagiosplugin.CheckError: if the statistics are missing or lack
                usable ``cpu`` values

        Returns:
            nagiosplugin.Metric: single metric element (return)
        """
        data = self.api.statistics()
        if not isinstance(data, dict):
            # the API client hands back None (or raw bytes) when no JSON arrived
            raise nagiosplugin.CheckError('no statistics received from Swarmpit API')

        try:
            cpu = data['cpu']
            # NOTE(review): assumes cpu['usage'] is expressed in cores, so that
            # usage / cores is a ratio -- confirm against the Swarmpit API.
            cpu_usage_percent = round((cpu['usage'] / cpu['cores']) * 100, 2)
        except (KeyError, TypeError, ZeroDivisionError) as err:
            raise nagiosplugin.CheckError(f'unexpected cpu statistics: {data!r}') from err

        return nagiosplugin.Metric(name='load', value=cpu_usage_percent, uom='%', context='scalar_context')
|
|
|
|
|
|
#
|
|
# Check Swarm memory
|
|
#
|
|
class DockerSwarmMemoryResource(nagiosplugin.Resource):
    """Resource that reports the memory usage of the whole swarm."""

    def __init__(self, api) -> None:
        """Keep the API client used to fetch the cluster statistics.

        Args:
            api: client object providing a ``statistics()`` method
        """
        self.api = api

    def probe(self) -> nagiosplugin.Metric:
        """check memory usage of full swarm

        Raises:
            nagiosplugin.CheckError: if the statistics are missing or lack a
                usable ``memory`` usage value

        Returns:
            nagiosplugin.Metric: single metric element (return)
        """
        data = self.api.statistics()
        if not isinstance(data, dict):
            # the API client hands back None (or raw bytes) when no JSON arrived
            raise nagiosplugin.CheckError('no statistics received from Swarmpit API')

        try:
            memory = data['memory']
            memory_usage_percent = round(memory['usage'], 2)
        except (KeyError, TypeError) as err:
            raise nagiosplugin.CheckError(f'unexpected memory statistics: {data!r}') from err

        # No debug print here: anything written to stdout ends up in the plugin output.
        return nagiosplugin.Metric(name='memory', value=memory_usage_percent, uom='%', context='scalar_context')
|
|
|
|
|
|
#
|
|
# Check Swarm services
|
|
#
|
|
class DockerSwarmServiceStatesResource(nagiosplugin.Resource):
    """Resource that emits one metric per swarm service with its state."""

    def __init__(self, api) -> None:
        """Keep the API client used to fetch the service list.

        Args:
            api: client object providing a ``services()`` method
        """
        self.api = api

    def probe(self):
        """check running service states

        Raises:
            nagiosplugin.CheckError: if the API did not deliver a service list

        Yields:
            nagiosplugin.Metric: one metric per service; its value is a dict
            with the keys ``name``, ``image``, ``state`` and ``status``
        """
        services = self.api.services()
        if not isinstance(services, (list, tuple)):
            # None (failed request) or raw bytes (non-JSON answer)
            raise nagiosplugin.CheckError('no service list received from Swarmpit API')

        for service in services:
            details = {
                'name': service['serviceName'],
                'image': service['repository']['image'],
                'state': service['state'],
                'status': service['status']['tasks'],
            }
            yield nagiosplugin.Metric(name='docker_service', value=details, context='docker_service')
|
|
|
|
|
|
class DockerSwarmServiceStatesContext(nagiosplugin.Context):
    """Context that rates a service metric by its state and running tasks."""

    def __init__(self, name):
        """Register the context under ``name`` with the matching result class.

        Args:
            name (str): context name referenced by the metrics
        """
        super().__init__(name, fmt_metric='{name} is', result_cls=DockerSwarmServiceStatesResult)

    def evaluate(self, metric, resource):
        """Map one service metric to a check result.

        Args:
            metric (nagiosplugin.Metric): metric whose value is a dict with the
                keys ``state`` and ``status`` (``running`` / ``total`` counts)
            resource (nagiosplugin.Resource): originating resource (unused)

        Returns:
            nagiosplugin.Result: Critical if the service is not running, Warn
            if fewer tasks run than the total, Ok otherwise
        """
        service = metric.value
        if service['state'] != 'running':
            return self.result_cls(nagiosplugin.Critical, "critical", metric)

        tasks = service['status']
        if tasks['running'] < tasks['total']:
            return self.result_cls(nagiosplugin.Warn, "warning", metric)
        return self.result_cls(nagiosplugin.Ok, "ok", metric)
|
|
|
|
|
|
class DockerSwarmServiceStatesResult(nagiosplugin.Result):
    """Result whose text describes the state of a single service."""

    def __str__(self):
        """Render the service name, image and state.

        The replica count is appended only for a running service that has
        fewer running tasks than its total.
        """
        is_running = self.metric.value['state'] in ['running']
        # the task counts are only looked at for running services
        if is_running and self.metric.value['status']['running'] < self.metric.value['status']['total']:
            return f'Service {self.metric.value["name"]} ({self.metric.value["image"]}) is {self.metric.value["state"]} with {self.metric.value["status"]["running"]} of {self.metric.value["status"]["total"]} replicas'
        return f'Service {self.metric.value["name"]} ({self.metric.value["image"]}) is {self.metric.value["state"]}'
|
|
|
|
|
|
class DockerSwarmServiceUpdatesResource(nagiosplugin.Resource):
    """Resource that compares each service's image digest with Docker Hub's ``latest`` tag."""

    # Seconds to wait for Docker Hub before the lookup counts as failed.
    HUB_TIMEOUT = 10

    def __init__(self, api) -> None:
        """Keep the API client used to fetch the service list.

        Args:
            api: client object providing a ``services()`` method
        """
        self.api = api

    @staticmethod
    def _log_request(response) -> None:
        """Write the request behind ``response`` to the debug log."""
        logging.debug(f'{response.request.url=}')
        logging.debug(f'{response.request.headers=}')
        logging.debug(f'{response.request.body=}')

    def _latest_digest(self, image: str) -> str | None:
        """Look up the digest of the ``latest`` tag of ``image`` on Docker Hub.

        Args:
            image (str): repository name as reported by Swarmpit

        Returns:
            str | None: the digest, or None whenever it cannot be determined
            (connection problem, any status other than 200, unusable body)
        """
        url = f'https://hub.docker.com/v2/repositories/{image}/tags/latest'
        try:
            versions = requests.get(url, timeout=self.HUB_TIMEOUT)
        except requests.RequestException as err:
            logging.error(f'request to {url} failed: {err}')
            return None

        # Every non-200 answer (404, 429 rate limit, 5xx, ...) means "unknown",
        # so the metric always carries a latestDigest entry.
        if versions.status_code != 200:
            logging.error(f'non successfull response {versions.content} [{versions.status_code}]')
            self._log_request(versions)
            return None

        try:
            json_data = json.loads(versions.text)
        except ValueError:
            json_data = None
        if isinstance(json_data, dict) and 'digest' in json_data:
            return json_data['digest']

        logging.error(f'no digest in response {versions.content} [{versions.status_code}]')
        self._log_request(versions)
        return None

    def probe(self):
        """check service updates

        maybe problems incoming with rate limit: https://docs.docker.com/docker-hub/usage/

        Raises:
            nagiosplugin.CheckError: if the API did not deliver a service list

        Yields:
            nagiosplugin.Metric: one metric per service; its value is a dict
            with the keys ``name``, ``image``, ``tag``, ``imageDigest`` and
            ``latestDigest`` (None when Docker Hub could not be queried)
        """
        services = self.api.services()
        if not isinstance(services, (list, tuple)):
            # None (failed request) or raw bytes (non-JSON answer)
            raise nagiosplugin.CheckError('no service list received from Swarmpit API')

        for service in services:
            repository = service['repository']
            data = {
                'name': service['serviceName'],
                'image': repository['name'],
                'tag': repository['tag'],
                'imageDigest': repository['imageDigest'],
            }
            data['latestDigest'] = self._latest_digest(data['image'])
            yield nagiosplugin.Metric(name='docker_service', value=data, context='docker_service')
|
|
|
|
|
|
class DockerSwarmServiceUpdatesContext(nagiosplugin.Context):
    """Context that rates a service metric by comparing image digests."""

    def __init__(self, name):
        """Register the context under ``name`` with the matching result class.

        Args:
            name (str): context name referenced by the metrics
        """
        super().__init__(name, fmt_metric='{name} is', result_cls=DockerSwarmServiceUpdatesResult)

    def evaluate(self, metric, resource):
        """Map one service metric to a check result.

        Args:
            metric (nagiosplugin.Metric): metric whose value is a dict with the
                keys ``imageDigest`` and ``latestDigest``
            resource (nagiosplugin.Resource): originating resource (unused)

        Returns:
            nagiosplugin.Result: Ok if the digests match or the latest digest
            is unknown (None), Warn otherwise
        """
        latest = metric.value['latestDigest']
        # an unknown latest digest is deliberately not treated as a problem
        if latest is None or metric.value['imageDigest'] == latest:
            return self.result_cls(nagiosplugin.Ok, "ok", metric)
        return self.result_cls(nagiosplugin.Warn, "warning", metric)
|
|
|
|
|
|
class DockerSwarmServiceUpdatesResult(nagiosplugin.Result):
    """Result whose text describes the update status of a single service."""

    def __str__(self):
        """Render the service name, image and tag with its update status."""
        deployed = self.metric.value['imageDigest']
        newest = self.metric.value['latestDigest']

        # identical digests: nothing to do
        if deployed == newest:
            return f'Service {self.metric.value["name"]} ({self.metric.value["image"]}:{self.metric.value["tag"]}) is up2date'
        # no digest available for comparison
        if newest is None:
            return f'Service {self.metric.value["name"]} ({self.metric.value["image"]}:{self.metric.value["tag"]}) can not checked'
        return f'Service {self.metric.value["name"]} ({self.metric.value["image"]}:{self.metric.value["tag"]}) has a newer latest version'
|
|
|
|
|
|
#
|
|
# Docker Swarm general summary
|
|
#
|
|
class DockerSwarmSummary(nagiosplugin.Summary):
    """Summary that lists every result in the verbose output and adds no status text."""

    def verbose(self, results):
        """Return all results as text, one result per line.

        Args:
            results: iterable of results; each is converted with ``str()``

        Returns:
            str: the concatenated lines, each terminated by a newline
        """
        return ''.join(f'{str(result)}\n' for result in results)

    def ok(self, results):
        """Provide no summary text for the OK case."""
        return None

    def problem(self, results):
        """Provide no summary text for the problem case."""
        return None
|
|
|
|
|
|
## Args
|
|
def parse_args() -> argparse.Namespace:
|
|
"""evaluates given arguments
|
|
|
|
Returns:
|
|
argsparse.Namespace: Namespace Object with all arguments insert (use: args.long_name_of_argument)
|
|
"""
|
|
argp = argparse.ArgumentParser(description=__doc__)
|
|
# Default args
|
|
argp.add_argument('-v', '--verbose', action='count', default=0,
|
|
help='increase output verbosity (use up to 3 times)')
|
|
argp.add_argument('-H', '--hostname', default='localhost',
|
|
help='IP address or hostname of device to query')
|
|
argp.add_argument('-t', '--timeout', default=30,
|
|
help='abort execution after TIMEOUT seconds')
|
|
# TEMPLATE TODO: edit choices
|
|
argp.add_argument('-m', '--check_mode',
|
|
choices=[
|
|
'load',
|
|
'memory',
|
|
'service_states',
|
|
'service_updates'
|
|
],
|
|
help='check mode to run')
|
|
|
|
# Nagios args / see https://nagios-plugins.org/doc/guidelines.html#THRESHOLDFORMAT
|
|
argp.add_argument('-w', '--warning', default=':80',
|
|
help='warning threshold')
|
|
argp.add_argument('-c', '--critical', default=':90',
|
|
help='critical threshold')
|
|
|
|
# API args
|
|
argp.add_argument('-A', '--api-host',
|
|
help='Swarmpit API host')
|
|
argp.add_argument('-P', '--api-port', default=8080,
|
|
help='Swarmpot API port')
|
|
argp.add_argument('-S', '--api-ssl', action='store_true',
|
|
help='Must set if Swarmpit API use https')
|
|
argp.add_argument('-T', '--api-token',
|
|
help='Swarmpit API Token')
|
|
|
|
args = argp.parse_args()
|
|
return args
|
|
|
|
## run
|
|
def main():
|
|
args = parse_args()
|
|
if args.verbose >= 3:
|
|
logging.getLogger().setLevel(logging.DEBUG)
|
|
|
|
# dice which check will be run bases on check_mode
|
|
api = SwarmpitAPI(host=args.api_host, token=args.api_token, port=args.api_port, ssl=args.api_ssl)
|
|
match args.check_mode:
|
|
case 'load':
|
|
check = nagiosplugin.Check(
|
|
DockerSwarmCoresResource(api=api),
|
|
nagiosplugin.ScalarContext(name='scalar_context', warning=args.warning, critical=args.critical),
|
|
nagiosplugin.Summary())
|
|
check.name = "swarm load"
|
|
case 'memory':
|
|
check = nagiosplugin.Check(
|
|
DockerSwarmMemoryResource(api=api),
|
|
nagiosplugin.ScalarContext(name='scalar_context', warning=args.warning, critical=args.critical),
|
|
nagiosplugin.Summary())
|
|
check.name = "swarm memory"
|
|
case 'service_states':
|
|
check = nagiosplugin.Check(
|
|
DockerSwarmServiceStatesResource(api=api),
|
|
DockerSwarmServiceStatesContext(name='docker_service'),
|
|
DockerSwarmSummary())
|
|
check.name = "swarm service states"
|
|
case 'service_updates':
|
|
check = nagiosplugin.Check(
|
|
DockerSwarmServiceUpdatesResource(api=api),
|
|
DockerSwarmServiceUpdatesContext(name='docker_service'),
|
|
DockerSwarmSummary())
|
|
check.name = "swarm service updates"
|
|
case _:
|
|
raise nagiosplugin.CheckError(f'Unknown check mode: {args.check_mode}')
|
|
|
|
check.main(args.verbose, args.timeout)
|
|
|
|
# Run the plugin only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()