#!/usr/bin/env python3 """Check docker via swarmpit python check""" """dependencys: - pip3 install nagiosplugin - pip3 install argparse - pip3 install requests """ __version__ = '0.2.0' __author__ = 'anima' # imports import logging import argparse import nagiosplugin import requests import json # log settings logging.basicConfig(format='[%(asctime)s] %(levelname)s %(message)s', level=logging.INFO) ## API class SwarmpitAPI: def __init__(self, host:str, token:str, port:int = 8080, ssl:bool = False) -> None: self.host = host self.port = port self.token = token self.ssl = ssl def __query(self, query:str, query_type:str = 'get', data:dict = None) -> dict | None: """default query to swarmpit api Args: query (str): substring of path (behind /api/) query_type (str, optional): specify type of query, post / get. Defaults to 'get'. data (dict, optional): _description_. Defaults to None. Returns: dict | None: return json string as dict or None response """ protocol = 'http' if self.port is None and self.ssl: self.port = 443 elif self.port is None: self.port = 80 if self.ssl: protocol += 's' headers = dict() headers['Content-Type'] = 'application/json' headers['Authorization'] = f'Bearer {self.token}' match query_type: case 'get': response = requests.get(f'{protocol}://{self.host}:{self.port}/api/{query}', headers=headers, verify=False) case 'post': response = requests.post(f'{protocol}://{self.host}:{self.port}/api/{query}', headers=headers, verify=False) if response.status_code == 200: if 'application/json' in response.headers['Content-Type']: json_response = json.loads(response.text) return json_response else: return response.content else: logging.error(f'non successfull response {response.content} [{response.status_code}]') logging.debug(f'{response.request.url=}') logging.debug(f'{response.request.headers=}') logging.debug(f'{response.request.body=}') return None def statistics(self): """Cluster statistics""" return self.__query('stats') def tasks(self): """Task list - aka running services""" return self.__query('tasks') # # Check Swarm Cores # class DockerSwarmCoresResource(nagiosplugin.Resource): def __init__(self, api) -> None: self.api = api def probe(self) -> list: """check core usage of full swarm Returns: nagiosplugin.Metric: single metric element (return) """ data = self.api.statistics() cpu = data['cpu'] cpu_usage_percent = round((cpu['usage'] / cpu['cores']) * 100, 2) return nagiosplugin.Metric(name='load', value=cpu_usage_percent, uom='%', context='scalar_context') # # Check Swarm services # class DockerSwarmServiceStatesResource(nagiosplugin.Resource): def __init__(self, api) -> None: self.api = api def probe(self) -> list: """check running services Returns: Generator[nagisplugin.Metric]: multiple metric elements (yield) """ response = self.api.tasks() for service in response: data = dict() data['name'] = service['serviceName'] data['image'] = service['repository']['image'] data['state'] = service['state'] data['desiredState'] = service['desiredState'] data['error'] = service['status']['error'] yield nagiosplugin.Metric(name='docker_service', value=data, context='docker_service') class DockerSwarmServiceStatesContext(nagiosplugin.Context): def __init__(self, name): super().__init__(name, fmt_metric='{name} is', result_cls=DockerSwarmServiceStatesResult) def evaluate(self, metric, resource): if metric.value['state'] == metric.value['desiredState']: if metric.value['error'] == None: return self.result_cls(nagiosplugin.Ok, "ok", metric) else: return self.result_cls(nagiosplugin.Warn, "warning", metric) else: return self.result_cls(nagiosplugin.Critical, "critical", metric) return self.result_cls(nagiosplugin.Unknown, "unknown", metric) class DockerSwarmServiceStatesResult(nagiosplugin.Result): def __str__(self): if self.metric.value['state'] == self.metric.value['desiredState']: if self.metric.value['error'] == None: return f'Service {self.metric.value["name"]} ({self.metric.value["image"]}) is {self.metric.value["state"]}' else: return f'Service {self.metric.value["name"]} ({self.metric.value["image"]}) is {self.metric.value["state"]} with error {self.metric.value["error"]}' else: return f'Service {self.metric.value["name"]} ({self.metric.value["image"]}) is {self.metric.value["state"]} (desired: {self.metric.value["desiredState"]}) with error {self.metric.value["error"]}' # # Docker Swarm general summary # class DockerSwarmSummary(nagiosplugin.Summary): def verbose(self, results): result_str = '' for result in results: result_str += f'{str(result)}\n' return result_str def ok(self, results): return def problem(self, results): return ## Args def parse_args() -> argparse.Namespace: """evaluates given arguments Returns: argsparse.Namespace: Namespace Object with all arguments insert (use: args.long_name_of_argument) """ argp = argparse.ArgumentParser(description=__doc__) # Default args argp.add_argument('-v', '--verbose', action='count', default=0, help='increase output verbosity (use up to 3 times)') argp.add_argument('-H', '--hostname', default='localhost', help='IP address or hostname of device to query') argp.add_argument('-t', '--timeout', default=30, help='abort execution after TIMEOUT seconds') # TEMPLATE TODO: edit choices argp.add_argument('-m', '--check_mode', choices=[ 'load', 'service_states' ], help='check mode to run') # Nagios args / see https://nagios-plugins.org/doc/guidelines.html#THRESHOLDFORMAT argp.add_argument('-w', '--warning', default=':80', help='warning threshold') argp.add_argument('-c', '--critical', default=':90', help='critical threshold') # API args argp.add_argument('-A', '--api-host', help='Swarmpit API host') argp.add_argument('-P', '--api-port', default=8080, help='Swarmpot API port') argp.add_argument('-S', '--api-ssl', action='store_true', help='Must set if Swarmpit API use https') argp.add_argument('-T', '--api-token', help='Swarmpit API Token') args = argp.parse_args() return args ## run def main(): args = parse_args() if args.verbose >= 3: logging.getLogger().setLevel(logging.DEBUG) # dice which check will be run bases on check_mode api = SwarmpitAPI(host=args.api_host, token=args.api_token, port=args.api_port, ssl=args.api_ssl) api.tasks() match args.check_mode: case 'load': check = nagiosplugin.Check( DockerSwarmCoresResource(api=api), nagiosplugin.ScalarContext(name='scalar_context', warning=args.warning, critical=args.critical), nagiosplugin.Summary()) check.name = "swarm load" case 'service_states': check = nagiosplugin.Check( DockerSwarmServiceStatesResource(api=api), DockerSwarmServiceStatesContext(name='docker_service'), DockerSwarmSummary()) check.name = "swarm service states" case _: raise nagiosplugin.CheckError(f'Unknown check mode: {args.check_mode}') check.main(args.verbose, args.timeout) if __name__ == '__main__': main()