#!/usr/bin/env python3
"""SNMP based Synology DSM / DSM UC checks"""

# Dependencies:
#   - pip3 install nagiosplugin
#   - pip3 install argparse
#   - pip3 install easysnmp
#     - https://easysnmp.readthedocs.io/en/latest/index.html
#     - https://easysnmp.readthedocs.io/en/latest/session_api.html

__version__ = '0.10.0'
__author__ = 'anima'

# imports
import logging
import argparse
import nagiosplugin
from easysnmp import Session

# log settings
logging.basicConfig(format='[%(asctime)s] %(levelname)s %(message)s', level=logging.INFO)


def init_snmp_session(hostname: str = 'localhost',
                      snmp_version: str | int = '2',
                      snmp_community: str = 'public',
                      snmp_security_level: str = 'auth_with_privacy',
                      snmp_security_username: str = 'monitoring',
                      snmp_auth_protocol: str = 'SHA256',
                      snmp_auth_password: str | None = None,
                      snmp_privacy_protocol: str = 'AES',
                      snmp_privacy_password: str | None = None
                      ) -> Session:
    """Validate the SNMP parameters and create an easysnmp session.

    Args:
        hostname (str, optional): hostname or ip of host. Defaults to 'localhost'.
        snmp_version (str | int, optional): version of snmp (1, 2, 3). Defaults to 2.
        snmp_community (str, only needed for v1 & v2): snmp community (any).
            Defaults to 'public'.
        snmp_security_level (str, only needed for v3): security level
            (no_auth_or_privacy, auth_without_privacy or auth_with_privacy).
            Defaults to 'auth_with_privacy'.
        snmp_security_username (str, only needed for v3): security name (any).
            Defaults to 'monitoring'.
        snmp_auth_protocol (str, only needed for v3 with auth): authentication
            protocol (MD5, SHA, SHA256, SHA512). Defaults to 'SHA256'.
        snmp_auth_password (str, only needed for v3 with auth): authentication
            passphrase (any). Defaults to None.
        snmp_privacy_protocol (str, only needed for v3 with privacy): privacy
            protocol (AES, DES). Defaults to 'AES'.
        snmp_privacy_password (str, only needed for v3 with privacy): privacy
            passphrase (any). Defaults to None.

    Returns:
        easysnmp.Session: session for get or walk actions

    Raises:
        ValueError: if the version, security level or a protocol is not one of
            the accepted values, or if a passphrase required by the chosen
            security level is missing.
    """
    # accept the version as str or int, continue with an int
    valid_versions: list[str | int] = ['1', '2', '3', 1, 2, 3]
    if snmp_version not in valid_versions:
        raise ValueError(f'{snmp_version=} non of {valid_versions=}')
    snmp_version = int(snmp_version)

    # v1 / v2 only need the community string
    if snmp_version != 3:
        return Session(hostname=hostname,
                       version=snmp_version,
                       community=snmp_community)

    # --- SNMP v3 ---
    # (the version is an int at this point, so it must be compared against 3, not '3')
    valid_security_levels: list[str] = ['no_auth_or_privacy', 'auth_without_privacy', 'auth_with_privacy']
    if snmp_security_level not in valid_security_levels:
        raise ValueError(f'{snmp_security_level=} is non of {valid_security_levels=}')

    # Only the options that belong to the chosen security level are passed on;
    # everything else is left to the Session defaults so that an unset (None)
    # passphrase is never handed to easysnmp.
    v3_options = {
        'security_level': snmp_security_level,
        'security_username': snmp_security_username,
    }

    # authentication is used by both 'auth_*' levels
    if snmp_security_level.startswith('auth'):
        valid_auth_protocols: list[str] = ['MD5', 'SHA', 'SHA256', 'SHA512']
        if snmp_auth_protocol not in valid_auth_protocols:
            raise ValueError(f'{snmp_auth_protocol=} is non of {valid_auth_protocols=}')
        if snmp_auth_password is None:
            raise ValueError(f'{snmp_auth_password=} is not set')
        v3_options['auth_protocol'] = snmp_auth_protocol
        v3_options['auth_password'] = snmp_auth_password

    # privacy is only used by 'auth_with_privacy'
    if snmp_security_level == 'auth_with_privacy':
        valid_privacy_protocols: list[str] = ['AES', 'DES']
        if snmp_privacy_protocol not in valid_privacy_protocols:
            raise ValueError(f'{snmp_privacy_protocol=} is non of {valid_privacy_protocols=}')
        if snmp_privacy_password is None:
            raise ValueError(f'{snmp_privacy_password=} is not set')
        v3_options['privacy_protocol'] = snmp_privacy_protocol
        v3_options['privacy_password'] = snmp_privacy_password

    # 2025-01-02: only 'auth_with_privacy' is tested
    return Session(hostname=hostname, version=snmp_version, **v3_options)


#
# Synology [DSM & DSM UC] Checks disk status (baseclass)
#
class SNMPSynologyDisk():
    """Mixin/base class that reads the disk table via SNMP walk."""

    def __init__(self, session):
        self.session = session

    def get_disks(self):
        """Walk the disk table and return one dict per disk.

        The walk result is parsed in a single pass. A table cell is assigned to
        a disk only when the row index in its OID is *exactly* the disk index
        (a substring comparison would mix up e.g. disk 1 with disks 10, 11, ...).

        Returns:
            list[dict]: one dict per disk, in the order of the index column.
                Keys are the field names below (only those present in the walk
                result), values are the raw SNMP values.
        """
        baseoid = '.1.3.6.1.4.1.6574.2.1.1'
        # part of the OID that precedes '<column>.<row index>'
        # NOTE(review): like the previous implementation this assumes that
        # result.oid holds the complete dotted OID including the row index.
        marker = '6574.2.1.1.'
        # table column number -> field name
        columns = {
            '1': 'index',
            '2': 'id',
            '3': 'model',
            '4': 'type',
            '5': 'status',
            '6': 'temp',
            '7': 'role',
            '8': 'retry',
            '9': 'badSectors',
            '10': 'identFail',
            '11': 'remainLife',
            '12': 'name',
            '13': 'health',
        }

        disk_ids = []
        cells = {}  # (column, row index) -> value
        for result in self.session.walk(baseoid):
            _, found, tail = result.oid.partition(marker)
            if not found:
                continue
            column, dot, row = tail.partition('.')
            if not dot:
                continue
            cells[(column, row)] = result.value
            if column == '1':
                # the value of the index column identifies the disk
                disk_ids.append(result.value)

        return [
            {name: cells[(column, disk)]
             for column, name in columns.items()
             if (column, disk) in cells}
            for disk in disk_ids
        ]


#
# Synology General Summary
#
class SNMPSynologySummary(nagiosplugin.Summary):
    """Summary that lists every result in the verbose output only."""

    def verbose(self, results):
        """Return all results (ok and non-ok) as text, one result per line."""
        return ''.join(f'{result}\n' for result in results)

    def ok(self, results):
        """Return None, i.e. this class contributes no summary text."""
        return

    def problem(self, results):
        """Return None, i.e. this class contributes no summary text."""
        return


#
# Synology [DSM & DSM UC] System partition status.
#
class SNMPSynologySystemResource(nagiosplugin.Resource):
    """Reads system status, model and serial number."""

    def __init__(self, session) -> None:
        self.session = session

    def probe(self) -> nagiosplugin.Metric:
        """check system status (normal or failed)

        Returns:
            nagiosplugin.Metric: single metric element (return); its value is a
                dict with the keys 'status', 'model' and 'sn'
        """
        baseoid = '.1.3.6.1.4.1.6574.1'
        oids = {
            'status': '.1.0',
            'model': '.5.1.0',
            'sn': '.5.2.0',
        }
        result = {key: self.session.get(baseoid + oid).value for key, oid in oids.items()}
        return nagiosplugin.Metric(name='status', value=result, context='system_context')


class SNMPSynologySystemContext(nagiosplugin.Context):
    """Maps the system status value to a Nagios state."""

    def __init__(self, name):
        super().__init__(name, fmt_metric='{name} is', result_cls=SNMPSynologySystemResult)

    def evaluate(self, metric, resource):
        """Return Ok for status '1', Critical for '2' and Unknown otherwise."""
        status = metric.value['status']
        if status == '2':
            return self.result_cls(nagiosplugin.Critical, "critical", metric)
        if status == '1':
            return self.result_cls(nagiosplugin.Ok, "ok", metric)
        return self.result_cls(nagiosplugin.Unknown, "unknown", metric)


class SNMPSynologySystemResult(nagiosplugin.Result):
    """Human readable text for a system status result."""

    def __str__(self):
        info = self.metric.value
        # values other than '1'/'2' are evaluated as Unknown by the context,
        # so they must not be reported as 'failed'
        status = {'1': 'normal', '2': 'failed'}.get(info['status'], 'unknown')
        return f'{info["model"]} (SN: {info["sn"]}) is in status {status}!'


#
# Synology [DSM & DSM UC] Temperature of NAS
#
class SNMPSynologyTemperatureResource(nagiosplugin.Resource, SNMPSynologyDisk):
    """Reads the system temperature and the temperature of every disk."""

    def __init__(self, session) -> None:
        self.session = session

    def probe(self):
        """check system and disk temperature in °C

        Yields:
            nagiosplugin.Metric: 'temp_system' first, then one 'temp_<disk id>'
                metric per disk returned by get_disks()
        """
        baseoid = '.1.3.6.1.4.1.6574.1'
        result = self.session.get(baseoid + '.2.0').value
        yield nagiosplugin.Metric(name='temp_system',
                                  value=int(result),
                                  uom='°C',
                                  context='temperature_scalar_context')

        for disk in self.get_disks():
            # the disk id is lower-cased and blanks are replaced to build the metric name
            yield nagiosplugin.Metric(name='temp_' + disk['id'].lower().replace(' ', '_'),
                                      value=int(disk['temp']),
                                      uom='°C',
                                      context='temperature_scalar_context')


#
# Synology [DSM & DSM UC] Returns error if power supplies fail
#
class SNMPSynologyPowerSupplyResource(nagiosplugin.Resource):
    """Reads the power supply status."""

    def __init__(self, session) -> None:
        self.session = session

    def probe(self) -> nagiosplugin.Metric:
        """check power supply status (normal or failed)

        Returns:
            nagiosplugin.Metric: single metric element (return)
        """
        baseoid = '.1.3.6.1.4.1.6574.1'
        result = self.session.get(baseoid + '.3.0').value
        return nagiosplugin.Metric(name='status', value=result, context='powersupply_context')


class SNMPSynologyPowerSupplyContext(nagiosplugin.Context):
    """Maps the power supply status value to a Nagios state."""

    def __init__(self, name):
        super().__init__(name, fmt_metric='{name} is', result_cls=SNMPSynologyPowerSupplyResult)

    def evaluate(self, metric, resource):
        """Return Ok for status '1', Critical for '2' and Unknown otherwise."""
        if metric.value == '2':
            return self.result_cls(nagiosplugin.Critical, "critical", metric)
        if metric.value == '1':
            return self.result_cls(nagiosplugin.Ok, "ok", metric)
        return self.result_cls(nagiosplugin.Unknown, "unknown", metric)


class SNMPSynologyPowerSupplyResult(nagiosplugin.Result):
    """Human readable text for a power supply result."""

    def __str__(self):
        # values other than '1'/'2' are evaluated as Unknown by the context
        status = {'1': 'normal', '2': 'failed'}.get(self.metric.value, 'unknown')
        return f'Power Supply is in status {status}!'


#
# Synology [DSM & DSM UC] returns error if system fan fails
#
class SNMPSynologyFansResource(nagiosplugin.Resource):
    """Reads the status of the system fan and the cpu fan."""

    def __init__(self, session) -> None:
        self.session = session

    def probe(self):
        """check fans status (normal or failed)

        Yields:
            nagiosplugin.Metric: one metric per fan ('system', 'cpu')
        """
        baseoid = '.1.3.6.1.4.1.6574.1'
        fans = {
            'system': '.4.1.0',
            'cpu': '.4.2.0',
        }
        for fan, oid in fans.items():
            result = self.session.get(baseoid + oid).value
            yield nagiosplugin.Metric(name=fan, value=result, context='fan_context')


class SNMPSynologyFansContext(nagiosplugin.Context):
    """Maps a fan status value to a Nagios state."""

    def __init__(self, name):
        super().__init__(name, fmt_metric='{name} is', result_cls=SNMPSynologyFansResult)

    def evaluate(self, metric, resource):
        """Return Ok for status '1', Critical for '2' and Unknown otherwise."""
        if metric.value == '2':
            return self.result_cls(nagiosplugin.Critical, "critical", metric)
        if metric.value == '1':
            return self.result_cls(nagiosplugin.Ok, "ok", metric)
        return self.result_cls(nagiosplugin.Unknown, "unknown", metric)


class SNMPSynologyFansResult(nagiosplugin.Result):
    """Human readable text for a fan result."""

    def __str__(self):
        # values other than '1'/'2' are evaluated as Unknown by the context
        status = {'1': 'normal', '2': 'failed'}.get(self.metric.value, 'unknown')
        return f'{self.metric.name} fan is in status {status}!'
# # Synology [DSM & DSM UC] Checks whether a new version or update of DSM is available # class SNMPSynologyFirmwareResource(nagiosplugin.Resource): def __init__(self, session) -> None: self.session = session def probe(self) -> list: """check if firmware update available Returns: nagiosplugin.Metric: single metric element (return) """ baseoid = '.1.3.6.1.4.1.6574.1' oids = dict() oids['version'] = '.5.3.0' oids['status'] = '.5.4.0' oids['model'] = '.5.1.0' oids['sn'] = '.5.2.0' result = dict() for key, oid in oids.items(): result[key] = self.session.get(baseoid + oid).value return nagiosplugin.Metric(name='status', value=result, context='firmware_context') class SNMPSynologyFirmwareContext(nagiosplugin.Context): def __init__(self, name): super().__init__(name, fmt_metric='{name} is', result_cls=SNMPSynologyFirmwareResult) def evaluate(self, metric, resource): if metric.value['status'] == '1': return self.result_cls(nagiosplugin.Critical, "critical", metric) elif metric.value['status'] == '2': return self.result_cls(nagiosplugin.Ok, "ok", metric) elif metric.value['status'] in ['3', '4']: return self.result_cls(nagiosplugin.Warn, "warning", metric) return self.result_cls(nagiosplugin.Unknown, "unknown", metric) class SNMPSynologyFirmwareResult(nagiosplugin.Result): def __str__(self): if self.metric.value['status'] == '1': status = 'available' elif self.metric.value['status'] == '2': status = 'up2date' elif self.metric.value['status'] == '3': status = 'connecting' elif self.metric.value['status'] == '4': status = 'disconnected' else: status = 'unknown' return f'Firmware of {self.metric.value["model"]} (SN: {self.metric.value["sn"]}; Version: {self.metric.value["version"]}) is in status {status}!' 
#
# Synology [DSM] Utilization % is the sum of user and system CPU usage
#
class SNMPSynologyCPUResource(nagiosplugin.Resource):
    """Reads the CPU utilization."""

    def __init__(self, session) -> None:
        self.session = session

    def probe(self) -> nagiosplugin.Metric:
        """check cpu usage %

        Returns:
            nagiosplugin.Metric: single metric element (return)
        """
        baseoid = '.1.3.6.1.4.1.6574.1'
        result = self.session.get(baseoid + '.7.1.0').value
        return nagiosplugin.Metric(name='cpu', value=int(result), uom='%', context='cpu_scalar_context')


#
# Synology [DSM] Memory utilization in %
#
class SNMPSynologyMemoryResource(nagiosplugin.Resource):
    """Reads the memory utilization."""

    def __init__(self, session) -> None:
        self.session = session

    def probe(self) -> nagiosplugin.Metric:
        """check memory usage in %

        Returns:
            nagiosplugin.Metric: single metric element (return)
        """
        baseoid = '.1.3.6.1.4.1.6574.1'
        result = self.session.get(baseoid + '.7.2.0').value
        return nagiosplugin.Metric(name='memory', value=int(result), uom='%', context='memory_scalar_context')


#
# Synology [DSM & DSM UC] Checks disks
#
class SNMPSynologyDiskHealthResource(nagiosplugin.Resource, SNMPSynologyDisk):
    """Emits a status metric and a health metric for every disk."""

    def __init__(self, session) -> None:
        self.session = session

    def probe(self):
        """check status and health of all disks

        Yields:
            nagiosplugin.Metric: two metrics per disk, both named after the
                disk id and both carrying the complete disk dict as value;
                one for 'disk_status_context', one for 'disk_health_context'
        """
        for disk in self.get_disks():
            yield nagiosplugin.Metric(name=disk['id'], value=disk, context='disk_status_context')
            yield nagiosplugin.Metric(name=disk['id'], value=disk, context='disk_health_context')


class SNMPSynologyDiskStatusContext(nagiosplugin.Context):
    """Maps the 'status' field of a disk to a Nagios state."""

    def __init__(self, name):
        super().__init__(name, fmt_metric='{name} is', result_cls=SNMPSynologyDiskStatusResult)

    def evaluate(self, metric, resource):
        """Return Critical for '4'/'5', Warn for '2'/'3', Ok for '1', Unknown otherwise."""
        status = metric.value['status']
        if status in ['4', '5']:
            return self.result_cls(nagiosplugin.Critical, "critical", metric)
        if status in ['2', '3']:
            return self.result_cls(nagiosplugin.Warn, "warning", metric)
        if status == '1':
            return self.result_cls(nagiosplugin.Ok, "ok", metric)
        return self.result_cls(nagiosplugin.Unknown, "unknown", metric)


class SNMPSynologyDiskStatusResult(nagiosplugin.Result):
    """Human readable text for a disk status result."""

    def __str__(self):
        labels = {
            '1': 'Normal',
            '2': 'Initialized',
            '3': 'NotInitialized',
            '4': 'SystemPartitionFailed',
            '5': 'Crashed',
        }
        status = labels.get(self.metric.value['status'], 'unknown')
        return f'{self.metric.name} (role: {self.metric.value["role"]}) is in status {status}!'


class SNMPSynologyDiskHealthContext(nagiosplugin.Context):
    """Maps the 'health' field of a disk to a Nagios state."""

    def __init__(self, name):
        super().__init__(name, fmt_metric='{name} is', result_cls=SNMPSynologyDiskHealthResult)

    def evaluate(self, metric, resource):
        """Return Critical for '3'/'4', Warn for '2', Ok for '1', Unknown otherwise."""
        health = metric.value['health']
        if health in ['3', '4']:
            return self.result_cls(nagiosplugin.Critical, "critical", metric)
        if health == '2':
            return self.result_cls(nagiosplugin.Warn, "warning", metric)
        if health == '1':
            return self.result_cls(nagiosplugin.Ok, "ok", metric)
        return self.result_cls(nagiosplugin.Unknown, "unknown", metric)


class SNMPSynologyDiskHealthResult(nagiosplugin.Result):
    """Human readable text for a disk health result."""

    def __str__(self):
        labels = {
            '1': 'Normal',
            '2': 'Warning',
            '3': 'Critical',
            '4': 'Failing',
        }
        status = labels.get(self.metric.value['health'], 'unknown')
        return f'Health of {self.metric.name} is in status {status}!'
# # Arguments # def parse_args() -> argparse.Namespace: """evaluates given arguments Returns: argsparse.Namespace: Namespace Object with all arguments insert (use: args.long_name_of_argument) """ argp = argparse.ArgumentParser(description=__doc__) # Default args argp.add_argument('-v', '--verbose', action='count', default=0, help='increase output verbosity (use up to 3 times)') argp.add_argument('-H', '--hostname', default='localhost', help='IP address or hostname of device to query') argp.add_argument('-t', '--timeout', default=30, help='abort execution after TIMEOUT seconds') # TEMPLATE TODO: edit choices argp.add_argument('-m', '--check_mode', choices=[ 'system', 'temperature', 'powersupply', 'fans', 'firmware', 'cpu', 'memory', 'disk', ], help='check mode to run') # Nagios args / see https://nagios-plugins.org/doc/guidelines.html#THRESHOLDFORMAT argp.add_argument('-w', '--warning', default=':80', help='warning threshold') argp.add_argument('-c', '--critical', default=':90', help='critical threshold') # SNMP args (same -* as snmpwalk / snmpget linux command; exept -C/-c & -V /-v because already used (see above)) argp.add_argument('-V', '--snmp-version', default='2', help='Used SNMP Version. 
(1, 2, 3)') argp.add_argument('-C', '--snmp_community', default='public', help='SNMP community if snmp v1 or v2 used (any).') argp.add_argument('-l', '--snmp-security-level', default='auth_with_privacy', help='Security level (no_auth_or_privacy, auth_without_privacy or auth_with_privacy)') argp.add_argument('-u', '--snmp-security-username', default='monitoring', help='Security name (any)') argp.add_argument('-a', '--snmp-auth-protocol', default='SHA256', help='Authentication protocol (MD5, SHA256, SHA512)') argp.add_argument('-A', '--snmp-auth-pass', default=None, help='Authentication passphrase (any)') argp.add_argument('-x', '--snmp-privacy-protocol', default='AES', help='Privacy protocol (AES, DES)') argp.add_argument('-X', '--snmp-privacy-pass', default=None, help='Privacy passphrase (any)') args = argp.parse_args() return args # # init # def main(): args = parse_args() if args.verbose >= 3: logging.getLogger().setLevel(logging.DEBUG) session = init_snmp_session(hostname=args.hostname, snmp_version=args.snmp_version, snmp_community=args.snmp_community, snmp_security_level=args.snmp_security_level, snmp_security_username=args.snmp_security_username, snmp_auth_protocol=args.snmp_auth_protocol, snmp_auth_password=args.snmp_auth_pass, snmp_privacy_protocol=args.snmp_privacy_protocol, snmp_privacy_password=args.snmp_privacy_pass) # dice which check will be run bases on check_mode match args.check_mode: case 'system': check = nagiosplugin.Check(SNMPSynologySystemResource(session=session), SNMPSynologySystemContext(name='system_context'), nagiosplugin.Summary()) check.name = "System Status" case 'temperature': check = nagiosplugin.Check(SNMPSynologyTemperatureResource(session=session), nagiosplugin.ScalarContext(name='temperature_scalar_context', warning=args.warning, critical=args.critical), nagiosplugin.Summary()) check.name = "System Temperature" case 'powersupply': check = nagiosplugin.Check(SNMPSynologyPowerSupplyResource(session=session), 
SNMPSynologyPowerSupplyContext(name='powersupply_context'), nagiosplugin.Summary()) check.name = "Power Supply Status" case 'fans': check = nagiosplugin.Check(SNMPSynologyFansResource(session=session), SNMPSynologyFansContext(name='fan_context'), SNMPSynologySummary()) check.name = "Fans Status" case 'firmware': check = nagiosplugin.Check(SNMPSynologyFirmwareResource(session=session), SNMPSynologyFirmwareContext(name='firmware_context'), nagiosplugin.Summary()) check.name = "Firmware" case 'cpu': check = nagiosplugin.Check(SNMPSynologyCPUResource(session=session), nagiosplugin.ScalarContext(name='cpu_scalar_context', warning=args.warning, critical=args.critical), nagiosplugin.Summary()) check.name = "CPU Usage" case 'memory': check = nagiosplugin.Check(SNMPSynologyMemoryResource(session=session), nagiosplugin.ScalarContext(name='memory_scalar_context', warning=args.warning, critical=args.critical), nagiosplugin.Summary()) check.name = "Memory Usage" case 'disk': check = nagiosplugin.Check(SNMPSynologyDiskHealthResource(session=session), SNMPSynologyDiskHealthContext(name='disk_health_context'), SNMPSynologyDiskStatusContext(name='disk_status_context'), SNMPSynologySummary()) check.name = "Disk Health" pass case _: raise nagiosplugin.CheckError(f'Unknown check mode: {args.check_mode}') check.main(args.verbose, args.timeout) if __name__ == '__main__': main()