From 3ccc3fdf762cd1d444f8e876ff2ffa3967e014d8 Mon Sep 17 00:00:00 2001 From: Andrew Walker Date: Mon, 23 Dec 2024 11:42:49 -0600 Subject: [PATCH] Switch to using syslog for middleware logfiles This commit converts middleware to using syslog handler + syslog ident in order to uniquely filter log messages into separate files. This has advantage of streamlining remote syslog handling. Adding new logger destinations is also fairly straightforward. 1. Add new TNLog object with details for new log file 2. Add to tuple of all TNLog objects These are evaluated during syslog-ng configuration. If for some reason syslog is broken on the server then middleware will queue the records internally for when syslog becomes unbroken while writing the records to a fallback file for evaluation by a system administrator. --- src/middlewared/debian/control | 1 - .../syslog-ng/conf.d/tnaudit.conf.mako | 4 + .../syslog-ng/conf.d/tnfilters.conf.mako | 6 + .../etc_files/syslog-ng/syslog-ng.conf.mako | 27 ++- src/middlewared/middlewared/logger.py | 195 ++++++++++++++---- src/middlewared/middlewared/main.py | 7 +- 6 files changed, 191 insertions(+), 49 deletions(-) diff --git a/src/middlewared/debian/control b/src/middlewared/debian/control index df8fc0d1ed180..d9761eb9e0682 100644 --- a/src/middlewared/debian/control +++ b/src/middlewared/debian/control @@ -103,7 +103,6 @@ Depends: alembic, python3-acme, python3-aiohttp, python3-aiorwlock, - python3-anyio, python3-apps-validation, python3-asyncssh, python3-aws-requests-auth, diff --git a/src/middlewared/middlewared/etc_files/syslog-ng/conf.d/tnaudit.conf.mako b/src/middlewared/middlewared/etc_files/syslog-ng/conf.d/tnaudit.conf.mako index cab6b60dd9f7d..e59962c4afcff 100644 --- a/src/middlewared/middlewared/etc_files/syslog-ng/conf.d/tnaudit.conf.mako +++ b/src/middlewared/middlewared/etc_files/syslog-ng/conf.d/tnaudit.conf.mako @@ -78,7 +78,11 @@ ${textwrap.indent(get_db(svc), ' ')} % if not audit_custom_section(svc, 'log'): log { +% if svc == 'MIDDLEWARE': + source(tn_middleware_src); +% else: source(s_src); +% endif filter(f_tnaudit_${svc.lower()}); parser(p_tnaudit); rewrite(r_rewrite_success); diff --git a/src/middlewared/middlewared/etc_files/syslog-ng/conf.d/tnfilters.conf.mako b/src/middlewared/middlewared/etc_files/syslog-ng/conf.d/tnfilters.conf.mako index 600a51e2254f6..912d5cd5a4e85 100644 --- a/src/middlewared/middlewared/etc_files/syslog-ng/conf.d/tnfilters.conf.mako +++ b/src/middlewared/middlewared/etc_files/syslog-ng/conf.d/tnfilters.conf.mako @@ -1,5 +1,6 @@ <% from middlewared.plugins.audit.utils import audit_program, AUDITED_SERVICES + from middlewared.logger import ALL_LOG_FILES adv_conf = render_ctx['system.advanced.config'] @@ -53,6 +54,11 @@ filter f_scst { program("kernel") and match("dlm:" value("MESSAGE")); }; +# TrueNAS middleware filters +% for tnlog in ALL_LOG_FILES: +filter f_${tnlog.name or "middleware"} { program("${tnlog.get_ident()[:-2]}"); }; +% endfor + # Temporary SNMP filter: NAS-129124 filter f_snmp { program("snmpd") and match("unexpected header length" value("MESSAGE")); diff --git a/src/middlewared/middlewared/etc_files/syslog-ng/syslog-ng.conf.mako b/src/middlewared/middlewared/etc_files/syslog-ng/syslog-ng.conf.mako index c801052fa5244..f0474c4b7b637 100644 --- a/src/middlewared/middlewared/etc_files/syslog-ng/syslog-ng.conf.mako +++ b/src/middlewared/middlewared/etc_files/syslog-ng/syslog-ng.conf.mako @@ -1,6 +1,13 @@ <% +from middlewared.logger import DEFAULT_SYSLOG_PATH, ALL_LOG_FILES + logger = middleware.logger +# The messages coming in via middleware are already formatted by logger +# and so we don't want to do additional formatting. +syslog_template = 'template("${MESSAGE}\n")' + + def generate_syslog_remote_destination(advanced_config): result = "" syslog_server = advanced_config["syslogserver"] @@ -63,7 +70,7 @@ def generate_syslog_remote_destination(advanced_config): result += ' };\n' - result += 'log { source(tn_remote_src_files); filter(f_tnremote); destination(loghost); };\n' + result += 'log { source(tn_middleware_src); filter(f_tnremote); destination(loghost); };\n' result += 'log { source(s_src); filter(f_tnremote); destination(loghost); };\n' return result @@ -92,11 +99,8 @@ options { ################## source s_src { system(); internal(); }; -source tn_remote_src_files { - file("/var/log/middlewared.log"); - file("/var/log/failover.log"); - file("/var/log/fenced.log"); - file("/var/log/zettarepl.log"); +source tn_middleware_src { + unix-stream("${DEFAULT_SYSLOG_PATH}" create-dirs(yes) perm(0600)); }; ################## @@ -124,6 +128,17 @@ log { flags(final); }; + +####################### +# Middlewared-related log files +######################## +% for tnlog in ALL_LOG_FILES: +log { + source(tn_middleware_src); filter(f_${tnlog.name or "middleware"}); + destination { file(${tnlog.logfile} ${syslog_template}); }; +}; +% endfor + log { source(s_src); filter(f_auth); destination(d_auth); }; log { source(s_src); filter(f_cron); destination(d_cron); }; log { source(s_src); filter(f_daemon); destination(d_daemon); }; diff --git a/src/middlewared/middlewared/logger.py b/src/middlewared/middlewared/logger.py index 5f00cf1b39edb..ce129544cb4a7 100644 --- a/src/middlewared/middlewared/logger.py +++ b/src/middlewared/middlewared/logger.py @@ -1,10 +1,12 @@ import logging import logging.handlers -import os import queue +import socket import typing import warnings from .logging.console_formatter import ConsoleLogFormatter +from collections import deque +from dataclasses import dataclass # markdown debug is also considered useless logging.getLogger('MARKDOWN').setLevel(logging.INFO) @@ -56,8 +58,15 @@ DOCKER_IMAGE_LOGFILE = '/var/log/docker_image.log' FAILOVER_LOGFILE = '/var/log/failover.log' LOGFILE = '/var/log/middlewared.log' +DEFAULT_LOGFORMAT = '[%(asctime)s] (%(levelname)s) %(name)s.%(funcName)s():%(lineno)d - %(message)s' +FALLBACK_LOGFILE = '/var/log/fallback-middlewared.log' NETDATA_API_LOGFILE = '/var/log/netdata_api.log' ZETTAREPL_LOGFILE = '/var/log/zettarepl.log' +ZETTAREPL_LOGFORMAT = '[%(asctime)s] %(levelname)-8s [%(threadName)s] [%(name)s] %(message)s' + +DEFAULT_IDENT = 'MIDDLEWARE: ' +MIDDLEWARE_AUDIT_IDENT = 'TNAUDIT_MIDDLEWARE: ' +DEFAULT_SYSLOG_PATH = '/var/run/syslog-ng/middleware.sock' def trace(self, message, *args, **kws): @@ -69,14 +78,142 @@ def trace(self, message, *args, **kws): logging.Logger.trace = trace -class Logger: - """Pseudo-Class for Logger - Wrapper for logging module""" +@dataclass(slots=True, frozen=True) +class TNLog: + name: str | None + logfile: str + logformat: str = DEFAULT_LOGFORMAT + + def get_ident(self): + if self.name is None: + return DEFAULT_IDENT + + return f'{self.name.upper()}: ' + + +# NOTE if new separate log file needs to be added, create a new TNLog +# object and append to ALL_LOG_FILES tuple. These files are read by the +# syslog-ng config generation scripts and automatically handled. +TNLOG_MIDDLEWARE = TNLog(None, LOGFILE) +TNLOG_APP_LIFECYCLE = TNLog('app_lifecycle', APP_LIFECYCLE_LOGFILE) +TNLOG_APP_MIGRATION = TNLog('app_migration', APP_MIGRATION_LOGFILE) +TNLOG_DOCKER_IMAGE = TNLog('docker_image', DOCKER_IMAGE_LOGFILE) +TNLOG_FAILOVER = TNLog('failover', FAILOVER_LOGFILE) +TNLOG_NETDATA_API = TNLog('netdata_api', NETDATA_API_LOGFILE) +TNLOG_ZETTAREPL = TNLog('zettarepl', ZETTAREPL_LOGFILE, ZETTAREPL_LOGFORMAT) + +ALL_LOG_FILES = ( + TNLOG_MIDDLEWARE, + TNLOG_APP_LIFECYCLE, + TNLOG_APP_MIGRATION, + TNLOG_DOCKER_IMAGE, + TNLOG_FAILOVER, + TNLOG_NETDATA_API, + TNLOG_ZETTAREPL, +) + +# Audit entries are inserted into audit databases in /audit rather than +# written to files in /var/log and so they are not members of ALL_LOG_FILES +MIDDLEWARE_TNAUDIT = TNLog('TNAUDIT_MIDDLEWARE', '', '') + + +class TNSyslogHandler(logging.handlers.SysLogHandler): def __init__( - self, application_name: str, debug_level: typing.Optional[str]=None, - log_format: str='[%(asctime)s] (%(levelname)s) %(name)s.%(funcName)s():%(lineno)d - %(message)s' + self, + address: str = DEFAULT_SYSLOG_PATH, + pending_queue: deque | None = None ): + self.pending_queue = pending_queue + super().__init__(address, socktype=socket.SOCK_STREAM) + + def drain_pending_queue(self): + while self.pending_queue: + record = self.pending_queue.popleft() + try: + super().emit(record) + except Exception: + # Nope. Still dead. Put it back where we found it + self.pending_queue.appendleft(record) + return False + + return True + + def fallback(self, record): + if not self.fallback_handler: + return + + try: + self.fallback_handler.emit(record) + except Exception: + pass + + def emit(self, record): + if not self.drain_pending_queue(): + # Failed to drain our pending queue so add this record to the + # ever-growing deque + self.pending_queue.append(record) + self.fallback(record) + return + + try: + super().emit(record) + except Exception: + # logging framework done broke. Queue up + # for drain on next auditd message handled + self.pending_queue.append(record) + self.fallback(record) + + def handleError(self, record): + # re-raise it back up to the emit call + if self.pending_queue is None: + return super().handleError(record) + + raise + + def set_fallback_handler(self, fallback: logging.Handler): + if not isinstance(fallback, logging.Handler): + raise TypeError(f'{fallback}: not a logging.Handler') + + self.fallback_handler = fallback + + +def setup_syslog_handler(tnlog: TNLog, fallback: logging.Handler | None) -> logging.Logger: + # Use `QueueHandler` to avoid blocking IO in asyncio main loop + log_queue = queue.Queue() + queue_handler = logging.handlers.QueueHandler(log_queue) + + # Set up syslog handler with deque to store failed messages until + # they can be flushed. This can happen if syslog-ng isn't ready yet. + syslog_handler = TNSyslogHandler(pending_queue=deque()) + syslog_handler.setLevel(logging.DEBUG) + + if tnlog.logformat: + syslog_handler.setFormatter(logging.Formatter(tnlog.logformat, '%Y/%m/%d %H:%M:%S')) + + # Set ident for the logger. This becomes program name in syslog-ng and allows + # more precise filtering rules + syslog_handler.ident = tnlog.get_ident() + + # Set fallback for case where syslog is broken. This does not need separate queue + # since emit will happen in separate thread from main loop. + if fallback: + syslog_handler.set_fallback_handler(fallback) + + queue_listener = logging.handlers.QueueListener(log_queue, syslog_handler) + queue_listener.start() + logger = logging.getLogger(tnlog.name) + logger.addHandler(queue_handler) + if tnlog.name is not None: + logging.getLogger(tnlog.name).propagate = False + + return logger + + +class Logger: + """Pseudo-Class for Logger - Wrapper for logging module""" + def __init__(self, application_name: str, debug_level: str = 'DEBUG', log_format: str = DEFAULT_LOGFORMAT): self.application_name = application_name - self.debug_level = debug_level or 'DEBUG' + self.debug_level = debug_level self.log_format = log_format def getLogger(self): @@ -94,40 +231,22 @@ def configure_logging(self, output_option: str): console_handler.setFormatter(ConsoleLogFormatter(self.log_format, datefmt=time_format)) logging.root.addHandler(console_handler) else: - for name, filename, log_format in [ - (None, LOGFILE, self.log_format), - ('app_lifecycle', APP_LIFECYCLE_LOGFILE, self.log_format), - ('app_migrations', APP_MIGRATION_LOGFILE, self.log_format), - ('docker_image', DOCKER_IMAGE_LOGFILE, self.log_format), - ('failover', FAILOVER_LOGFILE, self.log_format), - ('netdata_api', NETDATA_API_LOGFILE, self.log_format), - ('zettarepl', ZETTAREPL_LOGFILE, - '[%(asctime)s] %(levelname)-8s [%(threadName)s] [%(name)s] %(message)s'), - ]: - self.setup_file_logger(name, filename, log_format) + # Set up our fallback logging mechanism (to file) in case syslog is broken + # We internally queue writes to regular log files while waiting for syslog to recover + # while simultaneously writing to the fallback file so that there is less potential to + # lose relevant logs. + fallback_handler = logging.handlers.RotatingFileHandler(FALLBACK_LOGFILE, 'a', 10485760, 5, 'utf-8') + fallback_handler.setLevel(logging.DEBUG) + fallback_handler.setFormatter(logging.Formatter(DEFAULT_LOGFORMAT, '%Y/%m/%d %H:%M:%S')) + + for tnlog in ALL_LOG_FILES: + setup_syslog_handler(tnlog, fallback_handler) logging.root.setLevel(getattr(logging, self.debug_level)) - def setup_file_logger(self, name: typing.Optional[str], filename: str, log_format: typing.Optional[str]): - # Use `QueueHandler` to avoid blocking IO in asyncio main loop - log_queue = queue.Queue() - queue_handler = logging.handlers.QueueHandler(log_queue) - file_handler = logging.handlers.RotatingFileHandler(filename, 'a', 10485760, 5, 'utf-8') - file_handler.setLevel(logging.DEBUG) - file_handler.setFormatter(logging.Formatter(log_format, '%Y/%m/%d %H:%M:%S')) - queue_listener = logging.handlers.QueueListener(log_queue, file_handler) - queue_listener.start() - logging.getLogger(name).addHandler(queue_handler) - if name is not None: - logging.getLogger(name).propagate = False - - # Make sure various log files are not readable by everybody. - # umask could be another approach but chmod was chosen so - # it affects existing installs. - try: - os.chmod(filename, 0o640) - except OSError: - pass + +def setup_audit_logging() -> logging.Logger: + return setup_syslog_handler(MIDDLEWARE_TNAUDIT, None) def setup_logging(name: str, debug_level: typing.Optional[str], log_handler: typing.Optional[str]): @@ -137,4 +256,4 @@ def setup_logging(name: str, debug_level: typing.Optional[str], log_handler: typ if log_handler == 'console': _logger.configure_logging('console') else: - _logger.configure_logging('file') + _logger.configure_logging('syslog') diff --git a/src/middlewared/middlewared/main.py b/src/middlewared/middlewared/main.py index 65858eb732804..d86d483eaf114 100644 --- a/src/middlewared/middlewared/main.py +++ b/src/middlewared/middlewared/main.py @@ -64,12 +64,11 @@ import typing import uuid -from anyio import create_connected_unix_datagram_socket from systemd.daemon import notify as systemd_notify from truenas_api_client import json -from .logger import Logger, setup_logging +from .logger import Logger, setup_audit_logging, setup_logging SYSTEMD_EXTEND_USECS = 240000000 # 4mins in microseconds @@ -128,6 +127,7 @@ def __init__( self.tasks = set() self.api_versions = None self.api_versions_adapter = None + self.__audit_logger = setup_audit_logging() def create_task(self, coro, *, name=None): task = self.loop.create_task(coro, name=name) @@ -924,8 +924,7 @@ async def log_audit_message(self, app, event, event_data, success): } }) - async with await create_connected_unix_datagram_socket("/dev/log") as s: - await s.send(syslog_message(message)) + self.__audit_logger.debug(message) async def call(self, name, *params, app=None, audit_callback=None, job_on_progress_cb=None, pipes=None, profile=False):