diff --git a/src/middlewared/debian/control b/src/middlewared/debian/control index df8fc0d1ed180..d9761eb9e0682 100644 --- a/src/middlewared/debian/control +++ b/src/middlewared/debian/control @@ -103,7 +103,6 @@ Depends: alembic, python3-acme, python3-aiohttp, python3-aiorwlock, - python3-anyio, python3-apps-validation, python3-asyncssh, python3-aws-requests-auth, diff --git a/src/middlewared/middlewared/etc_files/syslog-ng/conf.d/tnaudit.conf.mako b/src/middlewared/middlewared/etc_files/syslog-ng/conf.d/tnaudit.conf.mako index cab6b60dd9f7d..e59962c4afcff 100644 --- a/src/middlewared/middlewared/etc_files/syslog-ng/conf.d/tnaudit.conf.mako +++ b/src/middlewared/middlewared/etc_files/syslog-ng/conf.d/tnaudit.conf.mako @@ -78,7 +78,11 @@ ${textwrap.indent(get_db(svc), ' ')} % if not audit_custom_section(svc, 'log'): log { +% if svc == 'MIDDLEWARE': + source(tn_middleware_src); +% else: source(s_src); +% endif filter(f_tnaudit_${svc.lower()}); parser(p_tnaudit); rewrite(r_rewrite_success); diff --git a/src/middlewared/middlewared/etc_files/syslog-ng/conf.d/tnfilters.conf.mako b/src/middlewared/middlewared/etc_files/syslog-ng/conf.d/tnfilters.conf.mako index 600a51e2254f6..912d5cd5a4e85 100644 --- a/src/middlewared/middlewared/etc_files/syslog-ng/conf.d/tnfilters.conf.mako +++ b/src/middlewared/middlewared/etc_files/syslog-ng/conf.d/tnfilters.conf.mako @@ -1,5 +1,6 @@ <% from middlewared.plugins.audit.utils import audit_program, AUDITED_SERVICES + from middlewared.logger import ALL_LOG_FILES adv_conf = render_ctx['system.advanced.config'] @@ -53,6 +54,11 @@ filter f_scst { program("kernel") and match("dlm:" value("MESSAGE")); }; +# TrueNAS middleware filters +% for tnlog in ALL_LOG_FILES: +filter f_${tnlog.name or "middleware"} { program("${tnlog.get_ident()[:-2]}"); }; +% endfor + # Temporary SNMP filter: NAS-129124 filter f_snmp { program("snmpd") and match("unexpected header length" value("MESSAGE")); diff --git a/src/middlewared/middlewared/etc_files/syslog-ng/syslog-ng.conf.mako b/src/middlewared/middlewared/etc_files/syslog-ng/syslog-ng.conf.mako index c801052fa5244..f0474c4b7b637 100644 --- a/src/middlewared/middlewared/etc_files/syslog-ng/syslog-ng.conf.mako +++ b/src/middlewared/middlewared/etc_files/syslog-ng/syslog-ng.conf.mako @@ -1,6 +1,13 @@ <% +from middlewared.logger import DEFAULT_SYSLOG_PATH, ALL_LOG_FILES + logger = middleware.logger +# The messages coming in via middleware are already formatted by logger +# and so we don't want to do additional formatting. +syslog_template = 'template("${MESSAGE}\n")' + + def generate_syslog_remote_destination(advanced_config): result = "" syslog_server = advanced_config["syslogserver"] @@ -63,7 +70,7 @@ def generate_syslog_remote_destination(advanced_config): result += ' };\n' - result += 'log { source(tn_remote_src_files); filter(f_tnremote); destination(loghost); };\n' + result += 'log { source(tn_middleware_src); filter(f_tnremote); destination(loghost); };\n' result += 'log { source(s_src); filter(f_tnremote); destination(loghost); };\n' return result @@ -92,11 +99,8 @@ options { ################## source s_src { system(); internal(); }; -source tn_remote_src_files { - file("/var/log/middlewared.log"); - file("/var/log/failover.log"); - file("/var/log/fenced.log"); - file("/var/log/zettarepl.log"); +source tn_middleware_src { + unix-stream("${DEFAULT_SYSLOG_PATH}" create-dirs(yes) perm(0600)); }; ################## @@ -124,6 +128,17 @@ log { flags(final); }; + +####################### +# Middlewared-related log files +######################## +% for tnlog in ALL_LOG_FILES: +log { + source(tn_middleware_src); filter(f_${tnlog.name or "middleware"}); + destination { file(${tnlog.logfile} ${syslog_template}); }; +}; +% endfor + log { source(s_src); filter(f_auth); destination(d_auth); }; log { source(s_src); filter(f_cron); destination(d_cron); }; log { source(s_src); filter(f_daemon); destination(d_daemon); }; diff --git a/src/middlewared/middlewared/logger.py b/src/middlewared/middlewared/logger.py index 5f00cf1b39edb..77a5755a367a6 100644 --- a/src/middlewared/middlewared/logger.py +++ b/src/middlewared/middlewared/logger.py @@ -1,10 +1,12 @@ import logging import logging.handlers -import os import queue +import socket import typing import warnings from .logging.console_formatter import ConsoleLogFormatter +from collections import deque +from dataclasses import dataclass # markdown debug is also considered useless logging.getLogger('MARKDOWN').setLevel(logging.INFO) @@ -56,8 +58,16 @@ DOCKER_IMAGE_LOGFILE = '/var/log/docker_image.log' FAILOVER_LOGFILE = '/var/log/failover.log' LOGFILE = '/var/log/middlewared.log' +DEFAULT_LOGFORMAT = '[%(asctime)s] (%(levelname)s) %(name)s.%(funcName)s():%(lineno)d - %(message)s' +FALLBACK_LOGFILE = '/var/log/fallback-middlewared.log' NETDATA_API_LOGFILE = '/var/log/netdata_api.log' ZETTAREPL_LOGFILE = '/var/log/zettarepl.log' +ZETTAREPL_LOGFORMAT = '[%(asctime)s] %(levelname)-8s [%(threadName)s] [%(name)s] %(message)s' + +DEFAULT_IDENT = 'MIDDLEWARE: ' +MIDDLEWARE_AUDIT_IDENT = 'TNAUDIT_MIDDLEWARE: ' +DEFAULT_SYSLOG_PATH = '/var/run/syslog-ng/middleware.sock' +DEFAULT_PENDING_QUEUE_LEN = 4096 def trace(self, message, *args, **kws): @@ -69,14 +79,199 @@ def trace(self, message, *args, **kws): logging.Logger.trace = trace -class Logger: - """Pseudo-Class for Logger - Wrapper for logging module""" +@dataclass(slots=True, frozen=True) +class TNLog: + name: str | None + logfile: str + logformat: str = DEFAULT_LOGFORMAT + pending_maxlen: int | None = DEFAULT_PENDING_QUEUE_LEN + + def get_ident(self): + if self.name is None: + return DEFAULT_IDENT + + return f'{self.name.upper()}: ' + + +# NOTE if new separate log file needs to be added, create a new TNLog +# object and append to ALL_LOG_FILES tuple. These files are read by the +# syslog-ng config generation scripts and automatically handled. +TNLOG_MIDDLEWARE = TNLog(None, LOGFILE) +TNLOG_APP_LIFECYCLE = TNLog('app_lifecycle', APP_LIFECYCLE_LOGFILE) +TNLOG_APP_MIGRATION = TNLog('app_migration', APP_MIGRATION_LOGFILE) +TNLOG_DOCKER_IMAGE = TNLog('docker_image', DOCKER_IMAGE_LOGFILE) +TNLOG_FAILOVER = TNLog('failover', FAILOVER_LOGFILE) +TNLOG_NETDATA_API = TNLog('netdata_api', NETDATA_API_LOGFILE) +TNLOG_ZETTAREPL = TNLog('zettarepl', ZETTAREPL_LOGFILE, ZETTAREPL_LOGFORMAT) + +# NOTE: this is also consumed by tests/unit/test_logger.py, which validates +# the auto-generated syslog-ng rules place messages in the correct log files. +ALL_LOG_FILES = ( + TNLOG_MIDDLEWARE, + TNLOG_APP_LIFECYCLE, + TNLOG_APP_MIGRATION, + TNLOG_DOCKER_IMAGE, + TNLOG_FAILOVER, + TNLOG_NETDATA_API, + TNLOG_ZETTAREPL, +) + +# Audit entries are inserted into audit databases in /audit rather than +# written to files in /var/log and so they are not members of ALL_LOG_FILES +MIDDLEWARE_TNAUDIT = TNLog('TNAUDIT_MIDDLEWARE', '', '', None) + + +class TNSyslogHandler(logging.handlers.SysLogHandler): def __init__( - self, application_name: str, debug_level: typing.Optional[str]=None, - log_format: str='[%(asctime)s] (%(levelname)s) %(name)s.%(funcName)s():%(lineno)d - %(message)s' + self, + address: str = DEFAULT_SYSLOG_PATH, + pending_queue: deque | None = None ): + """ + address - path to Unix socket (should be defined in syslog-ng config) + + pending_queue - deque object that will be used for storing failed + LogRecords if syslog is currently down. + + Note: maxlen should be set unless one wants to queue the log messages + without loss until syslog connection restored. This is probably + desireable for auditing, but not for general purposes (where it's + better to just specify a fallback handler). + """ + self.pending_queue = pending_queue + self.fallback_handler = None + super().__init__(address, socktype=socket.SOCK_STREAM) + + def drain_pending_queue(self) -> bool: + """ + Attempt to emit any log records that have been queued up due to logging + failures to the syslog socket. + + Returns: + True if successfully drained entire queue else False + + Raises: + Should not raise exceptions + """ + while self.pending_queue: + record = self.pending_queue.popleft() + try: + super().emit(record) + except Exception: + # Nope. Still dead. Put it back where we found it + self.pending_queue.appendleft(record) + return False + + return True + + def fallback(self, record: logging.LogRecord) -> None: + """ + Fallback logging mechanism in case the syslog target is down. + + In this case we emit the log record to the fallback handler and ignore + any errors. + + Returns: + None + + Raises: + Should not raise exceptions + """ + if not self.fallback_handler: + return + + try: + self.fallback_handler.emit(record) + except Exception: + pass + + def emit(self, record: logging.LogRecord) -> None: + """ + Emit a LogRecord to syslog. If this fails then add to pending queue and + emit via our fallback handler. + """ + + # First attempt to drain the pending queue + if not self.drain_pending_queue(): + # Failed to drain our pending queue so add this record to the + # ever-growing deque + self.pending_queue.append(record) + self.fallback(record) + return + + try: + super().emit(record) + except Exception: + # logging framework done broke. Queue up + # for drain on next auditd message handled + self.pending_queue.append(record) + self.fallback(record) + + def handleError(self, record: logging.LogRecord) -> None: + """ + Override the default syslog error handler if we have a pending_queue to + defined. Exception raised here passes back up to the the emit() call. + """ + # re-raise it back up to the emit call + if self.pending_queue is None: + return super().handleError(record) + + raise + + def set_fallback_handler(self, fallback: logging.Handler) -> None: + """ Set a fallback handler (for example to file) that will be used if syslog socket logging fails """ + if not isinstance(fallback, logging.Handler): + raise TypeError(f'{fallback}: not a logging.Handler') + + self.fallback_handler = fallback + + def close(self) -> None: + # Close our socket + super().close() + + if self.fallback_handler: + # close any open file handler + self.fallback_handler.close() + self.fallback_handler = None + + +def setup_syslog_handler(tnlog: TNLog, fallback: logging.Handler | None) -> logging.Logger: + # Use `QueueHandler` to avoid blocking IO in asyncio main loop + log_queue = queue.Queue() + queue_handler = logging.handlers.QueueHandler(log_queue) + + # Set up syslog handler with deque to store failed messages until + # they can be flushed. This can happen if syslog-ng isn't ready yet. + syslog_handler = TNSyslogHandler(pending_queue=deque(maxlen=tnlog.pending_maxlen)) + syslog_handler.setLevel(logging.DEBUG) + + if tnlog.logformat: + syslog_handler.setFormatter(logging.Formatter(tnlog.logformat, '%Y/%m/%d %H:%M:%S')) + + # Set ident for the logger. This becomes program name in syslog-ng and allows + # more precise filtering rules + syslog_handler.ident = tnlog.get_ident() + + # Set fallback for case where syslog is broken. This does not need separate queue + # since emit will happen in separate thread from main loop. + if fallback: + syslog_handler.set_fallback_handler(fallback) + + queue_listener = logging.handlers.QueueListener(log_queue, syslog_handler) + queue_listener.start() + logger = logging.getLogger(tnlog.name) + logger.addHandler(queue_handler) + if tnlog.name is not None: + logging.getLogger(tnlog.name).propagate = False + + return logger + + +class Logger: + """Pseudo-Class for Logger - Wrapper for logging module""" + def __init__(self, application_name: str, debug_level: str = 'DEBUG', log_format: str = DEFAULT_LOGFORMAT): self.application_name = application_name - self.debug_level = debug_level or 'DEBUG' + self.debug_level = debug_level self.log_format = log_format def getLogger(self): @@ -94,40 +289,22 @@ def configure_logging(self, output_option: str): console_handler.setFormatter(ConsoleLogFormatter(self.log_format, datefmt=time_format)) logging.root.addHandler(console_handler) else: - for name, filename, log_format in [ - (None, LOGFILE, self.log_format), - ('app_lifecycle', APP_LIFECYCLE_LOGFILE, self.log_format), - ('app_migrations', APP_MIGRATION_LOGFILE, self.log_format), - ('docker_image', DOCKER_IMAGE_LOGFILE, self.log_format), - ('failover', FAILOVER_LOGFILE, self.log_format), - ('netdata_api', NETDATA_API_LOGFILE, self.log_format), - ('zettarepl', ZETTAREPL_LOGFILE, - '[%(asctime)s] %(levelname)-8s [%(threadName)s] [%(name)s] %(message)s'), - ]: - self.setup_file_logger(name, filename, log_format) + # Set up our fallback logging mechanism (to file) in case syslog is broken + # We internally queue writes to regular log files while waiting for syslog to recover + # while simultaneously writing to the fallback file so that there is less potential to + # lose relevant logs. + fallback_handler = logging.handlers.RotatingFileHandler(FALLBACK_LOGFILE, 'a', 10485760, 5, 'utf-8') + fallback_handler.setLevel(logging.DEBUG) + fallback_handler.setFormatter(logging.Formatter(DEFAULT_LOGFORMAT, '%Y/%m/%d %H:%M:%S')) + + for tnlog in ALL_LOG_FILES: + setup_syslog_handler(tnlog, fallback_handler) logging.root.setLevel(getattr(logging, self.debug_level)) - def setup_file_logger(self, name: typing.Optional[str], filename: str, log_format: typing.Optional[str]): - # Use `QueueHandler` to avoid blocking IO in asyncio main loop - log_queue = queue.Queue() - queue_handler = logging.handlers.QueueHandler(log_queue) - file_handler = logging.handlers.RotatingFileHandler(filename, 'a', 10485760, 5, 'utf-8') - file_handler.setLevel(logging.DEBUG) - file_handler.setFormatter(logging.Formatter(log_format, '%Y/%m/%d %H:%M:%S')) - queue_listener = logging.handlers.QueueListener(log_queue, file_handler) - queue_listener.start() - logging.getLogger(name).addHandler(queue_handler) - if name is not None: - logging.getLogger(name).propagate = False - - # Make sure various log files are not readable by everybody. - # umask could be another approach but chmod was chosen so - # it affects existing installs. - try: - os.chmod(filename, 0o640) - except OSError: - pass + +def setup_audit_logging() -> logging.Logger: + return setup_syslog_handler(MIDDLEWARE_TNAUDIT, None) def setup_logging(name: str, debug_level: typing.Optional[str], log_handler: typing.Optional[str]): diff --git a/src/middlewared/middlewared/main.py b/src/middlewared/middlewared/main.py index 65858eb732804..d86d483eaf114 100644 --- a/src/middlewared/middlewared/main.py +++ b/src/middlewared/middlewared/main.py @@ -64,12 +64,11 @@ import typing import uuid -from anyio import create_connected_unix_datagram_socket from systemd.daemon import notify as systemd_notify from truenas_api_client import json -from .logger import Logger, setup_logging +from .logger import Logger, setup_audit_logging, setup_logging SYSTEMD_EXTEND_USECS = 240000000 # 4mins in microseconds @@ -128,6 +127,7 @@ def __init__( self.tasks = set() self.api_versions = None self.api_versions_adapter = None + self.__audit_logger = setup_audit_logging() def create_task(self, coro, *, name=None): task = self.loop.create_task(coro, name=name) @@ -924,8 +924,7 @@ async def log_audit_message(self, app, event, event_data, success): } }) - async with await create_connected_unix_datagram_socket("/dev/log") as s: - await s.send(syslog_message(message)) + self.__audit_logger.debug(message) async def call(self, name, *params, app=None, audit_callback=None, job_on_progress_cb=None, pipes=None, profile=False): diff --git a/tests/unit/test_logger.py b/tests/unit/test_logger.py new file mode 100644 index 0000000000000..185fbd7ec8af5 --- /dev/null +++ b/tests/unit/test_logger.py @@ -0,0 +1,179 @@ +import logging +import logging.handlers +import os +import pytest +import subprocess + +from collections import deque +from contextlib import contextmanager +from middlewared.logger import ( + ALL_LOG_FILES, + DEFAULT_IDENT, + DEFAULT_LOGFORMAT, + DEFAULT_SYSLOG_PATH, + LOGFILE, + setup_syslog_handler, + TNSyslogHandler, +) +from time import sleep + +SYSLOG_WRITE_WAIT = 1 + + +def remove_logfile(path): + try: + os.unlink(path) + except FileNotFoundError: + pass + + +@pytest.fixture(scope='function') +def current_test_name(request): + return request.node.name + + +@pytest.fixture(scope='function') +def test_message(current_test_name): + return f'This is a test message for test {current_test_name}' + + +@pytest.fixture(scope='function') +def fallback_handler(tmpdir): + logfile = os.path.join(tmpdir, 'fallback.log') + fallback_handler = logging.handlers.RotatingFileHandler(logfile, 'a', 10485760, 5, 'utf-8') + fallback_handler.setLevel(logging.DEBUG) + fallback_handler.setFormatter(logging.Formatter(DEFAULT_LOGFORMAT, "%Y/%m/%d %H:%M:%S")) + + try: + yield (logfile, fallback_handler) + finally: + remove_logfile(logfile) + + +@contextmanager +def disable_syslog(): + subprocess.run(['service', 'syslog-ng', 'stop']) + + try: + yield + finally: + subprocess.run(['service', 'syslog-ng', 'start']) + # Give a little extra buffer for syslog-ng to fully start + sleep(SYSLOG_WRITE_WAIT) + + +@pytest.fixture(scope='function') +def broken_syslog(current_test_name): + handler = TNSyslogHandler(address='/var/empty/canary', pending_queue=deque()) + logger = logging.getLogger(current_test_name) + logger.addHandler(handler) + + try: + yield (logger, handler) + finally: + handler.close() + + +@pytest.fixture(scope='function') +def broken_syslog_with_fallback(broken_syslog, fallback_handler): + logger, syslog_handler = broken_syslog + logfile, fallback = fallback_handler + + syslog_handler.set_fallback_handler(fallback) + + yield (logger, syslog_handler, logfile) + + +@pytest.fixture(scope='function') +def working_syslog(current_test_name): + """ Set up syslog logger to use middleware rules (via ident) so that it + can successfully write to the middleware log file. """ + + handler = TNSyslogHandler(address=DEFAULT_SYSLOG_PATH, pending_queue=deque()) + handler.ident = DEFAULT_IDENT + logger = logging.getLogger(current_test_name) + logger.addHandler(handler) + + try: + yield (logger, handler) + finally: + handler.close() + + +def test__pending_queue(broken_syslog, test_message): + """ Verify that when syslog connection is broken messages are queued """ + logger, handler = broken_syslog + assert handler.pending_queue is not None + assert handler.fallback_handler is None + + logger.critical(test_message) + assert len(handler.pending_queue) == 1 + assert handler.pending_queue[0].msg == test_message + + +def test__fallback_handler(broken_syslog_with_fallback, test_message): + """ Verify that fallback handler results in writes to log file """ + logger, handler, logfile = broken_syslog_with_fallback + + assert handler.pending_queue is not None + assert handler.fallback_handler is not None + + logger.critical(test_message) + assert len(handler.pending_queue) == 1 + assert handler.pending_queue[0].msg == test_message + + with open(logfile, 'r') as f: + contents = f.read() + assert test_message in contents + + +def test__working_syslog_handler(working_syslog, test_message): + """ Verify that syslog handler writes to the middleware log """ + logger, handler = working_syslog + assert handler.pending_queue is not None + assert handler.fallback_handler is None + + logger.critical(test_message) + assert len(handler.pending_queue) == 0 + sleep(SYSLOG_WRITE_WAIT) + + with open(LOGFILE, 'r') as f: + contents = f.read() + assert test_message in contents + + +def test__syslog_handler_recovery(working_syslog, test_message, current_test_name): + """ Verify that pending queue is properly drained and written to syslog target """ + logger, handler = working_syslog + assert handler.pending_queue is not None + assert handler.fallback_handler is None + + with disable_syslog(): + logger.critical(test_message) + assert len(handler.pending_queue) == 1 + + # Queue only gets drained on message emit + flush_message = f'Flush message for {current_test_name}' + logger.critical(flush_message) + + # Queue should be fully drained now that syslog is working + assert len(handler.pending_queue) == 0 + sleep(SYSLOG_WRITE_WAIT) + + # Message should now be written to file in order in which + # it was initially emitted + with open(LOGFILE, 'r') as f: + contents = f.read() + assert f'{test_message}\n{flush_message}' in contents + + +@pytest.mark.parametrize('tnlog', ALL_LOG_FILES) +def test__middleware_logger_paths(tnlog, test_message): + """ Verify that syslog filtering rules work properly for backends """ + logger = setup_syslog_handler(tnlog, None) + logger.critical(test_message) + + sleep(SYSLOG_WRITE_WAIT) + with open(tnlog.logfile, 'r') as f: + contents = f.read() + assert test_message in contents