Skip to content

Commit

Permalink
Switch to using syslog for middleware logfiles
Browse files Browse the repository at this point in the history
This commit converts middleware  to using syslog handler + syslog
ident in order to uniquely filter log messages into separate files.

This has advantage of streamlining remote syslog handling.

Adding new logger destinations is also fairly straightforward.
1. Add new TNLog object with details for new log file
2. Add to tuple of all TNLog objects

These are evaluated during syslog-ng configuration.

If for some reason syslog is broken on the server then middleware
will queue the records internally for when syslog becomes unbroken
while writing the records to a fallback file for evaluation by
a system administrator.
  • Loading branch information
anodos325 committed Dec 23, 2024
1 parent 7c79c21 commit 3ccc3fd
Show file tree
Hide file tree
Showing 6 changed files with 191 additions and 49 deletions.
1 change: 0 additions & 1 deletion src/middlewared/debian/control
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,6 @@ Depends: alembic,
python3-acme,
python3-aiohttp,
python3-aiorwlock,
python3-anyio,
python3-apps-validation,
python3-asyncssh,
python3-aws-requests-auth,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,11 @@ ${textwrap.indent(get_db(svc), ' ')}

% if not audit_custom_section(svc, 'log'):
log {
% if svc == 'MIDDLEWARE':
source(tn_middleware_src);
% else:
source(s_src);
% endif
filter(f_tnaudit_${svc.lower()});
parser(p_tnaudit);
rewrite(r_rewrite_success);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
<%
from middlewared.plugins.audit.utils import audit_program, AUDITED_SERVICES
from middlewared.logger import ALL_LOG_FILES
adv_conf = render_ctx['system.advanced.config']
Expand Down Expand Up @@ -53,6 +54,11 @@ filter f_scst {
program("kernel") and match("dlm:" value("MESSAGE"));
};

# TrueNAS middleware filters
% for tnlog in ALL_LOG_FILES:
filter f_${tnlog.name or "middleware"} { program("${tnlog.get_ident()[:-2]}"); };
% endfor

# Temporary SNMP filter: NAS-129124
filter f_snmp {
program("snmpd") and match("unexpected header length" value("MESSAGE"));
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
<%
from middlewared.logger import DEFAULT_SYSLOG_PATH, ALL_LOG_FILES
logger = middleware.logger
# The messages coming in via middleware are already formatted by logger
# and so we don't want to do additional formatting.
syslog_template = 'template("${MESSAGE}\n")'
def generate_syslog_remote_destination(advanced_config):
result = ""
syslog_server = advanced_config["syslogserver"]
Expand Down Expand Up @@ -63,7 +70,7 @@ def generate_syslog_remote_destination(advanced_config):
result += ' };\n'
result += 'log { source(tn_remote_src_files); filter(f_tnremote); destination(loghost); };\n'
result += 'log { source(tn_middleware_src); filter(f_tnremote); destination(loghost); };\n'
result += 'log { source(s_src); filter(f_tnremote); destination(loghost); };\n'
return result
Expand Down Expand Up @@ -92,11 +99,8 @@ options {
##################
source s_src { system(); internal(); };

source tn_remote_src_files {
file("/var/log/middlewared.log");
file("/var/log/failover.log");
file("/var/log/fenced.log");
file("/var/log/zettarepl.log");
source tn_middleware_src {
unix-stream("${DEFAULT_SYSLOG_PATH}" create-dirs(yes) perm(0600));
};

##################
Expand Down Expand Up @@ -124,6 +128,17 @@ log {
flags(final);
};


#######################
# Middlewared-related log files
########################
% for tnlog in ALL_LOG_FILES:
log {
source(tn_middleware_src); filter(f_${tnlog.name or "middleware"});
destination { file(${tnlog.logfile} ${syslog_template}); };
};
% endfor

log { source(s_src); filter(f_auth); destination(d_auth); };
log { source(s_src); filter(f_cron); destination(d_cron); };
log { source(s_src); filter(f_daemon); destination(d_daemon); };
Expand Down
195 changes: 157 additions & 38 deletions src/middlewared/middlewared/logger.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import logging
import logging.handlers
import os
import queue
import socket
import typing
import warnings
from .logging.console_formatter import ConsoleLogFormatter
from collections import deque
from dataclasses import dataclass

# markdown debug is also considered useless
logging.getLogger('MARKDOWN').setLevel(logging.INFO)
Expand Down Expand Up @@ -56,8 +58,15 @@
DOCKER_IMAGE_LOGFILE = '/var/log/docker_image.log'
FAILOVER_LOGFILE = '/var/log/failover.log'
LOGFILE = '/var/log/middlewared.log'
DEFAULT_LOGFORMAT = '[%(asctime)s] (%(levelname)s) %(name)s.%(funcName)s():%(lineno)d - %(message)s'
FALLBACK_LOGFILE = '/var/log/fallback-middlewared.log'
NETDATA_API_LOGFILE = '/var/log/netdata_api.log'
ZETTAREPL_LOGFILE = '/var/log/zettarepl.log'
ZETTAREPL_LOGFORMAT = '[%(asctime)s] %(levelname)-8s [%(threadName)s] [%(name)s] %(message)s'

DEFAULT_IDENT = 'MIDDLEWARE: '
MIDDLEWARE_AUDIT_IDENT = 'TNAUDIT_MIDDLEWARE: '
DEFAULT_SYSLOG_PATH = '/var/run/syslog-ng/middleware.sock'


def trace(self, message, *args, **kws):
Expand All @@ -69,14 +78,142 @@ def trace(self, message, *args, **kws):
logging.Logger.trace = trace


class Logger:
"""Pseudo-Class for Logger - Wrapper for logging module"""
@dataclass(slots=True, frozen=True)
class TNLog:
name: str | None
logfile: str
logformat: str = DEFAULT_LOGFORMAT

def get_ident(self):
if self.name is None:
return DEFAULT_IDENT

return f'{self.name.upper()}: '


# NOTE if new separate log file needs to be added, create a new TNLog
# object and append to ALL_LOG_FILES tuple. These files are read by the
# syslog-ng config generation scripts and automatically handled.
TNLOG_MIDDLEWARE = TNLog(None, LOGFILE)
TNLOG_APP_LIFECYCLE = TNLog('app_lifecycle', APP_LIFECYCLE_LOGFILE)
TNLOG_APP_MIGRATION = TNLog('app_migration', APP_MIGRATION_LOGFILE)
TNLOG_DOCKER_IMAGE = TNLog('docker_image', DOCKER_IMAGE_LOGFILE)
TNLOG_FAILOVER = TNLog('failover', FAILOVER_LOGFILE)
TNLOG_NETDATA_API = TNLog('netdata_api', NETDATA_API_LOGFILE)
TNLOG_ZETTAREPL = TNLog('zettarepl', ZETTAREPL_LOGFILE, ZETTAREPL_LOGFORMAT)

ALL_LOG_FILES = (
TNLOG_MIDDLEWARE,
TNLOG_APP_LIFECYCLE,
TNLOG_APP_MIGRATION,
TNLOG_DOCKER_IMAGE,
TNLOG_FAILOVER,
TNLOG_NETDATA_API,
TNLOG_ZETTAREPL,
)

# Audit entries are inserted into audit databases in /audit rather than
# written to files in /var/log and so they are not members of ALL_LOG_FILES
MIDDLEWARE_TNAUDIT = TNLog('TNAUDIT_MIDDLEWARE', '', '')


class TNSyslogHandler(logging.handlers.SysLogHandler):
def __init__(
self, application_name: str, debug_level: typing.Optional[str]=None,
log_format: str='[%(asctime)s] (%(levelname)s) %(name)s.%(funcName)s():%(lineno)d - %(message)s'
self,
address: str = DEFAULT_SYSLOG_PATH,
pending_queue: deque | None = None
):
self.pending_queue = pending_queue
super().__init__(address, socktype=socket.SOCK_STREAM)

def drain_pending_queue(self):
while self.pending_queue:
record = self.pending_queue.popleft()
try:
super().emit(record)
except Exception:
# Nope. Still dead. Put it back where we found it
self.pending_queue.appendleft(record)
return False

return True

def fallback(self, record):
if not self.fallback_handler:
return

try:
self.fallback_handler.emit(record)
except Exception:
pass

def emit(self, record):
if not self.drain_pending_queue():
# Failed to drain our pending queue so add this record to the
# ever-growing deque
self.pending_queue.append(record)
self.fallback(record)
return

try:
super().emit(record)
except Exception:
# logging framework done broke. Queue up
# for drain on next auditd message handled
self.pending_queue.append(record)
self.fallback(record)

def handleError(self, record):
# re-raise it back up to the emit call
if self.pending_queue is None:
return super().handleError(record)

raise

def set_fallback_handler(self, fallback: logging.Handler):
if not isinstance(fallback, logging.Handler):
raise TypeError(f'{fallback}: not a logging.Handler')

self.fallback_handler = fallback


def setup_syslog_handler(tnlog: TNLog, fallback: logging.Handler | None) -> logging.Logger:
# Use `QueueHandler` to avoid blocking IO in asyncio main loop
log_queue = queue.Queue()
queue_handler = logging.handlers.QueueHandler(log_queue)

# Set up syslog handler with deque to store failed messages until
# they can be flushed. This can happen if syslog-ng isn't ready yet.
syslog_handler = TNSyslogHandler(pending_queue=deque())
syslog_handler.setLevel(logging.DEBUG)

if tnlog.logformat:
syslog_handler.setFormatter(logging.Formatter(tnlog.logformat, '%Y/%m/%d %H:%M:%S'))

# Set ident for the logger. This becomes program name in syslog-ng and allows
# more precise filtering rules
syslog_handler.ident = tnlog.get_ident()

# Set fallback for case where syslog is broken. This does not need separate queue
# since emit will happen in separate thread from main loop.
if fallback:
syslog_handler.set_fallback_handler(fallback)

queue_listener = logging.handlers.QueueListener(log_queue, syslog_handler)
queue_listener.start()
logger = logging.getLogger(tnlog.name)
logger.addHandler(queue_handler)
if tnlog.name is not None:
logging.getLogger(tnlog.name).propagate = False

return logger


class Logger:
"""Pseudo-Class for Logger - Wrapper for logging module"""
def __init__(self, application_name: str, debug_level: str = 'DEBUG', log_format: str = DEFAULT_LOGFORMAT):
self.application_name = application_name
self.debug_level = debug_level or 'DEBUG'
self.debug_level = debug_level
self.log_format = log_format

def getLogger(self):
Expand All @@ -94,40 +231,22 @@ def configure_logging(self, output_option: str):
console_handler.setFormatter(ConsoleLogFormatter(self.log_format, datefmt=time_format))
logging.root.addHandler(console_handler)
else:
for name, filename, log_format in [
(None, LOGFILE, self.log_format),
('app_lifecycle', APP_LIFECYCLE_LOGFILE, self.log_format),
('app_migrations', APP_MIGRATION_LOGFILE, self.log_format),
('docker_image', DOCKER_IMAGE_LOGFILE, self.log_format),
('failover', FAILOVER_LOGFILE, self.log_format),
('netdata_api', NETDATA_API_LOGFILE, self.log_format),
('zettarepl', ZETTAREPL_LOGFILE,
'[%(asctime)s] %(levelname)-8s [%(threadName)s] [%(name)s] %(message)s'),
]:
self.setup_file_logger(name, filename, log_format)
# Set up our fallback logging mechanism (to file) in case syslog is broken
# We internally queue writes to regular log files while waiting for syslog to recover
# while simultaneously writing to the fallback file so that there is less potential to
# lose relevant logs.
fallback_handler = logging.handlers.RotatingFileHandler(FALLBACK_LOGFILE, 'a', 10485760, 5, 'utf-8')
fallback_handler.setLevel(logging.DEBUG)
fallback_handler.setFormatter(logging.Formatter(DEFAULT_LOGFORMAT, '%Y/%m/%d %H:%M:%S'))

for tnlog in ALL_LOG_FILES:
setup_syslog_handler(tnlog, fallback_handler)

logging.root.setLevel(getattr(logging, self.debug_level))

def setup_file_logger(self, name: typing.Optional[str], filename: str, log_format: typing.Optional[str]):
# Use `QueueHandler` to avoid blocking IO in asyncio main loop
log_queue = queue.Queue()
queue_handler = logging.handlers.QueueHandler(log_queue)
file_handler = logging.handlers.RotatingFileHandler(filename, 'a', 10485760, 5, 'utf-8')
file_handler.setLevel(logging.DEBUG)
file_handler.setFormatter(logging.Formatter(log_format, '%Y/%m/%d %H:%M:%S'))
queue_listener = logging.handlers.QueueListener(log_queue, file_handler)
queue_listener.start()
logging.getLogger(name).addHandler(queue_handler)
if name is not None:
logging.getLogger(name).propagate = False

# Make sure various log files are not readable by everybody.
# umask could be another approach but chmod was chosen so
# it affects existing installs.
try:
os.chmod(filename, 0o640)
except OSError:
pass

def setup_audit_logging() -> logging.Logger:
return setup_syslog_handler(MIDDLEWARE_TNAUDIT, None)


def setup_logging(name: str, debug_level: typing.Optional[str], log_handler: typing.Optional[str]):
Expand All @@ -137,4 +256,4 @@ def setup_logging(name: str, debug_level: typing.Optional[str], log_handler: typ
if log_handler == 'console':
_logger.configure_logging('console')
else:
_logger.configure_logging('file')
_logger.configure_logging('syslog')
7 changes: 3 additions & 4 deletions src/middlewared/middlewared/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,11 @@
import typing
import uuid

from anyio import create_connected_unix_datagram_socket
from systemd.daemon import notify as systemd_notify

from truenas_api_client import json

from .logger import Logger, setup_logging
from .logger import Logger, setup_audit_logging, setup_logging

SYSTEMD_EXTEND_USECS = 240000000 # 4mins in microseconds

Expand Down Expand Up @@ -128,6 +127,7 @@ def __init__(
self.tasks = set()
self.api_versions = None
self.api_versions_adapter = None
self.__audit_logger = setup_audit_logging()

def create_task(self, coro, *, name=None):
task = self.loop.create_task(coro, name=name)
Expand Down Expand Up @@ -924,8 +924,7 @@ async def log_audit_message(self, app, event, event_data, success):
}
})

async with await create_connected_unix_datagram_socket("/dev/log") as s:
await s.send(syslog_message(message))
self.__audit_logger.debug(message)

async def call(self, name, *params, app=None, audit_callback=None, job_on_progress_cb=None, pipes=None,
profile=False):
Expand Down

0 comments on commit 3ccc3fd

Please sign in to comment.