From 11a9d5dc728dd0c00d07502b8cd2d437cba850a6 Mon Sep 17 00:00:00 2001 From: "B. Blechschmidt" Date: Thu, 30 May 2024 22:42:02 +0200 Subject: [PATCH] Improve rootless support --- pallium/cmd.py | 2 +- pallium/config.py | 10 +- pallium/dhcp.py | 5 +- pallium/dnsproxy.py | 3 +- pallium/hops/hop.py | 25 +++- pallium/hops/socksapp.py | 18 ++- pallium/hops/ssh.py | 4 + pallium/hops/tor.py | 5 +- pallium/netns.py | 266 ++++++++++++++++++--------------------- pallium/profiles.py | 135 +------------------- pallium/sandbox.py | 22 +++- pallium/security.py | 12 +- pallium/sysutil.py | 80 +++++++++--- pallium/xpra.py | 20 ++- tests/test_api.py | 6 + 15 files changed, 284 insertions(+), 329 deletions(-) diff --git a/pallium/cmd.py b/pallium/cmd.py index 0723953..ada3d05 100644 --- a/pallium/cmd.py +++ b/pallium/cmd.py @@ -202,7 +202,7 @@ def opener(_, __): profile = Profile.from_config(data) new_session = True - elif config_json: + elif config_json is not None: profile = Profile.from_config(config_json) new_session = True else: diff --git a/pallium/config.py b/pallium/config.py index e957c3e..c325299 100644 --- a/pallium/config.py +++ b/pallium/config.py @@ -1,6 +1,7 @@ # Not in use yet import dataclasses import ipaddress +import os import typing @@ -163,10 +164,15 @@ def from_json(cls, obj): return result +def default_command(): + shell = os.environ.get('SHELL', '/usr/bin/sh') + return [shell] + + @json_serializable @dataclasses.dataclass class Run: - command: typing.Optional[typing.List[str]] = dataclasses.field(default=None) + command: typing.Optional[typing.List[str]] = dataclasses.field(default_factory=default_command) quiet: bool = dataclasses.field(default=False) # Whether to suppress status information of pallium and its helpers @@ -184,5 +190,5 @@ class Networking: @dataclasses.dataclass class Configuration: networking: Networking = dataclasses.field(default_factory=Networking) - sandbox: typing.Optional[Sandbox] = dataclasses.field(default=None) + sandbox: Sandbox = dataclasses.field(default_factory=Sandbox) run: Run = dataclasses.field(default_factory=Run) diff --git a/pallium/dhcp.py b/pallium/dhcp.py index 34de658..924c4d1 100644 --- a/pallium/dhcp.py +++ b/pallium/dhcp.py @@ -55,13 +55,10 @@ def _get_conf(self): return conf def _run_server(self): - def preexec_fn(): - sysutil.prctl(sysutil.PR_SET_PDEATHSIG, signal.SIGTERM) self.cmd_args += ['-d' if self.debug else '-k'] self.dhcp_server = util.popen(['dnsmasq', '--conf-file=-', '--pid-file'] + self.cmd_args, - stdin=subprocess.PIPE, - preexec_fn=preexec_fn) + stdin=subprocess.PIPE) sysutil.write_blocking(self.dhcp_server.stdin.fileno(), self._get_conf().encode('ascii')) self.dhcp_server.stdin.close() diff --git a/pallium/dnsproxy.py b/pallium/dnsproxy.py index 0d32f66..2918ef2 100644 --- a/pallium/dnsproxy.py +++ b/pallium/dnsproxy.py @@ -6,6 +6,7 @@ import socket import struct import os +import subprocess import threading import time @@ -137,8 +138,6 @@ def f(): child = os.fork() if forked else None if not forked or forked and child == 0: if forked: - if security.is_sudo_or_root(): - sysutil.prctl(sysutil.PR_SET_PDEATHSIG, signal.SIGKILL) onexit.clear() proxy.start(threaded=threaded) if forked: diff --git a/pallium/hops/hop.py b/pallium/hops/hop.py index 72f6f97..e3668ce 100644 --- a/pallium/hops/hop.py +++ b/pallium/hops/hop.py @@ -111,8 +111,23 @@ def __init__(self, quiet=None, dns=None, **kwargs): self.required_routes = [] @classmethod - def from_json(cls, value: typing.Dict[str, typing.Any]) -> 'Hop': - value = dict(value) + def from_json(cls, obj: typing.Dict[str, typing.Any]) -> 'Hop': + # Do not modify the passed dict. + obj = dict(obj) + + if 'dns' in obj: + proxied_addrs = [] + non_proxied_addrs = [] + for addr in obj['dns']: + if addr.startswith('tcp://'): + proxied_addrs.append(addr[6:]) + else: + non_proxied_addrs.append(addr) + dns = non_proxied_addrs + if len(proxied_addrs) > 0: + dns.append(DnsTcpProxy(proxied_addrs)) + obj['dns'] = dns + type2class = dict() for hop_class in util.get_subclasses(cls): class_name = hop_class.__name__ @@ -120,11 +135,12 @@ def from_json(cls, value: typing.Dict[str, typing.Any]) -> 'Hop': class_name = class_name[:-len('Hop')] type2class[class_name.lower()] = hop_class - hop_type = value.pop('type') + hop_type = obj.pop('type') hop_class = type2class.get(hop_type.lower()) if hop_class is None: raise "" - return hop_class(**value) + + return hop_class(**obj) def popen(self, *args, **kwargs): """Popen wrapper that keeps track of the started processes and handles command output. @@ -206,6 +222,7 @@ def connect(self): def free(self): self.log_debug('Free hop %s' % repr(self)) for pid in self.started_pids: + # TODO: Why do we have this check again? if not security.is_sudo_or_root(): continue self.log_debug('Kill process %d' % pid) diff --git a/pallium/hops/socksapp.py b/pallium/hops/socksapp.py index c67cd71..98addd3 100644 --- a/pallium/hops/socksapp.py +++ b/pallium/hops/socksapp.py @@ -29,7 +29,7 @@ def get_tcp_connections(): yield local, remote -def wait_for_listener(addr, timeout: float = 30): +def wait_for_listener(addr, timeout: float = 30, exception_function=None): start_time = time.perf_counter() end_time = start_time + timeout if timeout is not None else None addr = ipaddress.ip_address(addr[0]), addr[1] @@ -44,6 +44,9 @@ def wait_for_listener(addr, timeout: float = 30): return False time.sleep(0.1) + if exception_function is not None: + exception_function() + class SocksAppHop(hop.Hop): def __init__(self, user: str, cmd=None, timeout: float = 30, **kwargs): @@ -51,8 +54,6 @@ def __init__(self, user: str, cmd=None, timeout: float = 30, **kwargs): self._tun2socks = None self._socks_endpoint = None self._user = user - if self._user is None: - self._user = security.real_user() self._timeout = timeout self.cmd = cmd self._proc_pid = None @@ -83,14 +84,21 @@ def connect(self): # kwargs = {'preexec_fn': netns.map_back_real} kwargs = {} - if security.is_sudo_or_root(): + if security.is_sudo_or_root() and self._user is not None: kwargs = sysutil.privilege_drop_preexec(self._user, True) process = self.popen(self.cmd, **kwargs) self._proc_pid = process.pid # Wait for the SOCKS listener to appear self.log_debug('Waiting for SSH socks endpoint to appear at %s.' % str(self._socks_endpoint)) - if not wait_for_listener(self._socks_endpoint): + + def ssh_error(): + returncode = process.poll() + if returncode is not None and returncode != 0: + # TODO: Include SSH output in exception. + raise ConnectionError('SSH exited with code %d' % returncode) + + if not wait_for_listener(self._socks_endpoint, exception_function=ssh_error): raise TimeoutError def next_hop(self) -> Optional[hop.Hop]: diff --git a/pallium/hops/ssh.py b/pallium/hops/ssh.py index 4210282..62e7e4f 100644 --- a/pallium/hops/ssh.py +++ b/pallium/hops/ssh.py @@ -24,8 +24,12 @@ def __init__(self, self._args = ssh_args if ssh_args is not None else [] def update_cmd(self, hop_info): + + # The correct home directory needs to be figured out by OpenSSH. + # If the user is fakeroot, the home directory inferred from /etc/passwd will be wrong. if not security.is_sudo_or_root(): sandbox.map_back_real() + self.cmd = ['ssh', '-N', '-D', '%s:%d' % self._socks_endpoint] if self._args is not None: self.cmd += self._args # Append custom user-provided arguments diff --git a/pallium/hops/tor.py b/pallium/hops/tor.py index 9e166a4..d0e4040 100644 --- a/pallium/hops/tor.py +++ b/pallium/hops/tor.py @@ -1,5 +1,6 @@ import ipaddress import os +import pwd import shutil import time from typing import Optional, List @@ -57,7 +58,7 @@ def __init__(self, *, timeout=300, circuit_build_timeout=60, builtin_dns=True, u self._circuit_build_timeout = circuit_build_timeout self._builtin_dns = builtin_dns if user is None: - user = security.real_user() + user = security.least_privileged_user() self._user = user self._onion_support = onion_support self.required_routes = [ipaddress.ip_network('0.0.0.0/0'), ipaddress.ip_network('::/0')] @@ -97,7 +98,7 @@ def connect(self): command += ['--Log', 'notice stderr'] kwargs = {'env': os.environ.copy()} - if security.is_sudo_or_root(): + if security.is_sudo_or_root() and self._user is not None: kwargs = sysutil.privilege_drop_preexec(self._user) tor_env = self.get_tool_env('tor') diff --git a/pallium/netns.py b/pallium/netns.py index d5eeed4..ddbf810 100644 --- a/pallium/netns.py +++ b/pallium/netns.py @@ -3,6 +3,7 @@ import pickle import signal import struct +import subprocess import traceback from typing import List, Optional @@ -64,6 +65,9 @@ def from_json(cls, obj): def to_json(self): return {'src': self.src, 'target': self.target} + def __repr__(self): + return str(self.to_json()) + # noinspection PyPep8Naming class classproperty(object): @@ -111,6 +115,7 @@ def __init__(self, name, etc_path=None, mounts=None, pid_path=None): self.fs = None self.fd_stack = [] self.pid = None + # TODO: Get rid of PID path. File creation should not be the responsibility of this class. self.pid_path = pid_path @classmethod @@ -131,91 +136,88 @@ def run(self, func, args=(), kwargs=None, wait=True, isolated=False, new_session kwargs = {} if self.is_default: return func(*args, **kwargs) - if security.is_sudo_or_root(): - with self: - return func(*args, **kwargs) - else: - rfchild, w2parent = os.pipe() - rfparent, w2child = os.pipe() - pid = os.fork() - if pid == 0: + + rfchild, w2parent = os.pipe() + rfparent, w2child = os.pipe() + pid = os.fork() + if pid == 0: + if new_session: + os.setpgid(0, 0) + sysutil.prctl(sysutil.PR_SET_PDEATHSIG, signal.SIGTERM) + onexit.clear() + os.close(rfchild) + os.close(w2child) + self.enter(exclude_ns=exclude_ns) + + # A second fork is performed because privileges may be dropped inside func which would prevent exit + # handlers from releasing the resources acquired by the overlay filesystem due to a lack of permissions. + pid2 = os.fork() + if pid2 == 0: if new_session: os.setpgid(0, 0) sysutil.prctl(sysutil.PR_SET_PDEATHSIG, signal.SIGTERM) onexit.clear() - os.close(rfchild) - os.close(w2child) - self.enter(exclude_ns=exclude_ns) - - # A second fork is performed because privileges may be dropped inside func which would prevent exit - # handlers from releasing the resources acquired by the overlay filesystem due to a lack of permissions. - pid2 = os.fork() - if pid2 == 0: - if new_session: - os.setpgid(0, 0) - sysutil.prctl(sysutil.PR_SET_PDEATHSIG, signal.SIGTERM) - onexit.clear() - raised = False - try: - result = func(*args, **kwargs) - except BaseException as e: - traceback.print_exc() - result = e - raised = True - serialized = pickle.dumps(result) - header = struct.pack('=?Q', raised, len(serialized)) - try: - sysutil.write_blocking(w2parent, header) - sysutil.write_blocking(w2parent, serialized) - sysutil.read_blocking(rfparent, 1) - except (sysutil.UnexpectedEOF, BrokenPipeError): - # Probably exception in parent. The parent will display the error. - sysutil.fork_exit(1) - os.close(w2parent) - os.close(rfparent) - else: - logging.getLogger(__name__).debug( - 'Inner fork: %s, child=%d, parent=%d' % (repr(self), pid2, os.getpid())) - os.waitpid(pid2, 0) - sysutil.fork_exit(0) - else: - logging.getLogger(__name__).debug('Fork: %s, child=%d, parent=%d' % (repr(self), pid, os.getpid())) - - # Initialize variables for static analysis - serialized = None raised = False - - os.close(w2parent) - os.close(rfparent) - if not wait: - os.close(w2child) - os.close(rfchild) - return try: - header = sysutil.read_blocking(rfchild, struct.calcsize('=?Q')) - raised, expected_length = struct.unpack('=?Q', header) - serialized = sysutil.read_blocking(rfchild, expected_length) - sysutil.write_blocking(w2child, b'\0') + result = func(*args, **kwargs) + except BaseException as e: + traceback.print_exc() + result = e + raised = True + serialized = pickle.dumps(result) + header = struct.pack('=?Q', raised, len(serialized)) + try: + sysutil.write_blocking(w2parent, header) + sysutil.write_blocking(w2parent, serialized) + sysutil.read_blocking(rfparent, 1) except (sysutil.UnexpectedEOF, BrokenPipeError): - # Probably exception in child. The child will display the error. + # Probably exception in parent. The parent will display the error. sysutil.fork_exit(1) + os.close(w2parent) + os.close(rfparent) + else: + logging.getLogger(__name__).debug( + 'Inner fork: %s, child=%d, parent=%d' % (repr(self), pid2, os.getpid())) + os.waitpid(pid2, 0) + sysutil.fork_exit(0) + else: + logging.getLogger(__name__).debug('Fork: %s, child=%d, parent=%d' % (repr(self), pid, os.getpid())) + + # Initialize variables for static analysis + serialized = None + raised = False + + os.close(w2parent) + os.close(rfparent) + if not wait: os.close(w2child) os.close(rfchild) - try: - os.waitpid(pid, 0) - except OSError: - pass - if not isolated: - # Unpickling should be safe even in the case of a privilege drop because it causes the dumpable - # attribute of a process to be set to 0. This ensures the integrity of the function code that is - # executed inside the child. In particular, an unprivileged process cannot tamper with its - # integrity and modify the pickled object sent to the parent. See `man 2 prctl` - # (PR_SET_DUMPABLE). See also `man 2 ptrace` (Ptrace access mode checking). Also see `man 5 proc`. - - result = pickle.loads(serialized) - if raised: - raise result - return result + return + try: + header = sysutil.read_blocking(rfchild, struct.calcsize('=?Q')) + raised, expected_length = struct.unpack('=?Q', header) + serialized = sysutil.read_blocking(rfchild, expected_length) + sysutil.write_blocking(w2child, b'\0') + except (sysutil.UnexpectedEOF, BrokenPipeError): + # Probably exception in child. The child will display the error. + sysutil.fork_exit(1) + os.close(w2child) + os.close(rfchild) + try: + os.waitpid(pid, 0) + except OSError: + pass + if not isolated: + # Unpickling should be safe even in the case of a privilege drop because it causes the dumpable + # attribute of a process to be set to 0. This ensures the integrity of the function code that is + # executed inside the child. In particular, an unprivileged process cannot tamper with its + # integrity and modify the pickled object sent to the parent. See `man 2 prctl` + # (PR_SET_DUMPABLE). See also `man 2 ptrace` (Ptrace access mode checking). Also see `man 5 proc`. + + result = pickle.loads(serialized) + if raised: + raise result + return result def mount_etc(self): if not os.path.isdir(self.etc_path): @@ -236,7 +238,7 @@ def umount_etc(self): except FileNotFoundError: pass - def _join_ns(self, tp): + def _join_ns(self, tp, stack=None): ns_map = { sysutil.CLONE_NEWUSER: 'user', sysutil.CLONE_NEWNET: 'net', @@ -245,78 +247,53 @@ def _join_ns(self, tp): sysutil.CLONE_NEWIPC: 'ipc', sysutil.CLONE_NEWUTS: 'uts', } + + if stack is not None: + stack[tp] = os.open('/proc/self/ns/' + ns_map[tp], os.O_RDONLY) + if self.pid is not None: proc_path = '/proc/%d/ns/' % self.pid else: proc_path = self.fd_path fd = os.open(os.path.join(proc_path, ns_map[tp]), os.O_RDONLY) - sysutil.setns(fd, tp) - os.close(fd) - pass + try: + sysutil.setns(fd, tp) + finally: + os.close(fd) def enter(self, overlay_fs=True, exclude_ns=0): - self.fd_stack.append(( - os.open('/proc/self/ns/net', os.O_RDONLY), - os.open('/proc/self/ns/mnt', os.O_RDONLY), - *([] if security.is_sudo_or_root() else (os.open('/proc/self/ns/user', os.O_RDONLY),)), - )) - - if security.is_sudo_or_root(): - fd = os.open(os.path.join(self._run_path, 'net'), os.O_RDONLY) - sysutil.setns(fd, sysutil.CLONE_NEWNET) - os.close(fd) - sysutil.mount(b'', b'/', b'none', sysutil.MS_SLAVE | sysutil.MS_REC, None) + old_cwd = os.getcwd() - sysutil.unshare(sysutil.CLONE_NEWNS) - sysutil.umount2(b'/sys', sysutil.MNT_DETACH) - sysutil.mount(self.fd_path.encode(), b'/sys', b'sysfs', 0, None) + stack = {} - for mount in self.mounts: - mount.mount() + if (exclude_ns & sysutil.CLONE_NEWUSER) == 0 and not security.is_sudo_or_root(): + self._join_ns(sysutil.CLONE_NEWUSER, stack) + if (exclude_ns & sysutil.CLONE_NEWNET) == 0: + self._join_ns(sysutil.CLONE_NEWNET, stack) + if (exclude_ns & sysutil.CLONE_NEWPID) == 0: + self._join_ns(sysutil.CLONE_NEWPID, stack) + if (exclude_ns & sysutil.CLONE_NEWIPC) == 0: + self._join_ns(sysutil.CLONE_NEWIPC, stack) + if (exclude_ns & sysutil.CLONE_NEWUTS) == 0: + self._join_ns(sysutil.CLONE_NEWUTS, stack) - if overlay_fs: - self.fs = OverlayMount('/etc', self.etc_path) - self.fs.start() - else: - self.mount_etc() - else: - old_cwd = os.getcwd() - - if (exclude_ns & sysutil.CLONE_NEWUSER) == 0: - self._join_ns(sysutil.CLONE_NEWUSER) - if (exclude_ns & sysutil.CLONE_NEWNET) == 0: - self._join_ns(sysutil.CLONE_NEWNET) - if (exclude_ns & sysutil.CLONE_NEWPID) == 0: - self._join_ns(sysutil.CLONE_NEWPID) - if (exclude_ns & sysutil.CLONE_NEWIPC) == 0: - self._join_ns(sysutil.CLONE_NEWIPC) - if (exclude_ns & sysutil.CLONE_NEWUTS) == 0: - self._join_ns(sysutil.CLONE_NEWUTS) - - # Mount namespace must be last - if (exclude_ns & sysutil.CLONE_NEWNS) == 0: - self._join_ns(sysutil.CLONE_NEWNS) + # Mount namespace must be last + if (exclude_ns & sysutil.CLONE_NEWNS) == 0: + self._join_ns(sysutil.CLONE_NEWNS, stack) - try: - os.chdir(old_cwd) - except FileNotFoundError: - pass + self.fd_stack.append(stack) + + try: + os.chdir(old_cwd) + except FileNotFoundError: + pass def exit(self): cwd = os.getcwd() - fds = self.fd_stack.pop() - netns_fd, mnt_fd = fds[0:2] - - if not security.is_sudo_or_root(): - sysutil.setns(fds[2], sysutil.CLONE_NEWUSER) - os.close(fds[2]) - - sysutil.setns(netns_fd, sysutil.CLONE_NEWNET) - os.close(netns_fd) - sysutil.setns(mnt_fd, sysutil.CLONE_NEWNS) - os.close(mnt_fd) + for tp, fd in self.fd_stack.pop(): + sysutil.setns(fd, tp) os.chdir(cwd) @@ -353,14 +330,18 @@ def create(self): os.close(read) self._nodefault() - if not security.is_sudo_or_root(): - real_user = security.real_user() - real_group = security.real_group() + real_user = security.real_user() + real_group = security.real_group() + if not security.is_sudo_or_root(): sandbox.map_user(real_user, real_group) - sysutil.unshare(sysutil.CLONE_NEWNS) - self.fs = OverlayMount('/etc', self.etc_path) - self.fs.start() + sysutil.unshare(sysutil.CLONE_NEWNS) + + # Do not propagate up. + sysutil.mount(b'', b'/', b'none', sysutil.MS_SLAVE | sysutil.MS_REC, None) + + self.fs = OverlayMount('/etc', self.etc_path) + self.fs.start() sysutil.unshare(sysutil.CLONE_NEWNET) sysutil.unshare(sysutil.CLONE_NEWPID) @@ -373,12 +354,11 @@ def create(self): os.setpgid(0, 0) # This is the init process inside our PID namespace (PID 1). # When it is killed, all other processes inside the namespace are killed as well. - if not security.is_sudo_or_root(): - sysutil.mount(b'proc', b'/proc', b'proc', 0, None) + sysutil.mount(b'proc', b'/proc', b'proc', 0, None) - # Prevent zombie processes - signal.signal(signal.SIGCHLD, signal.SIG_IGN) - sysutil.prctl(sysutil.PR_SET_CHILD_SUBREAPER, 1) + # Prevent zombie processes + signal.signal(signal.SIGCHLD, signal.SIG_IGN) + sysutil.prctl(sysutil.PR_SET_CHILD_SUBREAPER, 1) # This is a dirty hack. Mounts at / do not become effective unless rejoining the mount namespace. # Therefore, we signal changed root mounts through SIGUSR1. diff --git a/pallium/profiles.py b/pallium/profiles.py index a065036..b498517 100644 --- a/pallium/profiles.py +++ b/pallium/profiles.py @@ -94,7 +94,6 @@ def __init__(self, conf: config.Configuration): @param routes: Routes to be routed through the chain. Default: 0.0.0.0/0 and ::0/0 @param preexec_fn: Functions to be executed inside the main network namespace before running the profile. @param postexec_fn: Cleanup functions to be executed when tearing down the connection. - @param enter: When enabled, `with Profile(...)` will cause code in the context to be executed in the last netns. @param kill_switch: When enabled, traffic is not allowed to bypass hops. """ self._filepath = None @@ -128,8 +127,6 @@ def __init__(self, conf: config.Configuration): for hop in self.chain: if self.debug: hop.debug = True - # TODO: Make use of this? - self._enter = False self._context_sessions = [] self.kill_switch = conf.networking.kill_switch self._mounts = [] @@ -175,127 +172,6 @@ def from_config(cls, settings: dict) -> 'Profile': return Profile(pallium.config.Configuration.from_json(settings)) - if 'chain' not in settings: - settings['chain'] = [] - - profile_args = { - 'preexec_fn': [], - 'quiet': settings.get('quiet', None) - } - - bridge = None - if 'bridge' in settings: - bridge = dict(BRIDGE_DEFAULTS) - bridge.update(settings['bridge']) - bridge = Bridge.from_json(bridge) - profile_args['bridge'] = bridge - - if 'start_networks' in settings: - profile_args['start_networks'] = settings['start_networks'] - raise ConfigurationError('Currently unsupported') - - sandbox = None - if 'sandbox' in settings and not security.is_sudo_or_root(): - sandbox = Sandbox.from_json(settings['sandbox']) - elif not security.is_sudo_or_root(): - sandbox = Sandbox() - profile_args['sandbox'] = sandbox - - # This is (partially) how the configuration should be built. - # TODO: Build the configuration like this for all object properties. - # When complete, from_config should simply call config.Configuration.from_json. - port_forwarding = settings.get('networking', {}).get('port_forwarding', {}) - profile_args['configuration'] = config.Configuration( - networking=config.Networking( - port_forwarding=config.PortForwarding.from_json(port_forwarding) - ) - ) - - type2class = dict() - for hop_class in util.get_subclasses(hops.Hop): - class_name = hop_class.__name__ - if hop_class.__name__.endswith('Hop'): - class_name = class_name[:-len('Hop')] - type2class[class_name.lower()] = hop_class - - chain = [] - pulseaudio_proxy = [] - connected_functions = [] - for hop_index, hop_option in enumerate(settings['chain']): - if 'type' not in hop_option: - raise ConfigurationError('Type property required.') - tp = hop_option['type'] - del hop_option['type'] - if tp not in type2class: - raise ConfigurationError('Invalid hop type: "%s"' % tp) - - remove = [] - for k in hop_option: - if k == 'dns': - dns_addrs = hop_option[k] - if isinstance(dns_addrs, str): - dns_addrs = [dns_addrs] - dns_addrs = set(dns_addrs) - - proxied_addrs = [] - non_proxied_addrs = [] - for addr in dns_addrs: - if addr.startswith('tcp://'): - proxied_addrs.append(addr[6:]) - else: - non_proxied_addrs.append(addr) - - dns = non_proxied_addrs - if len(proxied_addrs) > 0: - dns.append(DnsTcpProxy(proxied_addrs)) - hop_option['dns'] = dns - pass - elif not util.supports_named_arg(type2class[tp], k): - raise ConfigurationError('Unsupported property "%s" for hop of type "%s"' % (k, tp)) - - for r in remove: - del hop_option[r] - - # noinspection PyArgumentList - # hop = type2class[tp](**hop_option) - # chain.append(hop) - chain = [hops.Hop.from_json(h) for h in settings['chain']] - - profile_args['routes'] = settings.get('routes', None) - profile_args['mounts'] = [] - - if 'run' in settings: - run = settings['run'] - - user = run.get('user', os.environ.get('SUDO_USER', os.getuid())) - profile_args['user'] = user - - if run.get('gui', False) and security.is_sudo_or_root(): - profile_args['preexec_fn'].append(lambda: enable_gui_access(user)) - - if run.get('audio', None) and 'SUDO_USER' in os.environ and 'virtuser' not in run: - pulseaudio_proxy.append([os.environ['SUDO_USER'], user]) - - if 'command' in run: - profile_args['command'] = run['command'] - else: - profile_args['user'] = os.environ.get('SUDO_USER', os.getuid()) - - if 'command' not in profile_args: - shell = os.environ.get('SHELL', '/usr/bin/sh') - profile_args['command'] = [shell] - - dummy = DummyHop() - dummy.on_connected(connected_functions) - chain.append(dummy) - - for p in pulseaudio_proxy: - profile_args['preexec_fn'].append(lambda: audio.proxy_pulseaudio(*p)) - - profile = Profile(chain, **profile_args) - profile.has_connected_functions = len(connected_functions) > 0 - return profile - def _create_profile_folder(self): if not os.path.exists(runtime.APP_RUN_DIR): os.mkdir(runtime.APP_RUN_DIR, 0o711) @@ -448,14 +324,10 @@ def hash_id(self): def __enter__(self): session = self.run() self._context_sessions.append(session) - if self._enter: - session.network_namespaces[-1].enter() return session def __exit__(self, exc_type, exc_val, exc_tb): session = self._context_sessions.pop() - if self._enter: - session.network_namespaces[-1].exit() session.close() @@ -1108,7 +980,7 @@ def run_in_hop_netns(): if self._bridge.dhcp: # TODO: Transform for fork support. - dns = hop_info.netns.run(resolvconf.parse) + dns = resolvconf.parse() dhcp_server = dhcpd.DHCPServer(nets, bridge_name_out, dns=dns) dhcp_server.start() @@ -1353,8 +1225,7 @@ def _new_netns(self, index): pid_path = os.path.join(self.session_folder, 'netns', 'pids', str(index)) if not os.path.exists(etc_path): os.mkdir(etc_path, 0o755) - if not security.is_sudo_or_root(): - shutil.copyfile('/etc/resolv.conf', os.path.join(etc_path, 'resolv.conf')) + shutil.copyfile('/etc/resolv.conf', os.path.join(etc_path, 'resolv.conf')) return NetworkNamespace(fd_path, etc_path, pid_path=pid_path) def run(self, *args, **kwargs): @@ -1362,7 +1233,7 @@ def run(self, *args, **kwargs): return self.profile.sandbox.run(self, *args, **kwargs) else: call_args = {} - if not kwargs.get('root', False): + if self.profile.user is not None: call_args = sysutil.privilege_drop_preexec(self.profile.user, True) call_args.update(kwargs.get('call_args', {})) ns = self.network_namespaces[-1] diff --git a/pallium/sandbox.py b/pallium/sandbox.py index bd106a5..79b5307 100644 --- a/pallium/sandbox.py +++ b/pallium/sandbox.py @@ -533,7 +533,7 @@ def enter(self, root=False): else: logging.error('Working directory "%s" is not a directory' % self.working_dir) - if self.gvisor is False and not root: + if self.gvisor is False and not root and not security.is_sudo_or_root(): map_back_real() # We support disabling user namespaces to reduce the attack surface of the kernel inside the sandbox. @@ -635,7 +635,7 @@ def run(self, session, argv, ns_index=-1, root=False, call_args=None, terminal=F use_gvisor = self.gvisor is not False and ns_index == -1 profile = session.profile - if security.is_sudo_or_root(): + if profile.user: call_args.update(sysutil.privilege_drop_preexec(profile.user, True)) else: def preexec_fn(): @@ -674,10 +674,12 @@ def preexec_fn(): call_args['pass_fds'] = [runsc_fd, gvisor_config_dir_fd, gvisor_init_fd] call_args['shell'] = False + # If we have real root privileges, we do not make use of user namespaces, + # while the root variable indicates that we want to use fakeroot (i.e. a 0-mapped UID in the namespace) map_user_args = [ '--uid-map', '%d 0 1' % security.RUID, '--gid-map', '%d 0 1' % security.RGID - ] if not root else [] + ] if not root and not security.is_sudo_or_root() else [] controlling_terminal = [ '--controlling-terminal' @@ -698,6 +700,8 @@ def preexec_fn(): argv_orig[0] = shutil.which(argv_orig[0]) argv_run += argv_orig + effective, permitted, inheritable = tuple(map(sysutil.bitmask_to_str_capset, sysutil.capget())) + spec = { "root": { "path": "/" @@ -707,6 +711,12 @@ def preexec_fn(): "cwd": self.working_dir, "args": argv_run, "terminal": terminal, + "capabilities": { + "effective": list(effective), + "permitted": list(effective), + "inheritable": list(effective), + "bounding": list(effective), + } }, "hostname": self.hostname } @@ -722,8 +732,7 @@ def preexec(): traceback.print_exc() sys.exit(1) - call_args.update(dict( - preexec_fn=preexec)) + call_args.update({'preexec_fn': preexec}) if terminal: # Ignore all signals that we can ignore @@ -745,7 +754,7 @@ def preexec(): '--network=host', # '--debug', '--debug-log', '/tmp/gvisor-debug.txt', '--file-access=shared', - '--rootless', + *(['--rootless'] if not security.is_sudo_or_root() else []), '--host-uds=all', '--root=' + gvisor_config_dir, '--overlay2=none', @@ -772,6 +781,7 @@ def run(): except: traceback.print_exc() sys.exit(1) + # TODO: Fix os.kill(session.sandbox_pid, signal.SIGUSR1) ns.run(run, new_session=False) diff --git a/pallium/security.py b/pallium/security.py index 7fc6b1a..689ca8c 100644 --- a/pallium/security.py +++ b/pallium/security.py @@ -1,3 +1,4 @@ +import pwd import sys import os import stat @@ -17,9 +18,7 @@ def is_sudo_or_root() -> bool: global _is_sudo_or_root if _is_sudo_or_root is not None: return _is_sudo_or_root - ruid, euid, suid = os.getresuid() - # rgid, egid, sgid = os.getresgid() - _is_sudo_or_root = ruid == 0 or euid == 0 + _is_sudo_or_root = os.getuid() == 0 return _is_sudo_or_root @@ -32,6 +31,13 @@ def real_user() -> int: return ruid +def least_privileged_user(): + try: + return pwd.getpwnam("nobody").pw_uid + except KeyError: + return 0xffff + + def real_group() -> int: ruid, euid, suid = os.getresgid() if ruid == 0: diff --git a/pallium/sysutil.py b/pallium/sysutil.py index e2e0de8..5f357b1 100644 --- a/pallium/sysutil.py +++ b/pallium/sysutil.py @@ -7,6 +7,7 @@ import signal import socket import struct +import typing from typing import Optional, Union, List from . import onexit from . import security @@ -74,13 +75,14 @@ CAP_BPF = 1 << 39 CAP_CHECKPOINT_RESTORE = 1 << 40 -LINUX_CAPABILITY_VERSION_2 = 0x20071026 LINUX_CAPABILITY_VERSION_3 = 0x20080522 IFNAMSIZ = 16 SIOCSIFNAME = 0x8923 +SYSCALL_CAPGET = 125 +SYSCALL_CAPSET = 126 class UserCapHeader(ctypes.Structure): _fields_ = [ @@ -106,28 +108,65 @@ class UserCapData(ctypes.Structure): _pivot_root = ctypes.CDLL(None).syscall _pivot_root.restype = ctypes.c_int _pivot_root.argtypes = ctypes.c_char_p, ctypes.c_char_p + +# Reference: https://elixir.bootlin.com/linux/latest/A/ident/capset _capset = ctypes.CDLL(None).syscall _capset.restype = ctypes.c_int -_capset.argtypes = ctypes.c_long, ctypes.POINTER(UserCapHeader), ctypes.POINTER(UserCapData) +_capset.argtypes = ctypes.c_long, ctypes.POINTER(UserCapHeader), ctypes.POINTER(UserCapData * 2) + +# Reference: https://elixir.bootlin.com/linux/latest/A/ident/capget +_capget = ctypes.CDLL(None).syscall +_capget.restype = ctypes.c_int +_capget.argtypes = ctypes.c_long, ctypes.POINTER(UserCapHeader), ctypes.POINTER(UserCapData * 2) def capset(effective=0, permitted=0, inheritable=0, pid=0): - """if pid is None: - pid = os.getpid()""" - header = UserCapHeader() - header.version = LINUX_CAPABILITY_VERSION_3 - header.pid = pid - - data = UserCapData() - data.effective = effective - data.permitted = permitted - data.inheritable = inheritable - ret = _capset(126, header, data) + header = UserCapHeader(version=LINUX_CAPABILITY_VERSION_3, pid=pid) + + data = (UserCapData * 2)() + + # First struct contains low bits (irrespective of endianness) + data[0].effective = effective & 0xffffffff + data[0].permitted = permitted & 0xffffffff + data[0].inheritable = inheritable & 0xffffffff + + # Second struct contains high bits (irrespective of endianness) + data[1].effective = (effective >> 32) & 0xffffffff + data[1].permitted = (permitted >> 32) & 0xffffffff + data[1].inheritable = (inheritable >> 32) & 0xffffffff + + ret = _capset(SYSCALL_CAPSET, header, data) if ret < 0: - errno = ctypes.get_errno() + errno = -ret raise OSError(errno, 'Capset error: {}'.format(os.strerror(errno))) +def bitmask_to_str_capset(mask) -> typing.Set[str]: + caps = set() + cap_var_names = [x for x in globals().keys() if x.startswith('CAP_')] + for var in cap_var_names: + if mask & globals()[var] != 0: + caps.add(var) + return caps + + +def capget(pid=0): + header = UserCapHeader(version=LINUX_CAPABILITY_VERSION_3, pid=pid) + data = (UserCapData * 2)() + + ret = _capget(SYSCALL_CAPGET, ctypes.byref(header), ctypes.byref(data)) + + if ret != 0: + errno = -ret + raise OSError(errno, os.strerror(errno)) + + effective = (data[0].effective | (data[1].effective << 32)) + permitted = (data[0].permitted | (data[1].permitted << 32)) + inheritable = (data[0].inheritable | (data[1].inheritable << 32)) + + return effective, permitted, inheritable + + class ReadWriteError(Exception): pass @@ -276,9 +315,6 @@ def drop_privileges(user: Union[int, str], change_home: bool = False, group: Uni temporary: bool = False) -> None: pw_entry = get_pw_entry(user) - if not security.is_sudo_or_root(): - raise Exception("Dropping privileges is disallowed") - if not temporary: os.setgroups(os.getgrouplist(pw_entry.pw_name, pw_entry.pw_gid)) @@ -301,9 +337,13 @@ def get_real_user(): def privilege_drop_preexec(user: Union[int, str], change_home: bool = False, group: Union[int, str, None] = None, temporary: bool = False, no_new_privs: bool = False): def f(): - drop_privileges(user, change_home, group, temporary) - if no_new_privs: - prctl(PR_SET_NO_NEW_PRIVS, 1) + try: + drop_privileges(user, change_home, group, temporary) + if no_new_privs: + prctl(PR_SET_NO_NEW_PRIVS, 1) + except: + import traceback + traceback.print_exc() env = dict(os.environ) if change_home: pw_entry = get_pw_entry(user) diff --git a/pallium/xpra.py b/pallium/xpra.py index 77aebb7..34b0eb7 100644 --- a/pallium/xpra.py +++ b/pallium/xpra.py @@ -1,3 +1,4 @@ +import logging import os.path import random import signal @@ -12,8 +13,10 @@ def kill_preexec(): sysutil.prctl(sysutil.PR_SET_PDEATHSIG, signal.SIGTERM) -def wait_for_file_exists(path): +def wait_for_file_exists(path, raise_function=None): while not os.path.exists(path): + if raise_function: + raise_function() time.sleep(0.1) @@ -29,19 +32,21 @@ def find_unused_display(): def start_xpra(quiet=True): kwargs = {} - if quiet: + if quiet and logging.getLogger().level != logging.DEBUG: kwargs = { 'stdout': subprocess.DEVNULL, 'stderr': subprocess.DEVNULL, } display_no = find_unused_display() - subprocess.Popen([ + p = subprocess.Popen([ 'xpra', 'start', ':%d' % display_no, '--attach=yes', '--daemon=no', - '--dbus-proxy=no', + # The dbus proxy option has been deprecated since version 6: + # https://github.com/Xpra-org/xpra/blob/6b1b939f4dd7155778c2b32079849c54d2dcfb2b/xpra/scripts/config.py#L755 + # '--dbus-proxy=no', '--dbus-launch=no', '--dbus-control=no', '--mdns=no', @@ -51,7 +56,12 @@ def start_xpra(quiet=True): preexec_fn=kill_preexec, **kwargs, start_new_session=True) - wait_for_file_exists('/tmp/.X11-unix/X%d' % display_no) + + def raise_process_error(): + if p.poll() is not None: + raise ChildProcessError('xpra exited with error code %d' % p.returncode) + + wait_for_file_exists('/tmp/.X11-unix/X%d' % display_no, raise_process_error) display = xlib.Display(display_no) display.disable_access_control() return display_no diff --git a/tests/test_api.py b/tests/test_api.py index 1f1689b..50137e7 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -409,9 +409,15 @@ def _test_bridge_dhcp(self, test_dns, test_eth_bridge): if pid == 0: os.close(read_from_child) os.close(write_to_child) + sysutil.unshare(sysutil.CLONE_NEWNS | sysutil.CLONE_NEWNET | sysutil.CLONE_NEWPID) + + # Do not propagate to parent. + sysutil.mount(b'', b'/', b'none', sysutil.MS_SLAVE | sysutil.MS_REC, None) + pid2 = os.fork() if pid2 == 0: + # We create an overlay file system, so the /etc/resolv.conf of the main ns is not affected. netns.MountInstruction('tmp', '/tmp', 'tmpfs').mount() os.mkdir('/tmp/etc') os.mkdir('/tmp/workdir')