diff --git a/config/crds/troubleshoot.sh_hostcollectors.yaml b/config/crds/troubleshoot.sh_hostcollectors.yaml index 1d41e643b..7e97a27fd 100644 --- a/config/crds/troubleshoot.sh_hostcollectors.yaml +++ b/config/crds/troubleshoot.sh_hostcollectors.yaml @@ -1240,6 +1240,15 @@ spec: required: - paths type: object + cgroups: + properties: + collectorName: + type: string + exclude: + type: BoolString + mountPoint: + type: string + type: object copy: properties: collectorName: diff --git a/config/crds/troubleshoot.sh_hostpreflights.yaml b/config/crds/troubleshoot.sh_hostpreflights.yaml index 5ea08cf15..980eb7faf 100644 --- a/config/crds/troubleshoot.sh_hostpreflights.yaml +++ b/config/crds/troubleshoot.sh_hostpreflights.yaml @@ -1240,6 +1240,15 @@ spec: required: - paths type: object + cgroups: + properties: + collectorName: + type: string + exclude: + type: BoolString + mountPoint: + type: string + type: object copy: properties: collectorName: diff --git a/config/crds/troubleshoot.sh_supportbundles.yaml b/config/crds/troubleshoot.sh_supportbundles.yaml index 3da9f5236..6b6bca7db 100644 --- a/config/crds/troubleshoot.sh_supportbundles.yaml +++ b/config/crds/troubleshoot.sh_supportbundles.yaml @@ -19839,6 +19839,15 @@ spec: required: - paths type: object + cgroups: + properties: + collectorName: + type: string + exclude: + type: BoolString + mountPoint: + type: string + type: object copy: properties: collectorName: diff --git a/go.mod b/go.mod index ad4312111..9e2b6d272 100644 --- a/go.mod +++ b/go.mod @@ -8,6 +8,8 @@ require ( github.com/ahmetalpbalkan/go-cursor v0.0.0-20131010032410-8136607ea412 github.com/apparentlymart/go-cidr v1.1.0 github.com/blang/semver/v4 v4.0.0 + github.com/cilium/ebpf v0.11.0 + github.com/containerd/cgroups/v3 v3.0.3 github.com/containers/image/v5 v5.31.1 github.com/distribution/distribution/v3 v3.0.0-beta.1 github.com/fatih/color v1.17.0 @@ -68,9 +70,9 @@ require ( github.com/Masterminds/squirrel v1.5.4 // indirect 
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect github.com/chai2010/gettext-go v1.0.2 // indirect - github.com/containerd/cgroups/v3 v3.0.3 // indirect github.com/containerd/errdefs v0.1.0 // indirect github.com/containerd/log v0.1.0 // indirect + github.com/coreos/go-systemd/v22 v22.5.0 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect github.com/distribution/reference v0.6.0 // indirect github.com/docker/distribution v2.8.3+incompatible // indirect @@ -207,7 +209,7 @@ require ( github.com/nsf/termbox-go v0.0.0-20190121233118-02980233997d // indirect github.com/nwaples/rardecode v1.1.2 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect - github.com/opencontainers/runtime-spec v1.2.0 // indirect + github.com/opencontainers/runtime-spec v1.2.0 github.com/opencontainers/selinux v1.11.0 // indirect github.com/ostreedev/ostree-go v0.0.0-20210805093236-719684c64e4f // indirect github.com/pelletier/go-toml/v2 v2.2.2 // indirect @@ -235,7 +237,7 @@ require ( golang.org/x/crypto v0.25.0 // indirect golang.org/x/net v0.27.0 golang.org/x/oauth2 v0.20.0 // indirect - golang.org/x/sys v0.22.0 // indirect + golang.org/x/sys v0.22.0 golang.org/x/term v0.22.0 // indirect golang.org/x/text v0.16.0 golang.org/x/time v0.5.0 // indirect diff --git a/go.sum b/go.sum index 0afc15713..a15fee7f4 100644 --- a/go.sum +++ b/go.sum @@ -274,6 +274,8 @@ github.com/chzyer/readline v1.5.1/go.mod h1:Eh+b79XXUwfKfcPLepksvw2tcLE/Ct21YObk github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= github.com/chzyer/test v1.0.0 h1:p3BQDXSxOhOG0P9z6/hGnII4LGiEPOYBhs8asl/fC04= github.com/chzyer/test v1.0.0/go.mod h1:2JlltgoNkt4TW/z9V/IzDdFaMTM2JPIi26O1pF38GC8= +github.com/cilium/ebpf v0.11.0 h1:V8gS/bTCCjX9uUnkUFUpPsksM8n1lXBAvHcpiFk1X2Y= +github.com/cilium/ebpf v0.11.0/go.mod h1:WE7CZAnqOL2RouJ4f1uyNhqr2P4CCvXFIqdRDUgWsVs= github.com/client9/misspell v0.3.4/go.mod 
h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/cncf/udpa/go v0.0.0-20200629203442-efcf912fb354/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= @@ -415,6 +417,7 @@ github.com/gobuffalo/packr/v2 v2.8.3 h1:xE1yzvnO56cUC0sTpKR3DIbxZgB54AftTFMhB2XE github.com/gobuffalo/packr/v2 v2.8.3/go.mod h1:0SahksCVcx4IMnigTjiFuyldmTrdTctXsOdiU5KwbKc= github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8= +github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/godbus/dbus/v5 v5.1.0 h1:4KLkAxT3aOY8Li4FRJe/KvhoNFFxo0m6fNuFUO8QJUk= github.com/godbus/dbus/v5 v5.1.0/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= @@ -962,6 +965,8 @@ go.opentelemetry.io/proto/otlp v1.0.0 h1:T0TX0tmXU8a3CbNXzEKGeU5mIVOdf0oykP+u2lI go.opentelemetry.io/proto/otlp v1.0.0/go.mod h1:Sy6pihPLfYHkr3NkUbEhGHFhINUSI/v80hjKIs5JXpM= go.starlark.net v0.0.0-20230525235612-a134d8f9ddca h1:VdD38733bfYv5tUZwEIskMM93VanwNIi5bIKnDrJdEY= go.starlark.net v0.0.0-20230525235612-a134d8f9ddca/go.mod h1:jxU+3+j+71eXOW14274+SmmuW82qJzl6iZSeqEtTGds= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= diff --git a/pkg/apis/troubleshoot/v1beta2/hostcollector_shared.go b/pkg/apis/troubleshoot/v1beta2/hostcollector_shared.go index 5b8d32b24..f7d31b8ee 100644 --- a/pkg/apis/troubleshoot/v1beta2/hostcollector_shared.go +++ 
b/pkg/apis/troubleshoot/v1beta2/hostcollector_shared.go @@ -76,6 +76,11 @@ type HostCopy struct { Path string `json:"path" yaml:"path"` } +type HostCGroups struct { + HostCollectorMeta `json:",inline" yaml:",inline"` + MountPoint string `json:"mountPoint,omitempty" yaml:"mountPoint,omitempty"` +} + type HostTime struct { HostCollectorMeta `json:",inline" yaml:",inline"` } @@ -219,8 +224,11 @@ type HostCollect struct { HostRun *HostRun `json:"run,omitempty" yaml:"run,omitempty"` HostCopy *HostCopy `json:"copy,omitempty" yaml:"copy,omitempty"` HostKernelConfigs *HostKernelConfigs `json:"kernelConfigs,omitempty" yaml:"kernelConfigs,omitempty"` + HostCGroups *HostCGroups `json:"cgroups,omitempty" yaml:"cgroups,omitempty"` } +// GetName gets the name of the collector +// Deprecated: This function is not used anywhere and should be removed. Do not use it. func (c *HostCollect) GetName() string { // TODO: Is this used anywhere? Should we just remove it? var collector string diff --git a/pkg/apis/troubleshoot/v1beta2/zz_generated.deepcopy.go b/pkg/apis/troubleshoot/v1beta2/zz_generated.deepcopy.go index 19133ee57..1a003f03a 100644 --- a/pkg/apis/troubleshoot/v1beta2/zz_generated.deepcopy.go +++ b/pkg/apis/troubleshoot/v1beta2/zz_generated.deepcopy.go @@ -1906,6 +1906,22 @@ func (in *HostBlockDevices) DeepCopy() *HostBlockDevices { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *HostCGroups) DeepCopyInto(out *HostCGroups) { + *out = *in + in.HostCollectorMeta.DeepCopyInto(&out.HostCollectorMeta) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HostCGroups. +func (in *HostCGroups) DeepCopy() *HostCGroups { + if in == nil { + return nil + } + out := new(HostCGroups) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *HostCertificatesCollection) DeepCopyInto(out *HostCertificatesCollection) { *out = *in @@ -2077,6 +2093,11 @@ func (in *HostCollect) DeepCopyInto(out *HostCollect) { *out = new(HostKernelConfigs) (*in).DeepCopyInto(*out) } + if in.HostCGroups != nil { + in, out := &in.HostCGroups, &out.HostCGroups + *out = new(HostCGroups) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HostCollect. diff --git a/pkg/collect/host_cgroup.go b/pkg/collect/host_cgroup.go new file mode 100644 index 000000000..5a81a5bd3 --- /dev/null +++ b/pkg/collect/host_cgroup.go @@ -0,0 +1,99 @@ +// This Control Groups collector is heavily based on k0s' +// probes implementation https://github.com/k0sproject/k0s/blob/main/internal/pkg/sysinfo/probes/linux/cgroups.go + +package collect + +import ( + "bufio" + "bytes" + "encoding/json" + "fmt" + "io" + "strings" + + troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2" + "k8s.io/klog/v2" +) + +const hostCGroupsPath = `host-collectors/system/cgroups.json` + +type CollectHostCGroups struct { + hostCollector *troubleshootv1beta2.HostCGroups + BundlePath string +} + +type cgroupResult struct { + Enabled bool `json:"enabled"` + MountPoint string `json:"mountPoint"` + Controllers []string `json:"controllers"` +} + +type cgroupsResult struct { + CGroupEnabled bool `json:"cgroup-enabled"` + CGroupV1 cgroupResult `json:"cgroup-v1"` + CGroupV2 cgroupResult `json:"cgroup-v2"` + // AllControllers is a list of all cgroup controllers found in the system + AllControllers []string `json:"allControllers"` +} + +func (c *CollectHostCGroups) Title() string { + return hostCollectorTitleOrDefault(c.hostCollector.HostCollectorMeta, "cgroups") +} + +func (c *CollectHostCGroups) IsExcluded() (bool, error) { + return isExcluded(c.hostCollector.Exclude) +} + +func (c *CollectHostCGroups) Collect(progressChan chan<- interface{}) (map[string][]byte, error) { + // 
https://man7.org/linux/man-pages/man7/cgroups.7.html + // Implementation is based on https://github.com/k0sproject/k0s/blob/main/internal/pkg/sysinfo/probes/linux/cgroups.go + + if c.hostCollector.MountPoint == "" { + c.hostCollector.MountPoint = "/sys/fs/cgroup" + } + + results, err := discoverConfiguration(c.hostCollector.MountPoint) + if err != nil { + return nil, err + } + + // Save the results + resultsJson, err := json.MarshalIndent(results, "", " ") + if err != nil { + return nil, err + } + + output := NewResult() + err = output.SaveResult(c.BundlePath, hostCGroupsPath, bytes.NewBuffer(resultsJson)) + if err != nil { + return nil, err + } + + return output, nil +} + +func parseV1ControllerNames(r io.Reader) ([]string, error) { + names := []string{} + var lineNo uint + lines := bufio.NewScanner(r) + for lines.Scan() { + lineNo = lineNo + 1 + text := lines.Text() + if len(text) == 0 { + continue + } + + if text[0] != '#' { + parts := strings.Fields(text) + if len(parts) >= 4 && parts[3] != "0" { + names = append(names, parts[0]) + } + } + } + if err := lines.Err(); err != nil { // Scan returns false on a read error, so it is only observable after the loop + return nil, fmt.Errorf("failed to parse /proc/cgroups at line %d: %w", lineNo+1, err) + } + klog.V(2).Info("cgroup v1 controllers loaded") + + return names, nil +} diff --git a/pkg/collect/host_cgroup_linux.go b/pkg/collect/host_cgroup_linux.go new file mode 100644 index 000000000..f7cc4866a --- /dev/null +++ b/pkg/collect/host_cgroup_linux.go @@ -0,0 +1,330 @@ +// This Control Groups collector is heavily based on k0s' +// probes implementation https://github.com/k0sproject/k0s/blob/main/internal/pkg/sysinfo/probes/linux/cgroups.go + +//go:build linux + +package collect + +import ( + "errors" + "fmt" + "io/fs" + "os" + "path/filepath" + "sort" + "strings" + "syscall" + + "github.com/cilium/ebpf/rlimit" + "github.com/containerd/cgroups/v3/cgroup2" + "github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" + + "k8s.io/klog/v2" + "k8s.io/utils/ptr" +) + +func 
discoverConfiguration(mountPoint string) (cgroupsResult, error) { + results := cgroupsResult{} + + var st syscall.Statfs_t + if err := syscall.Statfs(mountPoint, &st); err != nil { + if os.IsNotExist(err) { + klog.V(2).Infof("no file system mounted at %q", mountPoint) + return results, nil + } + + return results, fmt.Errorf("failed to stat %q: %w", mountPoint, err) + } + + switch st.Type { + case unix.CGROUP2_SUPER_MAGIC: + klog.V(2).Infof("cgroup v2 mounted at %q", mountPoint) + // Discover cgroup2 and controllers enabled + // https://www.kernel.org/doc/html/v5.16/admin-guide/cgroup-v2.html#mounting + v, err := discoverV2Configuration(mountPoint) + if err != nil { + return results, fmt.Errorf("failed to discover cgroup v2 configuration from %s mount point: %w", mountPoint, err) + } + results.CGroupV2 = v + case unix.CGROUP_SUPER_MAGIC, unix.TMPFS_MAGIC: + klog.V(2).Infof("cgroup v1 mounted at %q", mountPoint) + // Discover cgroup1 and controllers enabled + // https://git.kernel.org/pub/scm/docs/man-pages/man-pages.git/tree/man7/cgroups.7?h=man-pages-5.13#n159 + // https://www.kernel.org/doc/html/v5.16/admin-guide/cgroup-v1/cgroups.html#how-do-i-use-cgroups + r, err := discoverV1Configuration(mountPoint) + if err != nil { + return results, fmt.Errorf("failed to discover cgroup v1 configuration from %s mount point: %w", mountPoint, err) + } + results.CGroupV1 = r + default: + return results, fmt.Errorf("unexpected file system type of %q: 0x%x", mountPoint, st.Type) + } + + // If cgroup1 or cgroup2 is enabled + results.CGroupEnabled = results.CGroupV1.Enabled || results.CGroupV2.Enabled + + // Sort controllers for consistent output + if len(results.CGroupV1.Controllers) > 0 { + sort.Strings(results.CGroupV1.Controllers) + } else { + results.CGroupV1.Controllers = []string{} + } + if len(results.CGroupV2.Controllers) > 0 { + sort.Strings(results.CGroupV2.Controllers) + } else { + results.CGroupV2.Controllers = []string{} + } + + // Combine all controllers + set := 
make(map[string]struct{}) + for _, c := range results.CGroupV1.Controllers { + set[c] = struct{}{} + } + + for _, c := range results.CGroupV2.Controllers { + set[c] = struct{}{} + } + + for c := range set { + results.AllControllers = append(results.AllControllers, c) + } + sort.Strings(results.AllControllers) + + return results, nil +} + +func discoverV1Configuration(mountPoint string) (cgroupResult, error) { + res := cgroupResult{} + // Get the available controllers from /proc/cgroups. + // See https://www.man7.org/linux/man-pages/man7/cgroups.7.html#NOTES + + f, err := os.Open("/proc/cgroups") + if err != nil { + return res, fmt.Errorf("failed to open /proc/cgroups: %w", err) + } + defer f.Close() + + names, err := parseV1ControllerNames(f) + if err != nil { + return res, err + } + + res.Enabled = true + res.Controllers = names + res.MountPoint = mountPoint + + return res, nil +} + +func discoverV2Configuration(mountPoint string) (cgroupResult, error) { + res := cgroupResult{} + + // Detect all the listed root controllers. + controllers, err := detectV2Controllers(mountPoint) + if err != nil { + return res, err + } + + res.Enabled = true + res.Controllers = controllers + res.MountPoint = mountPoint + return res, nil +} + +// Detects all the listed root controllers. +// +// https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#core-interface-files +func detectV2Controllers(mountPoint string) ([]string, error) { + root, err := cgroup2.Load("/", cgroup2.WithMountpoint(mountPoint)) + if err != nil { + return nil, fmt.Errorf("failed to load root cgroup: %w", err) + } + + // Load root controllers + controllerNames, err := root.RootControllers() // This reads cgroup.controllers + if err != nil { + return nil, fmt.Errorf("failed to list cgroup root controllers: %w", err) + } + + for _, c := range controllerNames { + if c == "cpu" { + // If the cpu controller is enabled, the cpuacct controller is also enabled. 
+ // This controller succeeded v1's cpuacct and cpu controllers. + // https://www.man7.org/linux/man-pages/man7/cgroups.7.html + controllerNames = append(controllerNames, "cpuacct") + } + } + + // Detect freezer controller + if detectV2FreezerController(mountPoint) { + controllerNames = append(controllerNames, "freezer") + } + + // Detect devices controller + if detectV2DevicesController(mountPoint) { + controllerNames = append(controllerNames, "devices") + } + + return controllerNames, nil +} + +// Detects the device controller by trying to attach a dummy program of type +// BPF_CGROUP_DEVICE to a cgroup. Since the controller has no interface files +// and is implemented purely on top of BPF, this is the only reliable way to +// detect it. A best-guess detection via the kernel version has the major +// drawback of not working with kernels that have a lot of backported features, +// such as RHEL and friends. +// +// https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#device-controller +func detectV2DevicesController(mountPoint string) bool { + err := attachDummyDeviceFilter(mountPoint) + switch { + case err == nil: + klog.V(2).Info("eBPF device filter program successfully attached") + return true + // EACCES occurs when not allowed to create cgroups. + // EPERM occurs when not allowed to load eBPF programs. + case errors.Is(err, os.ErrPermission) && os.Geteuid() != 0: + // Insufficient permissions. 
Loading the eBPF program requires elevated permissions + return true + case errors.Is(err, unix.EROFS): + // Read-only file system detected when trying to create a temporary cgroup + return true + case eBPFProgramUnsupported(err): + klog.V(2).Info("eBPF device filter program is unsupported by the kernel") + return false + } + + klog.V(2).Infof("failed to attach eBPF device filter program: %v", err) + return false +} + +// Attaches a dummy program of type BPF_CGROUP_DEVICE to a randomly created +// cgroup and removes the program and cgroup again. +func attachDummyDeviceFilter(mountPoint string) (err error) { + insts, license, err := cgroup2.DeviceFilter([]specs.LinuxDeviceCgroup{{ + Allow: true, + Type: "a", + Major: ptr.To(int64(-1)), + Minor: ptr.To(int64(-1)), + Access: "rwm", + }}) + if err != nil { + return fmt.Errorf("failed to create eBPF device filter program: %w", err) + } + + tmpCgroupPath, err := os.MkdirTemp(mountPoint, "troubleshoot-devices-detection-*") + if err != nil { + return fmt.Errorf("failed to create temporary cgroup: %w", err) + } + defer func() { err = errors.Join(err, os.Remove(tmpCgroupPath)) }() + + dirFD, err := unix.Open(tmpCgroupPath, unix.O_DIRECTORY|unix.O_RDONLY|unix.O_CLOEXEC, 0) + if err != nil { + return fmt.Errorf("failed to open temporary cgroup: %w", &fs.PathError{Op: "open", Path: tmpCgroupPath, Err: err}) + } + defer func() { + if closeErr := unix.Close(dirFD); closeErr != nil { + err = errors.Join(err, &fs.PathError{Op: "close", Path: tmpCgroupPath, Err: closeErr}) + } + }() + + close, err := cgroup2.LoadAttachCgroupDeviceFilter(insts, license, dirFD) + if err != nil { + // RemoveMemlock may be required on kernels < 5.11 + // observed on debian 11: 5.10.0-21-armmp-lpae #1 SMP Debian 5.10.162-1 (2023-01-21) armv7l + // https://github.com/cilium/ebpf/blob/v0.11.0/prog.go#L356-L360 + if errors.Is(err, unix.EPERM) && strings.Contains(err.Error(), "RemoveMemlock") { + if err2 := rlimit.RemoveMemlock(); err2 != nil { + err = 
errors.Join(err, err2) + } else { + // Try again, MEMLOCK should be removed by now. + close, err2 = cgroup2.LoadAttachCgroupDeviceFilter(insts, license, dirFD) + if err2 != nil { + err = errors.Join(err, err2) + } else { + err = nil + } + } + } + } + if err != nil { + if eBPFProgramUnsupported(err) { + return err + } + return fmt.Errorf("failed to load/attach eBPF device filter program: %w", err) + } + + return close() +} + +// Returns true if the given error indicates that an eBPF program is unsupported +// by the kernel. +func eBPFProgramUnsupported(err error) bool { + // https://github.com/cilium/ebpf/blob/v0.11.0/features/prog.go#L43-L49 + + switch { + // EINVAL occurs when attempting to create a program with an unknown type. + case errors.Is(err, unix.EINVAL): + return true + + // E2BIG occurs when ProgLoadAttr contains non-zero bytes past the end of + // the struct known by the running kernel, meaning the kernel is too old to + // support the given prog type. + case errors.Is(err, unix.E2BIG): + return true + + default: + return false + } +} + +// Detect the freezer controller. It doesn't appear in the cgroup.controllers +// file. Check for the existence of the cgroup.freeze file in the troubleshoot cgroup +// instead, or try to create a dummy cgroup if troubleshoot runs in the root cgroup. +// +// https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#core-interface-files +func detectV2FreezerController(mountPoint string) bool { + + // Detect the freezer controller by checking troubleshoot's cgroup for the existence + // of the cgroup.freeze file. 
+ // https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#processes + cgroupPath, err := cgroup2.NestedGroupPath("") + if err != nil { + klog.V(2).Infof("failed to get troubleshoot cgroup: %v", err) + return false + } + + if cgroupPath != "/" { + cgroupPath = filepath.Join(mountPoint, cgroupPath) + } else { // The root cgroup cannot be frozen. Try to create a dummy cgroup. + tmpCgroupPath, err := os.MkdirTemp(mountPoint, "troubleshoot-freezer-detection-*") + if err != nil { + if errors.Is(err, os.ErrPermission) && os.Geteuid() != 0 { + // Insufficient permissions. Creating a cgroup requires elevated permissions + klog.V(2).Info("insufficient permissions to create temporary cgroup") + } + if errors.Is(err, unix.EROFS) && os.Geteuid() != 0 { + klog.V(2).Info("read-only file system detected when trying to create a temporary cgroup") + } + + klog.V(2).Infof("failed to create temporary cgroup: %v", err) + return false + } + defer func() { err = errors.Join(err, os.Remove(tmpCgroupPath)) }() + cgroupPath = tmpCgroupPath + } + + // Check if the cgroup.freeze exists; a missing entry or a directory means no freezer support + if stat, err := os.Stat(filepath.Join(cgroupPath, "cgroup.freeze")); (err == nil && stat.IsDir()) || os.IsNotExist(err) { + klog.V(2).Infof("cgroup.freeze does not exist at %q", cgroupPath) + return false + } else if err != nil { + klog.V(2).Infof("failed to check for cgroup.freeze at %q: %v", cgroupPath, err) + return false + } + + klog.V(2).Infof("cgroup.freeze exists at %q", cgroupPath) + return true +} diff --git a/pkg/collect/host_cgroup_others.go b/pkg/collect/host_cgroup_others.go new file mode 100644 index 000000000..5c7a37176 --- /dev/null +++ b/pkg/collect/host_cgroup_others.go @@ -0,0 +1,11 @@ +//go:build !linux + +package collect + +import ( + "fmt" +) + +func discoverConfiguration(_ string) (cgroupsResult, error) { + return cgroupsResult{}, fmt.Errorf("Discovery of cgroups not implemented for this OS") +} diff --git a/pkg/collect/host_cgroup_test.go 
b/pkg/collect/host_cgroup_test.go new file mode 100644 index 000000000..cee13c4a1 --- /dev/null +++ b/pkg/collect/host_cgroup_test.go @@ -0,0 +1,60 @@ +package collect + +import ( + "bytes" + "reflect" + "testing" +) + +func Test_parseV1ControllerNames(t *testing.T) { + tests := []struct { + name string + subsystems []byte + want []string + wantErr bool + }{ + { + name: "no controllers", + subsystems: []byte(""), + want: []string{}, + wantErr: false, + }, + { + name: "multiple enabled controllers", + subsystems: []byte( + ` +#subsys_name hierarchy num_cgroups enabled +cpuset 5 1 +cpu 9 41 1 +cpuacct 9 41 1 +blkio 11 41 1 +memory 8 95 0 +devices 13 41 1 +freezer 3 2 1 +net_cls 4 1 1 +perf_event 2 1 0 +net_prio 4 1 0 +hugetlb 12 1 1 +pids 10 46 1 +rdma 6 1 0 +misc 7 1 0 +`), + want: []string{"cpu", "cpuacct", "blkio", "devices", "freezer", "net_cls", "hugetlb", "pids"}, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + r := bytes.NewReader(tt.subsystems) + + got, err := parseV1ControllerNames(r) + if (err != nil) != tt.wantErr { + t.Errorf("parseV1ControllerNames() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("parseV1ControllerNames() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/pkg/collect/host_collector.go b/pkg/collect/host_collector.go index ba7d00dd5..48565e803 100644 --- a/pkg/collect/host_collector.go +++ b/pkg/collect/host_collector.go @@ -63,6 +63,8 @@ func GetHostCollector(collector *troubleshootv1beta2.HostCollect, bundlePath str return &CollectHostCopy{collector.HostCopy, bundlePath}, true case collector.HostKernelConfigs != nil: return &CollectHostKernelConfigs{collector.HostKernelConfigs, bundlePath}, true + case collector.HostCGroups != nil: + return &CollectHostCGroups{collector.HostCGroups, bundlePath}, true default: return nil, false } diff --git a/schemas/supportbundle-troubleshoot-v1beta2.json 
b/schemas/supportbundle-troubleshoot-v1beta2.json index 170657f84..c60a21683 100644 --- a/schemas/supportbundle-troubleshoot-v1beta2.json +++ b/schemas/supportbundle-troubleshoot-v1beta2.json @@ -18816,6 +18816,20 @@ } } }, + "cgroups": { + "type": "object", + "properties": { + "collectorName": { + "type": "string" + }, + "exclude": { + "oneOf": [{"type": "string"},{"type": "boolean"}] + }, + "mountPoint": { + "type": "string" + } + } + }, "copy": { "type": "object", "required": [