Skip to content

Commit

Permalink
Host preflight (#311)
Browse files Browse the repository at this point in the history
* Add HostPreflight v1beta2

* Work on TCP Load Balancer

* Host disk usage collector and analyzer

* Host memory analyzer

* TCP port status

* TCP load balancer

* Review changes

Co-authored-by: Marc Campbell <marc.e.campbell@gmail.com>
  • Loading branch information
areed and marccampbell authored Feb 8, 2021
1 parent df68f74 commit 10a34c2
Show file tree
Hide file tree
Showing 38 changed files with 3,274 additions and 49 deletions.
98 changes: 72 additions & 26 deletions cmd/preflight/cli/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,15 +86,16 @@ func runPreflights(v *viper.Viper, arg string) error {
return errors.Wrapf(err, "failed to parse %s", arg)
}

preflightSpec := obj.(*troubleshootv1beta2.Preflight)
var collectResults preflight.CollectResult
preflightSpecName := ""
finishedCh := make(chan bool, 1)
progressCh := make(chan interface{}, 0) // non-zero buffer will result in missed messages

s := spin.New()
finishedCh := make(chan bool, 1)
progressChan := make(chan interface{}, 0) // non-zero buffer will result in missed messages
go func() {
for {
select {
case msg, ok := <-progressChan:
case msg, ok := <-progressCh:
if !ok {
continue
}
Expand All @@ -114,60 +115,105 @@ func runPreflights(v *viper.Viper, arg string) error {
}
}
}()

defer func() {
close(finishedCh)
close(progressCh)
}()

if preflightSpec, ok := obj.(*troubleshootv1beta2.Preflight); ok {
r, err := collectInCluster(preflightSpec, finishedCh, progressCh)
if err != nil {
return errors.Wrap(err, "failed to collect in cluster")
}
collectResults = *r
preflightSpecName = preflightSpec.Name
} else if hostPreflightSpec, ok := obj.(*troubleshootv1beta2.HostPreflight); ok {
r, err := collectHost(hostPreflightSpec, finishedCh, progressCh)
if err != nil {
return errors.Wrap(err, "failed to collect from host")
}
collectResults = *r
preflightSpecName = hostPreflightSpec.Name
}

if collectResults == nil {
return errors.New("no results")
}

analyzeResults := collectResults.Analyze()

if preflightSpec, ok := obj.(*troubleshootv1beta2.Preflight); ok {
if preflightSpec.Spec.UploadResultsTo != "" {
err := uploadResults(preflightSpec.Spec.UploadResultsTo, analyzeResults)
if err != nil {
progressCh <- err
}
}
}

finishedCh <- true

if v.GetBool("interactive") {
if len(analyzeResults) == 0 {
return errors.New("no data has been collected")
}
return showInteractiveResults(preflightSpecName, analyzeResults)
}

return showStdoutResults(v.GetString("format"), preflightSpecName, analyzeResults)
}

func collectInCluster(preflightSpec *troubleshootv1beta2.Preflight, finishedCh chan bool, progressCh chan interface{}) (*preflight.CollectResult, error) {
v := viper.GetViper()

restConfig, err := k8sutil.GetRESTConfig()
if err != nil {
return errors.Wrap(err, "failed to convert kube flags to rest config")
return nil, errors.Wrap(err, "failed to convert kube flags to rest config")
}

collectOpts := preflight.CollectOpts{
Namespace: v.GetString("namespace"),
IgnorePermissionErrors: v.GetBool("collect-without-permissions"),
ProgressChan: progressChan,
ProgressChan: progressCh,
KubernetesRestConfig: restConfig,
}

if v.GetString("since") != "" || v.GetString("since-time") != "" {
err := parseTimeFlags(v, progressChan, preflightSpec.Spec.Collectors)
err := parseTimeFlags(v, progressCh, preflightSpec.Spec.Collectors)
if err != nil {
return err
return nil, err
}
}

collectResults, err := preflight.Collect(collectOpts, preflightSpec)
if err != nil {
if !collectResults.IsRBACAllowed {
if !collectResults.IsRBACAllowed() {
if preflightSpec.Spec.UploadResultsTo != "" {
err := uploadErrors(preflightSpec.Spec.UploadResultsTo, collectResults.Collectors)
clusterCollectResults := collectResults.(preflight.ClusterCollectResult)
err := uploadErrors(preflightSpec.Spec.UploadResultsTo, clusterCollectResults.Collectors)
if err != nil {
progressChan <- err
progressCh <- err
}
}
}
return err
return nil, err
}

analyzeResults := collectResults.Analyze()
if preflightSpec.Spec.UploadResultsTo != "" {
err := uploadResults(preflightSpec.Spec.UploadResultsTo, analyzeResults)
if err != nil {
progressChan <- err
}
}
return &collectResults, nil
}

finishedCh <- true
func collectHost(hostPreflightSpec *troubleshootv1beta2.HostPreflight, finishedCh chan bool, progressCh chan interface{}) (*preflight.CollectResult, error) {
collectOpts := preflight.CollectOpts{
ProgressChan: progressCh,
}

if v.GetBool("interactive") {
if len(analyzeResults) == 0 {
return errors.New("no data has been collected")
}
return showInteractiveResults(preflightSpec.Name, analyzeResults)
collectResults, err := preflight.CollectHost(collectOpts, hostPreflightSpec)
if err != nil {
return nil, errors.Wrap(err, "failed to collect from host")
}

return showStdoutResults(v.GetString("format"), preflightSpec.Name, analyzeResults)
return &collectResults, nil
}

func parseTimeFlags(v *viper.Viper, progressChan chan interface{}, collectors []*troubleshootv1beta2.Collect) error {
Expand Down
22 changes: 22 additions & 0 deletions examples/preflight/host-cpu.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
apiVersion: troubleshoot.sh/v1beta2
kind: HostPreflight
metadata:
name: cpu
spec:
collectors:
- cpu: {}
analyzers:
- cpu:
outcomes:
- fail:
when: "physical < 4"
message: At least 4 physical CPU cores are required
- fail:
when: "logical < 8"
message: At least 8 CPU cores are required
- warn:
when: "count < 16"
message: At least 16 CPU cores preferred
- pass:
message: This server has sufficient CPU cores

33 changes: 33 additions & 0 deletions examples/preflight/host-disk-usage.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
apiVersion: troubleshoot.sh/v1beta2
kind: HostPreflight
metadata:
name: diskUsage
spec:
collectors:
- diskUsage:
collectorName: ephemeral
path: /var/lib/kubelet
analyzers:
- diskUsage:
collectorName: ephemeral
outcomes:
- fail:
when: "total < 20Gi"
message: /var/lib/kubelet has less than 20Gi of total space
- fail:
when: "available < 10Gi"
message: /var/lib/kubelet has less than 10Gi of disk space available
- fail:
when: "used/total > 70%"
message: /var/lib/kubelet is more than 70% full
- warn:
when: "total < 40Gi"
message: /var/lib/kubelet has less than 40Gi of total space
- warn:
when: "used/total > 60%"
message: /var/lib/kubelet is more than 60% full
- pass:
when: "available/total >= 90%"
message: /var/lib/kubelet has more than 90% available
- pass:
message: /var/lib/kubelet has sufficient disk space available
19 changes: 19 additions & 0 deletions examples/preflight/host-memory.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
apiVersion: troubleshoot.sh/v1beta2
kind: HostPreflight
metadata:
name: memory
spec:
collectors:
- memory:
collectorName: memory
analyzers:
- memory:
outcomes:
- fail:
when: "< 8Gi"
message: At least 8Gi of memory is required
- warn:
when: "< 32Gi"
message: At least 32Gi of memory is recommended
- pass:
message: The system has sufficient memory
30 changes: 30 additions & 0 deletions examples/preflight/host-port.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
apiVersion: troubleshoot.sh/v1beta2
kind: HostPreflight
metadata:
name: port
spec:
collectors:
- tcpPortStatus:
collectorName: k8s
port: 7443
analyzers:
- tcpPortStatus:
collectorName: k8s
outcomes:
- fail:
when: "connection-refused"
message: Connection to port 7443 was refused.
- fail:
when: "address-in-use"
message: Another process was already listening on port 7443.
- fail:
when: "connection-timeout"
message: Timed out connecting to port 7443. Check your firewall.
- fail:
when: "error"
message: Unexpected port status
- pass:
when: "connected"
message: Port 7443 is open
- warn:
message: Unexpected port status
31 changes: 31 additions & 0 deletions examples/preflight/host-tcp-load-balancer.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
apiVersion: troubleshoot.sh/v1beta2
kind: HostPreflight
metadata:
name: loadbalancer
spec:
collectors:
- tcpLoadBalancer:
collectorName: loadbalancer
port: 7443
address: 10.128.0.29:7444
analyzers:
- tcpLoadBalancer:
collectorName: loadbalancer
outcomes:
- fail:
when: "connection-refused"
message: Connection to port 7443 via load balancer was refused.
- fail:
when: "address-in-use"
message: Another process was already listening on port 7443.
- fail:
when: "connection-timeout"
message: Timed out connecting to port 7443 via load balancer. Check your firewall.
- fail:
when: "error"
message: Unexpected port status
- pass:
when: "connected"
message: Successfully connected to port 7443 via load balancer
- warn:
message: Unexpected port status
51 changes: 51 additions & 0 deletions examples/preflight/sample-host-preflight.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
apiVersion: troubleshoot.sh/v1beta2
kind: HostPreflight
metadata:
name: example
spec:
collectors:
- tcpLoadBalancer:
collectorName: LB1
address: 10.1.1.1
port: 6443
timeout: 5000ms
- diskUsage:
collectorName: ephemeral
path: /var/lib/kubelet
analyzers:
- cpu:
outcomes:
- fail:
when: "< 4"
message: This server has fewer than 4 CPU cores; we require at least 4 and recommend 16 or more
- warn:
when: "< 16"
message: This server has at least 4 CPU cores, but we recommend 16 or more
- pass:
message: This server has sufficient CPU cores
- tcpLoadBalancer:
collectorName: LB1
outcomes:
- fail:
when: "connection-timeout"
message: The TCP Load Balancer is not forwarding traffic to this server.
- fail:
when: "address-in-use"
message: The local port is not available to validate the Load Balancer configuration.
- pass:
when: "connected"
message: The specified TCP Load Balancer appears to be properly forwarding traffic to this server.
- diskUsage:
collectorName: ephemeral
outcomes:
- fail:
when: "total < 20Gi"
message: /var/lib/kubelet has less than 20Gi of total space
- fail:
when: "available < 10Gi"
message: /var/lib/kubelet has less than 10Gi of disk space available
- fail:
when: "used/total > 70%"
message: /var/lib/kubelet is more than 70% full
- pass:
message: /var/lib/kubelet has sufficient disk space available
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ require (
github.com/prometheus/procfs v0.0.5 // indirect
github.com/replicatedhq/termui/v3 v3.1.1-0.20200811145416-f40076d26851
github.com/segmentio/ksuid v1.0.3
github.com/shirou/gopsutil v3.20.12+incompatible
github.com/spf13/cobra v0.0.5
github.com/spf13/pflag v1.0.5
github.com/spf13/viper v1.4.0
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -414,6 +414,8 @@ github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR
github.com/segmentio/ksuid v1.0.3 h1:FoResxvleQwYiPAVKe1tMUlEirodZqlqglIuFsdDntY=
github.com/segmentio/ksuid v1.0.3/go.mod h1:/XUiZBD3kVx5SmUOl55voK5yeAbBNNIed+2O73XgrPE=
github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo=
github.com/shirou/gopsutil v3.20.12+incompatible h1:6VEGkOXP/eP4o2Ilk8cSsX0PhOEfX6leqAnD+urrp9M=
github.com/shirou/gopsutil v3.20.12+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA=
github.com/shurcooL/component v0.0.0-20170202220835-f88ec8f54cc4/go.mod h1:XhFIlyj5a1fBNx5aJTbKoIq0mNaPvOagO+HjB3EtxrY=
github.com/shurcooL/events v0.0.0-20181021180414-410e4ca65f48/go.mod h1:5u70Mqkb5O5cxEA8nxTsgrgLehJeAw6Oc4Ab1c/P1HM=
github.com/shurcooL/github_flavored_markdown v0.0.0-20181002035957-2122de532470/go.mod h1:2dOwnU2uBioM+SGy2aZoq1f/Sd1l9OkAeAUvjSyvgU0=
Expand Down
Loading

0 comments on commit 10a34c2

Please sign in to comment.