From 5d76e658ce8f8fc68dd3a9aeff25d6762534f3dd Mon Sep 17 00:00:00 2001 From: Sayan Mondal Date: Thu, 13 Jun 2024 12:01:43 +0530 Subject: [PATCH] chore: Adding docs for probe best practices and future roadmap --- website/docs/best-practices.md | 23 +++++ website/docs/concepts/probes.md | 2 + website/sidebars.js | 150 ++++++++++++++++---------------- 3 files changed, 101 insertions(+), 74 deletions(-) create mode 100644 website/docs/best-practices.md diff --git a/website/docs/best-practices.md b/website/docs/best-practices.md new file mode 100644 index 00000000..6dbc8daf --- /dev/null +++ b/website/docs/best-practices.md @@ -0,0 +1,23 @@ +--- +id: best-practices +title: Best Practices +sidebar_label: Best Practices +--- + +--- + +## Probes + +#### Restrict execution scope + +* For the cases where a malicious higher level user (who doesn't have cluster access) tries to extract the SA token from the probe pod and leverages it for destructive action - The recommendation would be to have a reduced scope within the service account itself (i.e., create rbac w/o loopholes). The RBAC for the agent itself can be subject to scrutiny prior to deployment - it can be restricted to a namespace with verbs against restricted resources etc., The agent setup can be carried out via Helm too, with changes to templates undergoing review. + + The intent of the command probe pod is to allow the users to perform custom checks which allow them to either (a) validate the impact of chaos, either within or outside the cluster (b) manually trigger remediation or (c) perform tasks that complement the chaos itself (such as load etc.,). Having a restrictive scope for this feature would depend on what kind of tasks are allowed by the cluster/devops admins within the purview of (a), (b), (c) - rather than being imposed from within the product by default. If some of the validations involve performing kube-api calls, we would need to support that / binding the admin's decision. 
+ +* Only users with `Executor` roles can execute probes. Project `Owners`, however, wield the authority to create, edit and also execute probes, shaping them to suit project needs and objectives. This division ensures efficient probe management, with Executors handling execution and Project Owners overseeing customization and design. + +#### Future Roadmap + +* Resilience probes addition and execution are to be supported via ChaosHub and are also expected to have preset templates. The change entails probes being introduced as first class citizens/resources within the platform that can be reused across experiments via Hub. In this context, the ability to create and modify experiments/probes can be provided to an owner persona, with the probes being maintained in a Git repository (with its introduction in ChaosHub), which is also expected to have the right number of approvals for use. These are then executed as is by the executor persona on the platform. + +* Adding a command scanner to restrict malicious Linux commands/attacks used against cmdProbes via enhanced checks. Integrating the command scanner into the CREATE and UPDATE APIs of cmdProbes ensures that all commands passed through these interfaces are subjected to thorough scrutiny. This involves modifying the API endpoints to include the command scanning functionality and implementing appropriate validation logic. \ No newline at end of file diff --git a/website/docs/concepts/probes.md b/website/docs/concepts/probes.md index c28b22b5..0e393868 100644 --- a/website/docs/concepts/probes.md +++ b/website/docs/concepts/probes.md @@ -112,6 +112,8 @@ probe: > `source.hostNetwork` can be set to `true` to allow access to the node network namespace for the pod executing the probe +For insights into efficient chaos probe practices and our roadmap for the future, check out the [best practices](../best-practices.md#probes). 
+ ### k8sProbe With the proliferation of custom resources & operators, especially in the case of stateful applications, the steady-state is manifested as status parameters/flags within Kubernetes resources. k8sProbe addresses verification of the desired resource state by allowing users to define the Kubernetes GVR (group-version-resource) with appropriate filters (field selectors/label selectors). The fault makes use of the Kubernetes Dynamic Client to achieve this.The `k8sProbe` can be defined at `.spec.experiments[].spec.probe` the path inside ChaosEngine. diff --git a/website/sidebars.js b/website/sidebars.js index 8ee061a7..651d7fd3 100644 --- a/website/sidebars.js +++ b/website/sidebars.js @@ -1,119 +1,121 @@ module.exports = { - "docs": [ + docs: [ { - "Introduction": ["introduction/what-is-litmus", "introduction/features", "introduction/usage", "introduction/core-principles", "introduction/community", "introduction/other-links"] + Introduction: [ + 'introduction/what-is-litmus', + 'introduction/features', + 'introduction/usage', + 'introduction/core-principles', + 'introduction/community', + 'introduction/other-links' + ] }, { - "Getting Started": [ - "getting-started/resources", - "getting-started/installation", - ] + 'Getting Started': ['getting-started/resources', 'getting-started/installation'] }, { - "Architecture": [ - "architecture/overview", - "architecture/architecture-summary", - "architecture/chaos-control-plane", - "architecture/chaos-execution-plane", - "architecture/chaos-fault-flow" + Architecture: [ + 'architecture/overview', + 'architecture/architecture-summary', + 'architecture/chaos-control-plane', + 'architecture/chaos-execution-plane', + 'architecture/chaos-fault-flow' ] }, { - "Concepts": [ - "concepts/overview", - "concepts/chaos-infrastructure", - "concepts/chaoshub", - "concepts/chaos-workflow", - "concepts/probes", - "concepts/user-management", - "concepts/projects", - "concepts/teaming", - "concepts/gitops", - 
"concepts/oauth-dex-concept" + Concepts: [ + 'concepts/overview', + 'concepts/chaos-infrastructure', + 'concepts/chaoshub', + 'concepts/chaos-workflow', + 'concepts/probes', + 'concepts/user-management', + 'concepts/projects', + 'concepts/teaming', + 'concepts/gitops', + 'concepts/oauth-dex-concept' ] }, { - "User Guides": [ - "user-guides/overview", + 'User Guides': [ + 'user-guides/overview', { - "Advanced Installation": [ + 'Advanced Installation': [ { - "ChaosCenter": [ - "user-guides/chaoscenter-oauth-dex-installation", - "user-guides/chaoscenter-cluster-scope-installation", - "user-guides/chaoscenter-namespace-scope-installation", - "user-guides/setup-without-ingress", - "user-guides/setup-with-ingress" + ChaosCenter: [ + 'user-guides/chaoscenter-oauth-dex-installation', + 'user-guides/chaoscenter-cluster-scope-installation', + 'user-guides/chaoscenter-namespace-scope-installation', + 'user-guides/setup-without-ingress', + 'user-guides/setup-with-ingress' ] }, - "user-guides/chaos-infrastructure-installation" + 'user-guides/chaos-infrastructure-installation' ] }, { - "Environments": [ - "user-guides/create-environment", - "user-guides/edit-environment", - "user-guides/delete-environment", + Environments: [ + 'user-guides/create-environment', + 'user-guides/edit-environment', + 'user-guides/delete-environment' ] }, { - "Chaos Infrastructure": [ - "user-guides/create-infrastructure", - "user-guides/delete-infrastructure", - ] + 'Chaos Infrastructure': ['user-guides/create-infrastructure', 'user-guides/delete-infrastructure'] }, { - "Injecting Fault": [ - "user-guides/schedule-experiment", - "user-guides/observe-experiment", - "user-guides/edit-schedule", - "user-guides/download-experiment-manifest", - "user-guides/re-run-experiment", - "user-guides/delete-experiment", - "user-guides/construct-experiment" + 'Injecting Fault': [ + 'user-guides/schedule-experiment', + 'user-guides/observe-experiment', + 'user-guides/edit-schedule', + 
'user-guides/download-experiment-manifest', + 'user-guides/re-run-experiment', + 'user-guides/delete-experiment', + 'user-guides/construct-experiment' ] }, { - "Resilience Probes": [ - "user-guides/create-resilience-probe", - "user-guides/delete-resilience-probe", - "user-guides/edit-resilience-probe", - "user-guides/view-resilience-probe" + 'Resilience Probes': [ + 'user-guides/create-resilience-probe', + 'user-guides/delete-resilience-probe', + 'user-guides/edit-resilience-probe', + 'user-guides/view-resilience-probe' ] }, - "user-guides/account-settings", + 'user-guides/account-settings', { - "User Management": [ - "user-guides/create-user", - "user-guides/view-user", - "user-guides/reset-password", - "user-guides/deactivate-user" + 'User Management': [ + 'user-guides/create-user', + 'user-guides/view-user', + 'user-guides/reset-password', + 'user-guides/deactivate-user' ] }, { - "Managing Projects": ["user-guides/change-project-name", "user-guides/leave-project"] + 'Managing Projects': ['user-guides/change-project-name', 'user-guides/leave-project'] }, { - "Teaming": [ - "user-guides/invite-team-member", - "user-guides/edit-invite", - "user-guides/accept-invite", - "user-guides/remove-team-member" + Teaming: [ + 'user-guides/invite-team-member', + 'user-guides/edit-invite', + 'user-guides/accept-invite', + 'user-guides/remove-team-member' ] }, - "user-guides/gitops-configuration", - "user-guides/image-registry", - "user-guides/uninstall-litmus" + 'user-guides/gitops-configuration', + 'user-guides/image-registry', + 'user-guides/uninstall-litmus' ] }, { - "Litmusctl": ["litmusctl/installation", "litmusctl/litmusctl-usage"] + Litmusctl: ['litmusctl/installation', 'litmusctl/litmusctl-usage'] }, { - "Integrations": ["integrations/prometheus", "integrations/grafana", "integrations/backstage"] + Integrations: ['integrations/prometheus', 'integrations/grafana', 'integrations/backstage'] }, - "troubleshooting", - "glossary", - "faq" + 'troubleshooting', + 
'best-practices', + 'glossary', + 'faq' ] }