Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add features to admin interface #2489

Draft
wants to merge 12 commits into
base: main
Choose a base branch
from
79 changes: 76 additions & 3 deletions client/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -259,9 +259,82 @@ func (n *NodeClient) NetworkListInterfaces(ctx context.Context) (result map[stri
return
}

// AdminRebootNode asks the node to stop all of its running services and
// reboot. It sends the "zos.admin.reboot" command to the node twin with no
// payload and returns whatever error the bus call reports.
func (n *NodeClient) AdminRebootNode(ctx context.Context) error {
	const rebootCmd = "zos.admin.reboot"

	return n.bus.Call(ctx, n.nodeTwin, rebootCmd, nil, nil)
}

// AdminRestartService asks the node to restart a single zinit service.
// The service name is sent as the payload of the "zos.admin.restart"
// command; the error from the bus call is returned unchanged.
func (n *NodeClient) AdminRestartService(ctx context.Context, service string) error {
	const restartCmd = "zos.admin.restart"

	return n.bus.Call(ctx, n.nodeTwin, restartCmd, service, nil)
}

// AdminRestartAll asks the node to restart all of its zinit services.
// It sends the "zos.admin.restart_all" command with no payload and returns
// the error from the bus call unchanged.
func (n *NodeClient) AdminRestartAll(ctx context.Context) error {
	const restartAllCmd = "zos.admin.restart_all"

	return n.bus.Call(ctx, n.nodeTwin, restartAllCmd, nil, nil)
}

// AdminShowLogs requests l lines of zinit logs from the node.
// The line count is sent as the payload of the "zos.admin.show_logs"
// command and the reply is decoded into logs.
func (n *NodeClient) AdminShowLogs(ctx context.Context, l int) (logs []byte, err error) {
	const showLogsCmd = "zos.admin.show_logs"

	err = n.bus.Call(ctx, n.nodeTwin, showLogsCmd, l, &logs)
	return logs, err
}

// AdminShowResolve returns the content of /etc/resolv.conf on the node.
// It sends the "zos.admin.show_resolve" command with no payload and decodes
// the reply into res.
func (n *NodeClient) AdminShowResolve(ctx context.Context) (res []byte, err error) {
	const showResolveCmd = "zos.admin.show_resolve"

	err = n.bus.Call(ctx, n.nodeTwin, showResolveCmd, nil, &res)
	return res, err
}

// AdminShowOpenConnections returns information about all open connections
// on the node. It sends the "zos.admin.show_open_connections" command with
// no payload and decodes the reply into res.
func (n *NodeClient) AdminShowOpenConnections(ctx context.Context) (res []byte, err error) {
	const openConnectionsCmd = "zos.admin.show_open_connections"

	err = n.bus.Call(ctx, n.nodeTwin, openConnectionsCmd, nil, &res)
	return res, err
}

// AdminStopWorkload asks the node to stop a workload.
// The workload is identified by the owning twin ID and the workload ID,
// which are sent as the "twin_id" and "workload_id" fields of the
// "zos.admin.stop_workload" command payload.
func (n *NodeClient) AdminStopWorkload(ctx context.Context, twinID uint32, wlID uint64) error {
	const stopCmd = "zos.admin.stop_workload"

	var payload struct {
		TwinID     uint32 `json:"twin_id"`
		WorkloadID uint64 `json:"workload_id"`
	}
	payload.TwinID = twinID
	payload.WorkloadID = wlID

	return n.bus.Call(ctx, n.nodeTwin, stopCmd, payload, nil)
}

// AdminResumeWorkload asks the node to resume a workload.
// The workload is identified by the owning twin ID and the workload ID,
// which are sent as the "twin_id" and "workload_id" fields of the
// "zos.admin.resume_workload" command payload. The error from the bus call
// is returned unchanged.
func (n *NodeClient) AdminResumeWorkload(ctx context.Context, twinID uint32, wlID uint64) error {
	const cmd = "zos.admin.resume_workload"
	args := struct {
		TwinID     uint32 `json:"twin_id"`
		WorkloadID uint64 `json:"workload_id"`
	}{
		TwinID:     twinID,
		WorkloadID: wlID,
	}

	return n.bus.Call(ctx, n.nodeTwin, cmd, args, nil)
}

// NetworkListAllInterfaces return all physical devices on a node
func (n *NodeClient) NetworkListAllInterfaces(ctx context.Context) (result map[string]Interface, err error) {
const cmd = "zos.network.admin.interfaces"
const cmd = "zos.admin.interfaces"

err = n.bus.Call(ctx, n.nodeTwin, cmd, nil, &result)

Expand All @@ -271,14 +344,14 @@ func (n *NodeClient) NetworkListAllInterfaces(ctx context.Context) (result map[s
// NetworkSetPublicExitDevice select which physical interface to use as an exit device
// setting `iface` to `zos` will then make node run in a single nic setup.
func (n *NodeClient) NetworkSetPublicExitDevice(ctx context.Context, iface string) error {
const cmd = "zos.network.admin.set_public_nic"
const cmd = "zos.admin.set_public_nic"

return n.bus.Call(ctx, n.nodeTwin, cmd, iface, nil)
}

// NetworkGetPublicExitDevice gets the current dual nic setup of the node.
func (n *NodeClient) NetworkGetPublicExitDevice(ctx context.Context) (exit ExitDevice, err error) {
const cmd = "zos.network.admin.get_public_nic"
const cmd = "zos.admin.get_public_nic"

err = n.bus.Call(ctx, n.nodeTwin, cmd, nil, &exit)
return
Expand Down
10 changes: 6 additions & 4 deletions cmds/modules/api_gateway/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ func action(cli *cli.Context) error {
return fmt.Errorf("failed to create substrate manager: %w", err)
}

router := peer.NewRouter()
gw, err := substrategw.NewSubstrateGateway(manager, id)
if err != nil {
return fmt.Errorf("failed to create api gateway: %w", err)
Expand Down Expand Up @@ -96,6 +95,8 @@ func action(cli *cli.Context) error {
if err != nil {
return fmt.Errorf("failed to create zos api: %w", err)
}

router := peer.NewRouter()
api.SetupRoutes(router)

pair, err := id.KeyPair()
Expand All @@ -105,7 +106,7 @@ func action(cli *cli.Context) error {

bo := backoff.NewExponentialBackOff()
bo.MaxElapsedTime = 0
backoff.Retry(func() error {
if err = backoff.Retry(func() error {
_, err = peer.NewPeer(
ctx,
hex.EncodeToString(pair.Seed()),
Expand All @@ -117,9 +118,10 @@ func action(cli *cli.Context) error {
if err != nil {
return fmt.Errorf("failed to start a new rmb peer: %w", err)
}

return nil
}, bo)
}, bo); err != nil {
return err
}

log.Info().
Str("broker", msgBrokerCon).
Expand Down
4 changes: 2 additions & 2 deletions cmds/modules/provisiond/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ func (r *ContractEventHandler) sync(ctx context.Context) error {
action = r.engine.Pause
}

if err := action(ctx, dl.TwinID, dl.ContractID); err != nil {
if err := action(dl.TwinID, dl.ContractID); err != nil {
log.Error().Err(err).Msg("failed to change contract state")
}
}
Expand Down Expand Up @@ -176,7 +176,7 @@ func (r *ContractEventHandler) Run(ctx context.Context) error {
action = r.engine.Pause
}

if err := action(ctx, event.TwinId, event.Contract); err != nil {
if err := action(event.TwinId, event.Contract); err != nil {
log.Error().Err(err).
Uint32("twin", event.TwinId).
Uint64("contract", event.Contract).
Expand Down
94 changes: 88 additions & 6 deletions docs/manual/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ so `used = user_used + system`, while `system` is only the amount of resourced r
| `zos.storage.pools` | - |`[]Pool`|

List all node pools with their types, size and used space
where

Where

```json
Pool {
Expand Down Expand Up @@ -151,13 +152,95 @@ it means it can act like an access node to user private networks

## Admin

The next set of commands are ONLY possible to be called by the `farmer` only.
The next set of commands are ONLY possible to be called by the `farmer` owning the node.

### Reboot Node

| command |body| return|
|---|---|---|
| `zos.admin.reboot` | - | - |

Stops all services then reboots the node

### Restart Service

| command |body| return|
|---|---|---|
| `zos.admin.restart` | string | - |

Restarts a service running on the node

### Restart All Services

| command |body| return|
|---|---|---|
| `zos.admin.restart_all` | - | - |

Restarts all zinit services running on the node

### Show Logs

| command |body| return|
|---|---|---|
| `zos.admin.show_logs` | int | []byte |

Shows a number of lines of zinit logs

### Show Resolve

| command |body| return|
|---|---|---|
| `zos.admin.show_resolve` | - | []byte |

Shows the content of /etc/resolv.conf

### Show Open Connections

| command |body| return|
|---|---|---|
| `zos.admin.show_open_connections` | - | []byte |

Shows information about all open connections in the node

### Stop Workload

| command |body| return|
|---|---|---|
| `zos.admin.stop_workload` | `Args` | - |

Where

```json
Args {
"twin_id": "uint32",
"workload_id": "uint64",
}
```

Stops a workload

### Resume Workload

| command |body| return|
|---|---|---|
| `zos.admin.resume_workload` | `Args` | - |

Where

```json
Args {
"twin_id": "uint32",
"workload_id": "uint64",
}
```

Resumes a stopped workload

### List Physical Interfaces

| command |body| return|
|---|---|---|
| `zos.network.admin.interfaces` | - |`map[string]Interface` |
| `zos.admin.interfaces` | - |`map[string]Interface` |

Where

Expand All @@ -175,7 +258,7 @@ Those interfaces then can be used as an input to `set_public_nic`

| command |body| return|
|---|---|---|
| `zos.network.admin.get_public_nic` | - |`ExitDevice` |
| `zos.admin.get_public_nic` | - |`ExitDevice` |

Where

Expand All @@ -193,7 +276,7 @@ returns the interface used by public traffic (for user workloads)

| command |body| return|
|---|---|---|
| `zos.network.admin.set_public_nic` | `name` |- |
| `zos.admin.set_public_nic` | `name` |- |

name must be one of (free) names returned by `zos.admin.interfaces`

Expand Down Expand Up @@ -223,7 +306,6 @@ name must be one of (free) names returned by `zos.network.admin.interfaces`
|---|---|---|
| `zos.system.node_features_get` | - |`[]NodeFeature` |


Where

```json
Expand Down
40 changes: 28 additions & 12 deletions pkg/primitives/statistics.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ import (
"context"
"encoding/json"
"fmt"
"os/exec"
"strconv"
"strings"
"time"

"github.com/pkg/errors"
Expand Down Expand Up @@ -31,9 +34,7 @@ func GetCapacity(ctx context.Context) gridtypes.Capacity {
return val.(gridtypes.Capacity)
}

var (
_ provision.Provisioner = (*Statistics)(nil)
)
var _ provision.Provisioner = (*Statistics)(nil)

type Reserved func() (gridtypes.Capacity, error)

Expand Down Expand Up @@ -146,7 +147,6 @@ func (s *Statistics) hasEnoughCapacity(wl *gridtypes.WorkloadWithID) (gridtypes.
id, _ := gridtypes.NewWorkloadID(dl_.TwinID, dl_.ContractID, wl_.Name)
return id == wl.ID
})

if err != nil {
return used, errors.Wrap(err, "failed to get available memory")
}
Expand All @@ -155,7 +155,7 @@ func (s *Statistics) hasEnoughCapacity(wl *gridtypes.WorkloadWithID) (gridtypes.
return used, fmt.Errorf("cannot fulfil required memory size %d bytes out of usable %d bytes", required.MRU, usable)
}

//check other resources as well?
// check other resources as well?
return used, nil
}

Expand Down Expand Up @@ -235,6 +235,19 @@ func (s *statsStream) Total() gridtypes.Capacity {
return s.stats.Total()
}

// OpenConnections runs `ss -ptn state established` and returns its raw
// standard output, or the error reported by running the command.
func (s *statsStream) OpenConnections() ([]byte, error) {
	ss := exec.Command("ss", "-ptn", "state", "established")
	return ss.Output()
}

// openConnectionsCount returns the number of established TCP connections
// reported by `ss`.
//
// The count is obtained by piping `ss -ptn state established` through
// `wc -l`. ss prints a column-header line before the sockets, so that line
// is subtracted from the total; otherwise the result is one too many.
//
// NOTE(review): the exit status of a shell pipeline is that of its last
// command, so a failing `ss` is presumably reported as zero connections
// rather than as an error — confirm whether that is acceptable.
func (s *statsStream) openConnectionsCount() (int, error) {
	cmd := exec.Command("/bin/sh", "-c", "ss -ptn state established | wc -l")
	out, err := cmd.Output()
	if err != nil {
		return 0, err
	}

	lines, err := strconv.Atoi(strings.TrimSpace(string(out)))
	if err != nil {
		return 0, err
	}

	// Drop the header line; leave zero untouched in case ss printed nothing.
	if lines > 0 {
		lines--
	}
	return lines, nil
}

func (s *statsStream) Workloads() (int, error) {
capacity, err := s.stats.storage.Capacity()
if err != nil {
Expand All @@ -253,10 +266,17 @@ func (s *statsStream) GetCounters() (pkg.Counters, error) {
if err != nil {
return pkg.Counters{}, err
}

connCount, err := s.openConnectionsCount()
if err != nil {
return pkg.Counters{}, err
}

return pkg.Counters{
Total: s.stats.Total(),
Used: activeCounters.cap,
System: reserved,
Total: s.stats.Total(),
Used: activeCounters.cap,
System: reserved,
OpenConnecions: connCount,
Users: pkg.UsersCounters{
Deployments: activeCounters.deployments,
Workloads: activeCounters.workloads,
Expand Down Expand Up @@ -298,10 +318,6 @@ func (s *statsStream) ListGPUs() ([]pkg.GPUInfo, error) {
return nil, errors.Wrap(err, "failed to list available devices")
}

if err != nil {
return nil, errors.Wrap(err, "failed to list active deployments")
}

used, err := usedGpus()
if err != nil {
return nil, errors.Wrap(err, "failed to list used gpus")
Expand Down
Loading
Loading