Skip to content

Commit

Permalink
Add api endpoint for marking errored operations as resolved
Browse files Browse the repository at this point in the history
  • Loading branch information
Varsius committed Oct 28, 2024
1 parent 45a79d6 commit fcb0a6f
Show file tree
Hide file tree
Showing 7 changed files with 141 additions and 1 deletion.
8 changes: 8 additions & 0 deletions docs/api-spec.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ This document uses the terminology defined in the [README.md](../README.md#termi
* [DELETE /v1/projects/:id/resources/:type](#delete-v1projectsidresourcestype)
* [GET /v1/projects/:id/assets/:type](#get-v1projectsidassetstype)
* [GET /v1/projects/:id/assets/:type/:id](#get-v1projectsidassetstypeid)
* [POST /v1/projects/:id/assets/:type/:id/error-resolved](#post-v1projectsidassetstypeiderror-resolved)
* [GET /v1/projects/:id/resources/:type/operations/pending](#get-v1projectsidresourcestypeoperationspending)
* [GET /v1/projects/:id/resources/:type/operations/recently-failed](#get-v1projectsidresourcestypeoperationsrecently-failed)
* [GET /v1/projects/:id/resources/:type/operations/recently-succeeded](#get-v1projectsidresourcestypeoperationsrecently-succeeded)
Expand Down Expand Up @@ -298,6 +299,13 @@ The following fields may be returned for each operation, both below `pending_ope

The previous table contains a lot of rules like "this field is not shown for operations in state X". When this is confusing to you, have a look at the state machine diagram in [README.md](../README.md#terminology). The reason why many fields are optional is that they only have values when the respective state was entered in the operation's lifecycle.

## POST /v1/projects/:id/assets/:type/:id/error-resolved

This endpoint manually marks the latest errored operation of the specified asset as resolved. This removes the "resize errored" alert associated with the asset, which otherwise only disappears once a later operation on the same asset succeeds.
Returns `404` if the project or asset is not found.
Returns `409` if the last operation of the asset is not `errored`.
Otherwise returns `200`.

## GET /v1/projects/:id/resources/:type/operations/pending
## GET /v1/projects/:id/resources/:type/operations/recently-failed
## GET /v1/projects/:id/resources/:type/operations/recently-succeeded
Expand Down
3 changes: 3 additions & 0 deletions internal/api/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ func (h *handler) AddTo(router *mux.Router) {
router.Methods("GET").
Path(`/v1/projects/{project_id}/assets/{asset_type}/{asset_uuid}`).
HandlerFunc(h.GetAsset)
router.Methods("POST").
Path(`/v1/projects/{project_id}/assets/{asset_type}/{asset_uuid}/error-resolved`).
HandlerFunc(h.PostAssetErrorResolved)

router.Methods("GET").
Path(`/v1/projects/{project_id}/resources/{asset_type}/operations/pending`).
Expand Down
65 changes: 64 additions & 1 deletion internal/api/assets.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,9 @@ func (h handler) GetAsset(w http.ResponseWriter, r *http.Request) {
if wantsFinishedOps {
var dbFinishedOps []db.FinishedOperation
_, err = h.DB.Select(&dbFinishedOps,
`SELECT * FROM finished_operations WHERE asset_id = $1 ORDER BY finished_at`,
`SELECT * FROM finished_operations
WHERE asset_id = $1 AND outcome != 'error-resolved'
ORDER BY finished_at`,
dbAsset.ID)
if respondwith.ErrorText(w, err) {
return
Expand All @@ -211,3 +213,64 @@ func (h handler) GetAsset(w http.ResponseWriter, r *http.Request) {

respondwith.JSON(w, http.StatusOK, asset)
}

// PostAssetErrorResolved handles
// POST /v1/projects/:id/assets/:type/:uuid/error-resolved.
//
// It looks up the most recently finished operation of the addressed asset. If
// that operation has the outcome "errored", it records an additional finished
// operation with the outcome "error-resolved" for the same asset and reason,
// timestamped with the handler's current time and attributed to the user of
// the request token.
//
// Responses:
//   - whatever CheckToken, token.Require("cluster:access") or LoadResource
//     write when they reject the request
//   - 404 if the resource has no asset with the given UUID
//   - 409 if the asset has no finished operation at all, or if its latest
//     finished operation is not "errored"
//   - 200 once the "error-resolved" operation has been inserted
func (h handler) PostAssetErrorResolved(w http.ResponseWriter, r *http.Request) {
	httpapi.IdentifyEndpoint(r, "/v1/projects/:id/assets/:type/:uuid/error-resolved")

	projectUUID, token := h.CheckToken(w, r)
	if token == nil {
		return
	}
	if !token.Require(w, "cluster:access") {
		return
	}
	dbResource := h.LoadResource(w, r, projectUUID, token, false)
	if dbResource == nil {
		return
	}

	// Because of the LEFT JOIN, an asset without any finished operation still
	// produces one row, but with NULL in the operation columns. The operation
	// fields are therefore pointers, so that such a row is reported as a
	// conflict below instead of failing the scan.
	// NOTE(review): assumes the DB layer scans NULL into pointer fields as nil,
	// like it has to for the nullable pointer fields of db.FinishedOperation —
	// confirm.
	var queryResult struct {
		AssetID          int64                       `db:"id"`
		OperationReason  *castellum.OperationReason  `db:"reason"`
		OperationOutcome *castellum.OperationOutcome `db:"outcome"`
	}

	err := h.DB.SelectOne(&queryResult,
		`SELECT a.id, fo.reason, fo.outcome
		   FROM assets a
		   LEFT JOIN finished_operations fo ON a.id = fo.asset_id
		  WHERE a.resource_id = $1 AND a.uuid = $2
		  ORDER BY fo.finished_at DESC LIMIT 1`,
		dbResource.ID, mux.Vars(r)["asset_uuid"])

	// No row at all means that the asset itself does not exist.
	if errors.Is(err, sql.ErrNoRows) {
		respondWithNotFound(w)
		return
	}
	if respondwith.ErrorText(w, err) {
		return
	}

	// Only an asset whose latest finished operation is "errored" can be
	// resolved; "no finished operation yet" counts as "not errored".
	hasErroredOp := queryResult.OperationReason != nil &&
		queryResult.OperationOutcome != nil &&
		*queryResult.OperationOutcome == castellum.OperationOutcomeErrored
	if !hasErroredOp {
		http.Error(w, "last operation of the asset is not in an errored state and cannot be resolved.", http.StatusConflict)
		return
	}

	// The resolution is stored as a finished operation of its own that
	// carries the reason of the errored operation it resolves.
	now := h.TimeNow()
	userUUID := token.UserUUID()
	err = h.DB.Insert(&db.FinishedOperation{
		AssetID:            queryResult.AssetID,
		Reason:             *queryResult.OperationReason,
		Outcome:            castellum.OperationOutcomeErrorResolved,
		CreatedAt:          now,
		ConfirmedAt:        &now,
		GreenlitAt:         &now,
		FinishedAt:         now,
		GreenlitByUserUUID: &userUUID,
	})
	if respondwith.ErrorText(w, err) {
		return
	}

	w.WriteHeader(http.StatusOK)
}
56 changes: 56 additions & 0 deletions internal/api/assets_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (

"github.com/sapcc/go-api-declarations/castellum"
"github.com/sapcc/go-bits/assert"
"github.com/sapcc/go-bits/easypg"
"github.com/sapcc/go-bits/mock"

"github.com/sapcc/castellum/internal/core"
Expand Down Expand Up @@ -228,6 +229,61 @@ func TestGetAsset(baseT *testing.T) {
})
}

// TestPostAssetErrorResolved checks the error-resolved endpoint: the
// permission check, the 404 responses, the happy path with its resulting DB
// change, and the conflict when the latest operation is no longer "errored".
func TestPostAssetErrorResolved(baseT *testing.T) {
	t := test.T{T: baseT}
	clock := mock.NewClock()
	clock.StepBy(time.Hour)
	withHandler(t, core.Config{}, clock.Now, func(h *handler, hh http.Handler, mv *mock.Validator[*mock.Enforcer], _ []db.Resource, _ []db.Asset) {
		tracker, initialState := easypg.NewTracker(t.T, h.DB.Db)
		initialState.Ignore()

		// every request in this test is a body-less POST that only differs in
		// its path and the status code it is expected to produce
		expectStatus := func(path string, status int) {
			assert.HTTPRequest{
				Method:       "POST",
				Path:         path,
				ExpectStatus: status,
			}.Check(t.T, hh)
		}

		// the endpoint is only usable with cluster access
		mv.Enforcer.Forbid("cluster:access")
		expectStatus("/v1/projects/project1/assets/foo/fooasset1/error-resolved", http.StatusForbidden)
		mv.Enforcer.Allow("cluster:access")

		// an unknown asset type is reported as "not found"
		expectStatus("/v1/projects/project1/assets/projectdoesnotexist/fooasset1/error-resolved", http.StatusNotFound)

		// an unknown asset is reported as "not found"
		expectStatus("/v1/projects/project1/assets/foo/assetdoesnotexist/error-resolved", http.StatusNotFound)

		// none of the rejected requests may have touched the database
		tracker.DBChanges().AssertEmpty()

		// happy path: resolving records one "error-resolved" operation
		expectStatus("/v1/projects/project1/assets/foo/fooasset1/error-resolved", http.StatusOK)

		tracker.DBChanges().AssertEqualf(`
INSERT INTO finished_operations (asset_id, reason, outcome, old_size, new_size, created_at, confirmed_at, greenlit_at, finished_at, greenlit_by_user_uuid, usage) VALUES (1, 'critical', 'error-resolved', 0, 0, %[1]d, %[1]d, %[1]d, %[1]d, '', 'null');
`,
			h.TimeNow().Unix())

		// resolving again conflicts: the latest operation is now
		// "error-resolved" rather than "errored"
		expectStatus("/v1/projects/project1/assets/foo/fooasset1/error-resolved", http.StatusConflict)
	})
}

// p2string returns a pointer to a fresh string variable holding val, for use
// where a *string is needed but only a string value is at hand.
func p2string(val string) *string {
	ptr := new(string)
	*ptr = val
	return ptr
}
Expand Down
3 changes: 3 additions & 0 deletions internal/api/fixtures/start-data.sql
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ INSERT INTO assets (id, resource_id, uuid, size, expected_size, scrape_error_mes
-- insert a bogus asset in an unknown asset type; we should not be able to list this in the API
INSERT INTO assets (id, resource_id, uuid, size, expected_size, scrape_error_message, usage, critical_usages, next_scrape_at, min_size, max_size) VALUES (4, 4, 'bogusasset', 100, NULL, '', '{"singular":50}', '', UNIX(314), NULL, NULL);

-- insert a dummy operation that should not be listed
INSERT INTO finished_operations (asset_id, reason, outcome, old_size, new_size, created_at, confirmed_at, greenlit_at, greenlit_by_user_uuid, finished_at, error_message, usage) VALUES (1, 'critical', 'error-resolved', 0, 0, UNIX(21), UNIX(22), UNIX(22), 'user3', UNIX(23), '', '{"singular":0}');
-- insert some operations that we can list
INSERT INTO finished_operations (asset_id, reason, outcome, old_size, new_size, created_at, confirmed_at, greenlit_at, greenlit_by_user_uuid, finished_at, error_message, usage) VALUES (1, 'low', 'cancelled', 1000, 900, UNIX(31), NULL, NULL, NULL, UNIX(32), '', '{"singular":200}');
INSERT INTO finished_operations (asset_id, reason, outcome, old_size, new_size, created_at, confirmed_at, greenlit_at, greenlit_by_user_uuid, finished_at, error_message, usage) VALUES (1, 'high', 'succeeded', 1023, 1024, UNIX(41), UNIX(42), UNIX(43), 'user2', UNIX(44), '', '{"singular":818.4}');
INSERT INTO finished_operations (asset_id, reason, outcome, old_size, new_size, created_at, confirmed_at, greenlit_at, greenlit_by_user_uuid, finished_at, error_message, usage) VALUES (1, 'critical', 'errored', 1024, 1025, UNIX(51), UNIX(52), UNIX(52), NULL, UNIX(53), 'datacenter is on fire', '{"singular":983.04}');
1 change: 1 addition & 0 deletions internal/api/resources_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -642,6 +642,7 @@ func TestDeleteResource(baseT *testing.T) {
tr.DBChanges().AssertEqualf(`
DELETE FROM assets WHERE id = 1 AND resource_id = 1 AND uuid = 'fooasset1';
DELETE FROM assets WHERE id = 2 AND resource_id = 1 AND uuid = 'fooasset2';
DELETE FROM finished_operations WHERE asset_id = 1 AND reason = 'critical' AND outcome = 'error-resolved' AND old_size = 0 AND new_size = 0 AND created_at = 21 AND confirmed_at = 22 AND greenlit_at = 22 AND finished_at = 23 AND greenlit_by_user_uuid = 'user3' AND error_message = '' AND errored_attempts = 0 AND usage = '{"singular":0}';
DELETE FROM finished_operations WHERE asset_id = 1 AND reason = 'critical' AND outcome = 'errored' AND old_size = 1024 AND new_size = 1025 AND created_at = 51 AND confirmed_at = 52 AND greenlit_at = 52 AND finished_at = 53 AND greenlit_by_user_uuid = NULL AND error_message = 'datacenter is on fire' AND errored_attempts = 0 AND usage = '{"singular":983.04}';
DELETE FROM finished_operations WHERE asset_id = 1 AND reason = 'high' AND outcome = 'succeeded' AND old_size = 1023 AND new_size = 1024 AND created_at = 41 AND confirmed_at = 42 AND greenlit_at = 43 AND finished_at = 44 AND greenlit_by_user_uuid = 'user2' AND error_message = '' AND errored_attempts = 0 AND usage = '{"singular":818.4}';
DELETE FROM finished_operations WHERE asset_id = 1 AND reason = 'low' AND outcome = 'cancelled' AND old_size = 1000 AND new_size = 900 AND created_at = 31 AND confirmed_at = NULL AND greenlit_at = NULL AND finished_at = 32 AND greenlit_by_user_uuid = NULL AND error_message = '' AND errored_attempts = 0 AND usage = '{"singular":200}';
Expand Down
6 changes: 6 additions & 0 deletions internal/db/migrations.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,4 +121,10 @@ var SQLMigrations = map[string]string{
ALTER TABLE resources
ADD COLUMN min_free_is_critical BOOLEAN DEFAULT FALSE;
`,
"024_add_error-resolved_to_op_outcome.up.sql": `
ALTER TYPE op_outcome ADD VALUE 'error-resolved';
`,
"024_add_error-resolved_to_op_outcome.down.sql": `
ALTER TYPE op_outcome REMOVE VALUE 'error-resolved';
`,
}

0 comments on commit fcb0a6f

Please sign in to comment.