Skip to content

Commit

Permalink
Allow workbench updates (#22)
Browse files Browse the repository at this point in the history
  • Loading branch information
joecorall authored Dec 4, 2024
1 parent d14078b commit 61565fe
Show file tree
Hide file tree
Showing 9 changed files with 341 additions and 18 deletions.
23 changes: 22 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,40 @@ go 1.22.2

require (
github.com/golang-jwt/jwt/v5 v5.2.1
github.com/lehigh-university-libraries/go-islandora v0.0.0-20241115202445-965bd728379b
github.com/lehigh-university-libraries/go-islandora v0.0.0-20241204195121-b4515e46e282
github.com/lestrrat-go/jwx v1.2.29
github.com/sfomuseum/go-edtf v1.1.1
)

require (
github.com/decred/dcrd/dcrec/secp256k1/v4 v4.2.0 // indirect
github.com/dprotaso/go-yit v0.0.0-20240618133044-5a0af90af097 // indirect
github.com/getkin/kin-openapi v0.128.0 // indirect
github.com/go-openapi/jsonpointer v0.21.0 // indirect
github.com/go-openapi/swag v0.23.0 // indirect
github.com/goccy/go-json v0.10.2 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/invopop/yaml v0.3.1 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/lestrrat-go/backoff/v2 v2.0.8 // indirect
github.com/lestrrat-go/blackmagic v1.0.2 // indirect
github.com/lestrrat-go/httpcc v1.0.1 // indirect
github.com/lestrrat-go/iter v1.0.2 // indirect
github.com/lestrrat-go/option v1.0.1 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 // indirect
github.com/oapi-codegen/oapi-codegen/v2 v2.4.1 // indirect
github.com/perimeterx/marshmallow v1.1.5 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/speakeasy-api/openapi-overlay v0.9.0 // indirect
github.com/spf13/cobra v1.8.1 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/vmware-labs/yaml-jsonpath v0.3.2 // indirect
golang.org/x/crypto v0.21.0 // indirect
golang.org/x/mod v0.22.0 // indirect
golang.org/x/sync v0.10.0 // indirect
golang.org/x/text v0.21.0 // indirect
golang.org/x/tools v0.27.0 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
130 changes: 130 additions & 0 deletions go.sum

Large diffs are not rendered by default.

13 changes: 10 additions & 3 deletions internal/handlers/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ func CheckMyWork(w http.ResponseWriter, r *http.Request) {
c := numberToExcelColumn(colIndex)
i := c + strconv.Itoa(rowIndex+2)
if col == "" {
if strInSlice(column, requiredFields) {
// require fields on create
if strInSlice(column, requiredFields) && ColumnValue("Node ID", header, row) == "" {
errors[i] = "Missing value"
}
if column == "Parent Collection" {
Expand Down Expand Up @@ -112,7 +113,7 @@ func CheckMyWork(w http.ResponseWriter, r *http.Request) {

switch column {
// make sure these columns are integers
case "Parent Collection", "PPI":
case "Parent Collection", "PPI", "Node ID":
id, err := strconv.Atoi(cell)
if err != nil {
errors[i] = "Must be an integer"
Expand All @@ -124,7 +125,13 @@ func CheckMyWork(w http.ResponseWriter, r *http.Request) {
errors[i] = fmt.Sprintf("Could not identify parent collection %d", id)
}
}
// make sure these columns are valid URLs
if column == "Node ID" {
url := fmt.Sprintf("https://preserve.lehigh.edu/node/%d?_format=json", id)
if !checkURL(url) {
errors[i] = fmt.Sprintf("Could not find node ID %d", id)
}
}
// make sure these columns are valid URLs
case "Catalog or ArchivesSpace URL":
parsedURL, err := url.ParseRequestURI(cell)
if err != nil || parsedURL.Scheme == "" && parsedURL.Host == "" {
Expand Down
10 changes: 10 additions & 0 deletions internal/handlers/check_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,16 @@ func TestCheckMyWork(t *testing.T) {
statusCode: http.StatusOK,
response: `{"D2":"Pages must have a parent id"}`,
},
{
name: "Do not require title/model on updates",
method: http.MethodPost,
body: [][]string{
{"Title", "Object Model", "Full Title", "Node ID"},
{"", "", "", "2"},
},
statusCode: http.StatusOK,
response: `{}`,
},
}

sharedSecret := "foo"
Expand Down
16 changes: 9 additions & 7 deletions internal/handlers/transform.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,26 +21,28 @@ import (
)

func TransformCsv(w http.ResponseWriter, r *http.Request) {
target := "/tmp/target.csv"
headers, rows, linkedAgents, err := readCSVWithJSONTags(r)
if err != nil {
slog.Error("Failed to read CSV", "err", err)
http.Error(w, "Error parsing CSV", http.StatusBadRequest)
return
}

firstRow := make([]string, 0, len(headers))
for header := range headers {
firstRow = append(firstRow, header)
}
target := "/tmp/target.csv"
if strInSlice("node_id", firstRow) {
target = "/tmp/target.update.csv"
}
file, err := os.Create(target)
if err != nil {
slog.Error("Failed to create file", "err", err)
http.Error(w, "Internal error", http.StatusInternalServerError)
return
}

writer := csv.NewWriter(file)
firstRow := make([]string, 0, len(headers))
for header := range headers {
firstRow = append(firstRow, header)
}

// finally, write the header to the CSV
if err := writer.Write(firstRow); err != nil {
Expand Down Expand Up @@ -224,7 +226,7 @@ func readCSVWithJSONTags(r *http.Request) (map[string]bool, []map[string][]strin
if !re.MatchString(str) {
return nil, nil, nil, fmt.Errorf("unknown %s: %s", jsonTag, str)
}
case "field_weight":
case "field_weight", "node_id":
_, err := strconv.Atoi(str)
if err != nil {
return nil, nil, nil, fmt.Errorf("unknown %s: %s", jsonTag, str)
Expand Down
131 changes: 131 additions & 0 deletions internal/handlers/transform_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
package handlers

import (
"bytes"
"net/http"
"net/http/httptest"
"testing"
)

// TestReadCSVWithJSONTags feeds small CSV payloads through
// readCSVWithJSONTags via a synthetic POST request and verifies the header
// set and the parsed rows it returns.
//
// The third return value of readCSVWithJSONTags is discarded here, so the
// expectedAgents field is carried in the table but not asserted.
func TestReadCSVWithJSONTags(t *testing.T) {
	type testCase struct {
		name            string
		csvContent      string
		expectedHeaders []string
		expectedRows    []map[string][]string
		expectedAgents  [][]string
		expectError     bool
	}

	cases := []testCase{
		{
			name: "Valid create CSV with headers and rows",
			csvContent: `Title,Object Model,Full Title
foo,bar,Full Test Title`,
			expectedHeaders: []string{"title", "field_model", "field_full_title"},
			expectedRows: []map[string][]string{
				{
					"title":            {"foo"},
					"field_model":      {"bar"},
					"field_full_title": {"Full Test Title"},
				},
			},
			expectedAgents: nil,
			expectError:    false,
		},
		{
			name: "Valid update CSV with headers and rows",
			csvContent: `Full Title,Node ID
Full Test Title,123`,
			expectedHeaders: []string{"node_id", "field_full_title"},
			expectedRows: []map[string][]string{
				{
					"field_full_title": {"Full Test Title"},
					"node_id":          {"123"},
				},
			},
			expectedAgents: nil,
			expectError:    false,
		},
	}

	for idx := range cases {
		tc := cases[idx]
		t.Run(tc.name, func(t *testing.T) {
			// Build an in-memory POST request whose body is the CSV text.
			body := bytes.NewBufferString(tc.csvContent)
			request := httptest.NewRequest(http.MethodPost, "/", body)
			request.Header.Set("Content-Type", "text/csv")

			headers, gotRows, _, err := readCSVWithJSONTags(request)

			// Resolve the error expectation before looking at any data.
			switch {
			case tc.expectError && err == nil:
				t.Errorf("Expected error, got nil")
				return
			case tc.expectError:
				return
			case err != nil:
				t.Errorf("Unexpected error: %v", err)
				return
			}

			// Headers come back as a map; flatten the keys into a slice so
			// they can be compared against the expected header names.
			gotHeaders := make([]string, 0, len(headers))
			for key := range headers {
				gotHeaders = append(gotHeaders, key)
			}

			if !equalStringSlices(gotHeaders, tc.expectedHeaders) {
				t.Errorf("Expected headers %v, got %v", tc.expectedHeaders, gotHeaders)
			}
			if !equalRowSlices(gotRows, tc.expectedRows) {
				t.Errorf("Expected rows %v, got %v", tc.expectedRows, gotRows)
			}
		})
	}
}

// equalRowSlices reports whether a and b have the same length and each row
// of a matches the row at the same index of b according to equalRow.
func equalRowSlices(a, b []map[string][]string) bool {
	if len(a) != len(b) {
		return false
	}
	for idx := 0; idx < len(a); idx++ {
		if !equalRow(a[idx], b[idx]) {
			return false
		}
	}
	return true
}

// equalRow reports whether two parsed CSV rows carry the same set of keys
// and, for every key, values that equalStringSlices considers equal.
func equalRow(a, b map[string][]string) bool {
	if len(a) != len(b) {
		return false
	}
	for key, left := range a {
		right, ok := b[key]
		if !ok {
			// Key present in a but absent from b.
			return false
		}
		if !equalStringSlices(left, right) {
			return false
		}
	}
	return true
}

// equalStringSlices reports whether a and b contain exactly the same strings
// with the same multiplicities, ignoring order.
//
// Order is ignored on purpose: callers build one of the slices from map keys,
// whose iteration order is not stable. Occurrences are counted rather than
// only checking membership, so a duplicated value on one side cannot mask a
// missing value on the other (e.g. ["a","a"] is not equal to ["a","b"]).
func equalStringSlices(a, b []string) bool {
	if len(a) != len(b) {
		return false
	}

	// Tally every value in a, then consume the tally with the values in b.
	counts := make(map[string]int, len(a))
	for _, s := range a {
		counts[s]++
	}
	for _, s := range b {
		if counts[s] == 0 {
			// s is either absent from a or appears more often in b.
			return false
		}
		counts[s]--
	}

	// Lengths match and every element of b consumed a distinct element of a,
	// so the two multisets are identical.
	return true
}
13 changes: 9 additions & 4 deletions scripts/run-workbench.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,13 @@ if [ -f input_data/target.agents.csv ]; then
grep ERROR logs/agents.log && exit 1 || echo "No errors"
fi

# run the ingest
python3 workbench --config configs/create.yml
if [ -f input_data/target.update.csv ]; then
python3 workbench --config configs/update.yml
grep ERROR logs/update.log && exit 1 || echo "No errors"
fi

if [ -f input_data/target.csv ]; then
python3 workbench --config configs/create.yml
grep ERROR logs/items.log && exit 1 || echo "No errors"
fi

# fail the job if workbench logged any errors
grep ERROR logs/items.log && exit 1 || echo "No errors"
13 changes: 10 additions & 3 deletions scripts/transform.sh
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,11 @@ rm target.zip

# make sure source and target CSVs line count match
SOURCE=$(wc -l < source.csv)
TARGET=$(wc -l < target.csv)
TARGET_FILE="target.csv"
if [ -f target.update.csv ]; then
TARGET_FILE="target.update.csv"
fi
TARGET=$(wc -l < "$TARGET_FILE")
if [ "$SOURCE" != "$TARGET" ]; then
echo "source and target CSVs don't match ($SOURCE != $TARGET)"
exit 1
Expand All @@ -63,9 +67,12 @@ if [ "$TARGET" -lt 2 ]; then
exit 1
fi

# and some required headers exist
header=$(head -1 target.csv)
# ensure some required headers exist
required_fields=("field_model" "title" "field_full_title" "id")
if [ -f target.update.csv ]; then
required_fields=("node_id")
fi
header=$(head -1 "$TARGET_FILE")
missing_fields=()
for field in "${required_fields[@]}"; do
if ! grep -q "$field" <<< "$header"; then
Expand Down
10 changes: 10 additions & 0 deletions workbench-configs/update.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Workbench configuration for the "update" task.
# run-workbench.sh invokes workbench with an update.yml config only when
# input_data/target.update.csv exists.
task: update
host: https://islandora-prod.lib.lehigh.edu
username: workbench
allow_adding_terms: true
# Same file name TransformCsv writes (/tmp/target.update.csv) when the
# uploaded sheet contains a node_id column.
input_csv: target.update.csv
# run-workbench.sh greps this log for ERROR and fails the job on a match.
log_file_path: logs/update.log
# NOTE(review): presumably "w" truncates the log on each run — confirm
# against the workbench documentation.
log_file_mode: w
allow_missing_files: true
additional_files:
- supplemental_file: 151326

0 comments on commit 61565fe

Please sign in to comment.