Commit

Use openapi spec from go-islandora (#1)
joecorall authored Jul 9, 2024
1 parent 2c55557 commit 5df4fdd
Showing 7 changed files with 396 additions and 4 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/lint-test.yml
@@ -14,7 +14,7 @@ jobs:
      - name: golangci-lint
        uses: golangci/golangci-lint-action@v3
        with:
          version: v1.54
          version: v1.59.1

      - name: Install dependencies
        run: go get .
12 changes: 11 additions & 1 deletion README.md
@@ -4,6 +4,8 @@ Prepare a CSV to load via Islandora Workbench

This is a convenience utility that converts a more user-friendly spreadsheet into the format Islandora Workbench expects. It can be thought of as middleware between normal spreadsheet curation and Workbench ingest.

## Overview

```mermaid
sequenceDiagram
actor Alice
@@ -19,7 +21,15 @@ sequenceDiagram
Islandora Workbench->>Drupal: entity CUD
```

TODO
## Getting started

```sh
go install github.com/oapi-codegen/oapi-codegen/v2/cmd/oapi-codegen@latest
go generate ./api
```


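The converter in `main.go` reflects over the `workbench.SheetsCsv` struct from go-islandora: each field's `json` tag is matched against a spreadsheet header, and its `csv` tag names the output column. A minimal sketch of the shape that code assumes (field names and tags here are invented for illustration, not the actual generated code):

```go
// Illustrative only; the real struct lives in the
// github.com/lehigh-university-libraries/go-islandora/workbench package.
type SheetsCsv struct {
	Title    *string `json:"Title" csv:"title"`
	FilePath *string `json:"File Path" csv:"file"`
}
```

Once the code has been generated, the converter can be run against the fixture added in this commit, e.g. `go run . -source fixtures/sample1.csv -target converted.csv` (flag names come from `main.go`; the output path is arbitrary).
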
## TODO
- [ ] HTTP service to allow a Google Sheets Apps script to validate a spreadsheet
- [ ] Validator service
- [ ] CSV transform service
197 changes: 197 additions & 0 deletions fixtures/sample1.csv

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions fixtures/tmpl.csv
@@ -0,0 +1 @@
Upload ID,Page/Item Parent ID,Child Sort Order,Node ID,Parent Collection,Object Model,File Path,Add Coverpage (Y/N),Title,Full Title,Make Public (Y/N),Contributor Name 1,Contributor Relator 1,Contributor Type 1,ORCID Number 1,Contributor Status 1,Contributor Email 1,Contributor Institution 1,Related Department,Resource Type,Genre (Getty AAT),Creation Date,Season,Date Captured,Embargo Until Date,Publisher,Edition,Language,Physical Format (Getty AAT),File Format (MIME Type),Page Count,Dimensions,File Size,Run Time (HH:MM:SS),Digital Origin,Description,Abstract,Preferred-Citation (included only in Fritz Lab and Environmental reports),Capture Device,PPI,Archival Collection,Archival Box,Archival Series,Archival Folder,Local Restriction,Subject Topic (LCSH),Keyword,Subject Name (LCNAF),Subject Geographic (LCNAF),Subject Geographic (Local),Hierarchical Geographic (Getty TGN),Source Publication Title,Source Publication L-ISSN,Volume Number,Issue Number,Page Numbers,DOI,Catalog or ArchivesSpace URL,Call Number,Report Number (included only on ATLSS and Fritz Lab spreadsheet),Rights Statement,Access
2 changes: 2 additions & 0 deletions go.mod
@@ -1,3 +1,5 @@
module github.com/lehigh-university-libraries/fabricator

go 1.22.2

require github.com/lehigh-university-libraries/go-islandora v0.0.0-20240709193244-50f8e60d633d
2 changes: 2 additions & 0 deletions go.sum
@@ -0,0 +1,2 @@
github.com/lehigh-university-libraries/go-islandora v0.0.0-20240709193244-50f8e60d633d h1:bryeojGZvWZazzCdVsZB0Pi3LijX/Aq82KgDnjPkt0s=
github.com/lehigh-university-libraries/go-islandora v0.0.0-20240709193244-50f8e60d633d/go.mod h1:JDCARba/UJW608jcs6XyVuCsfp3LoDVDC++bnGAB47A=
184 changes: 182 additions & 2 deletions main.go
@@ -1,7 +1,187 @@
package main

import "log/slog"
import (
	"encoding/csv"
	"flag"
	"fmt"
	"log/slog"
	"os"
	"reflect"
	"strings"

	"github.com/lehigh-university-libraries/go-islandora/workbench"
)

func getJSONFieldName(tag string) string {
	if commaIndex := strings.Index(tag, ","); commaIndex != -1 {
		return tag[:commaIndex]
	}
	return tag
}
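
// For illustration (not part of this commit): getJSONFieldName("title,omitempty")
// returns "title", while a tag with no comma, such as "title", is returned unchanged.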

func readCSVWithJSONTags(filePath string) ([]map[string][]string, error) {
	file, err := os.Open(filePath)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	reader := csv.NewReader(file)
	headers, err := reader.Read()
	if err != nil {
		return nil, err
	}

	var rows []map[string][]string
	newCsv := &workbench.SheetsCsv{}
	for {
		record, err := reader.Read()
		if err != nil {
			break
		}

		row := map[string][]string{}
		v := reflect.ValueOf(newCsv).Elem()
		t := v.Type()

		// match each CSV header against the json tag of a SheetsCsv field, then
		// write the (possibly transformed) cell value under the column named by
		// that field's csv tag
		for i, header := range headers {
			for j := 0; j < t.NumField(); j++ {
				field := t.Field(j)
				jsonTag := getJSONFieldName(field.Tag.Get("json"))
				if jsonTag == header {
					value := v.FieldByName(field.Name)
					if value.IsValid() && value.CanSet() {
						str := record[i]
						column := getJSONFieldName(field.Tag.Get("csv"))
						switch header {
						case "field_linked_agent.rel_type":
							components := strings.Split(str, "|")
							str = components[1]
						case "field_extent.attr0=page",
							"field_extent.attr0=dimensions",
							"field_extent.attr0=bytes",
							"field_extent.attr0=minutes",
							"field_abstract.attr0=description",
							"field_abstract.attr0=abstract",
							"field_note.attr0=preferred-citation",
							"field_note.attr0=capture-device",
							"field_note.attr0=ppi",
							"field_note.attr0=collection",
							"field_note.attr0=box",
							"field_note.attr0=series",
							"field_note.attr0=folder",
							"field_part_detail.attr0=volume",
							"field_part_detail.attr0=issue",
							"field_part_detail.attr0=page",
							"field_identifier.attr0=doi",
							"field_identifier.attr0=uri",
							"field_identifier.attr0=call-number",
							"field_identifier.attr0=report-number":
							// fold the attr0 suffix into a JSON value on the base column
							components := strings.Split(header, ".attr0=")
							column = components[0]
							str = fmt.Sprintf(`{"value":"%s","attr0":"%s"}`, str, components[1])
						case "field_geographic_subject.vid=geographic_naf",
							"field_geographic_subject.vid=geographic_local":
							// prefix the value with its vocabulary ID
							components := strings.Split(header, ".vid=")
							column = components[0]
							str = fmt.Sprintf("%s:%s", components[1], str)
						/*
							case "field_related_item.title":
							case "field_related_item.identifier_type=issn":
							case "field_linked_agent.vid",
								"field_linked_agent.rel_type":
						*/
						case "File Path":
							// normalize Windows-style paths and anchor them under /mnt/scans
							str = strings.ReplaceAll(str, `\`, `/`)
							str = strings.TrimLeft(str, "/")
							if len(str) > 3 && str[0:3] != "mnt" {
								str = fmt.Sprintf("/mnt/scans/%s", str)
							}
						}

						// treat " ; " in the source cell as a multi-value delimiter and convert it to "|"
						str = strings.ReplaceAll(str, " ; ", "|")
						if str != "" {
							row[column] = append(row[column], str)
						}
					}
				}
			}
		}

		rows = append(rows, row)
	}

	return rows, nil
}
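
// Illustration of readCSVWithJSONTags above (not from the original commit): if a
// source column's header is literally "field_extent.attr0=page" and a cell holds
// "12", the row gains row["field_extent"] = []string{`{"value":"12","attr0":"page"}`},
// and a cell like "dogs ; cats" in any matched column is stored as "dogs|cats".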

func main() {
	slog.Info("OK")
	// Define the source and target flags
	source := flag.String("source", "", "Path to the source CSV file")
	target := flag.String("target", "", "Path to the target CSV file")
	flag.Parse()

	if *source == "" || *target == "" {
		fmt.Println("Source and target flags are required")
		flag.Usage()
		return
	}
	rows, err := readCSVWithJSONTags(*source)
	if err != nil {
		slog.Error("Failed to read CSV", "err", err)
		os.Exit(1)
	}

	file, err := os.Create(*target)
	if err != nil {
		slog.Error("Failed to create file", "err", err)
		os.Exit(1)
	}
	defer file.Close()

	writer := csv.NewWriter(file)
	defer writer.Flush()

	// get all possible headers in the CSV
	headers := []string{}
	for header := range rows[0] {
		headers = append(headers, header)
	}

	// check any columns that have no values
	includeColumns := map[string]bool{}
	for _, row := range rows {
		for _, header := range headers {
			if !includeColumns[header] && len(row[header]) > 0 {
				includeColumns[header] = true
			}
		}
	}

	// remove columns with no values from the header
	headers = []string{}
	for header, include := range includeColumns {
		if include {
			headers = append(headers, header)
		}
	}

	// finally, write the header to the CSV
	if err := writer.Write(headers); err != nil {
		slog.Error("Failed to write record to CSV", "err", err)
		os.Exit(1)
	}

	// write the rows to the CSV
	for _, row := range rows {
		record := []string{}
		for _, header := range headers {
			record = append(record, strings.Join(row[header], "|"))
		}
		if err := writer.Write(record); err != nil {
			slog.Error("Failed to write record to CSV", "err", err)
			os.Exit(1)
		}
	}

	slog.Info("CSV file has been written successfully")
}
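
// Worked example of the write-out above (hypothetical data; header order comes from
// Go map iteration and is therefore not stable): given
//   rows[0] = map[string][]string{"title": {"My Title"}, "field_subject": {"cats", "dogs"}}
// the output CSV gets a header row containing "title" and "field_subject" and the
// record "My Title,cats|dogs".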
