Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use openapi spec from go-islandora #1

Merged
merged 5 commits into from
Jul 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/lint-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
- name: golangci-lint
uses: golangci/golangci-lint-action@v3
with:
version: v1.54
version: v1.59.1

- name: Install dependencies
run: go get .
Expand Down
12 changes: 11 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ Prepare a CSV to load via Islandora Workbench

This is a convenience utility that converts a more user-friendly spreadsheet into the format Workbench expects. It can be thought of as middleware between normal spreadsheet curation and the format Workbench expects.

## Overview

```mermaid
sequenceDiagram
actor Alice
Expand All @@ -19,7 +21,15 @@ sequenceDiagram
Islandora Workbench->>Drupal: entity CUD
```

TODO
## Getting started

```
go install github.com/oapi-codegen/oapi-codegen/v2/cmd/oapi-codegen@latest
go generate ./api
```


## TODO
- [ ] HTTP service to allow a Google Sheets Apps script to validate a spreadsheet
- [ ] Validator service
- [ ] CSV transform service
197 changes: 197 additions & 0 deletions fixtures/sample1.csv

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions fixtures/tmpl.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Upload ID,Page/Item Parent ID,Child Sort Order,Node ID,Parent Collection,Object Model,File Path,Add Coverpage (Y/N),Title,Full Title,Make Public (Y/N),Contributor Name 1,Contributor Relator 1,Contributor Type 1,ORCID Number 1,Contributor Status 1,Contributor Email 1,Contributor Institution 1,Related Department,Resource Type,Genre (Getty AAT),Creation Date,Season,Date Captured,Embargo Until Date,Publisher,Edition,Language,Physical Format (Getty AAT),File Format (MIME Type),Page Count,Dimensions,File Size,Run Time (HH:MM:SS),Digital Origin,Description,Abstract,Preferred-Citation (included only in Fritz Lab and Environmental reports),Capture Device,PPI,Archival Collection,Archival Box,Archival Series,Archival Folder,Local Restriction,Subject Topic (LCSH),Keyword,Subject Name (LCNAF),Subject Geographic (LCNAF),Subject Geographic (Local),Hierarchical Geographic (Getty TGN),Source Publication Title,Source Publication L-ISSN,Volume Number,Issue Number,Page Numbers,DOI,Catalog or ArchivesSpace URL,Call Number,Report Number (included only on ATLSS and Fritz Lab spreadsheet),Rights Statement,Access
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
module github.com/lehigh-university-libraries/fabricator

go 1.22.2

require github.com/lehigh-university-libraries/go-islandora v0.0.0-20240709193244-50f8e60d633d
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
github.com/lehigh-university-libraries/go-islandora v0.0.0-20240709193244-50f8e60d633d h1:bryeojGZvWZazzCdVsZB0Pi3LijX/Aq82KgDnjPkt0s=
github.com/lehigh-university-libraries/go-islandora v0.0.0-20240709193244-50f8e60d633d/go.mod h1:JDCARba/UJW608jcs6XyVuCsfp3LoDVDC++bnGAB47A=
184 changes: 182 additions & 2 deletions main.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,187 @@
package main

import "log/slog"
import (
	"encoding/csv"
	"encoding/json"
	"flag"
	"fmt"
	"io"
	"log/slog"
	"os"
	"reflect"
	"slices"
	"strings"

	"github.com/lehigh-university-libraries/go-islandora/workbench"
)

// getJSONFieldName returns the name portion of a struct tag value: everything
// before the first comma, or the whole value when it contains no comma.
func getJSONFieldName(tag string) string {
	name, _, _ := strings.Cut(tag, ",")
	return name
}

// readCSVWithJSONTags reads the CSV file at filePath and converts every data
// row into a map from output column name to the values collected for it.
//
// The first record is treated as the header row. A header is used only when it
// equals the name in the `json` tag of a settable field of workbench.SheetsCsv;
// the output column defaults to the name in that field's `csv` tag. Empty cells
// contribute nothing to a row.
//
// It returns an error when the file cannot be opened, when the header row
// cannot be read, or when any later record fails to parse. Previously a parse
// failure silently ended the loop and returned the rows read so far.
func readCSVWithJSONTags(filePath string) ([]map[string][]string, error) {
	file, err := os.Open(filePath)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	reader := csv.NewReader(file)
	headers, err := reader.Read()
	if err != nil {
		return nil, err
	}

	// The header-to-column mapping is the same for every row, so resolve it
	// once instead of reflecting over the struct for each record.
	columns := sheetColumns(headers)

	var rows []map[string][]string
	for {
		record, err := reader.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			// Includes csv.ErrFieldCount, so a successfully returned record
			// always has exactly len(headers) fields and record[i] is safe.
			return nil, err
		}

		row := map[string][]string{}
		for i, header := range headers {
			cell := record[i]
			if cell == "" {
				// Skip before transforming so empty cells do not turn into
				// stub values such as {"value":"","attr0":"page"}.
				continue
			}
			for _, column := range columns[i] {
				name, value, err := workbenchValue(header, column, cell)
				if err != nil {
					return nil, fmt.Errorf("converting column %q: %w", header, err)
				}
				if value != "" {
					row[name] = append(row[name], value)
				}
			}
		}

		rows = append(rows, row)
	}

	return rows, nil
}

// sheetColumns returns, for each header index, the `csv` tag names of the
// settable workbench.SheetsCsv fields whose `json` tag name equals that header.
func sheetColumns(headers []string) [][]string {
	v := reflect.ValueOf(&workbench.SheetsCsv{}).Elem()
	t := v.Type()

	columns := make([][]string, len(headers))
	for i, header := range headers {
		for j := 0; j < t.NumField(); j++ {
			field := t.Field(j)
			if getJSONFieldName(field.Tag.Get("json")) != header {
				continue
			}
			value := v.FieldByName(field.Name)
			if value.IsValid() && value.CanSet() {
				columns[i] = append(columns[i], getJSONFieldName(field.Tag.Get("csv")))
			}
		}
	}
	return columns
}

// workbenchValue applies the header-specific rewrite to one non-empty cell and
// returns the output column name together with the rewritten value.
func workbenchValue(header, column, cell string) (string, string, error) {
	switch header {
	case "field_linked_agent.rel_type":
		// Keep the second "|"-separated component when there is one. A cell
		// without "|" is passed through unchanged instead of panicking.
		if parts := strings.Split(cell, "|"); len(parts) > 1 {
			cell = parts[1]
		}
	case "field_extent.attr0=page",
		"field_extent.attr0=dimensions",
		"field_extent.attr0=bytes",
		"field_extent.attr0=minutes",
		"field_abstract.attr0=description",
		"field_abstract.attr0=abstract",
		"field_note.attr0=preferred-citation",
		"field_note.attr0=capture-device",
		"field_note.attr0=ppi",
		"field_note.attr0=collection",
		"field_note.attr0=box",
		"field_note.attr0=series",
		"field_note.attr0=folder",
		"field_part_detail.attr0=volume",
		"field_part_detail.attr0=issue",
		"field_part_detail.attr0=page",
		"field_identifier.attr0=doi",
		"field_identifier.attr0=uri",
		"field_identifier.attr0=call-number",
		"field_identifier.attr0=report-number":
		field, attr, _ := strings.Cut(header, ".attr0=")
		column = field
		// Marshal rather than format with %s so quotes and backslashes in the
		// cell are escaped and the result is always valid JSON.
		encoded, err := json.Marshal(struct {
			Value string `json:"value"`
			Attr0 string `json:"attr0"`
		}{Value: cell, Attr0: attr})
		if err != nil {
			return "", "", err
		}
		cell = string(encoded)
	case "field_geographic_subject.vid=geographic_naf",
		"field_geographic_subject.vid=geographic_local":
		field, vocabulary, _ := strings.Cut(header, ".vid=")
		column = field
		cell = fmt.Sprintf("%s:%s", vocabulary, cell)
	/*
		Not handled yet:
		case "field_related_item.title":
		case "field_related_item.identifier_type=issn":
		case "field_linked_agent.vid",
			"field_linked_agent.rel_type":
	*/
	case "File Path":
		cell = strings.ReplaceAll(cell, `\`, `/`)
		cell = strings.TrimLeft(cell, "/")
		// NOTE(review): a path that already starts with "mnt" keeps its
		// leading slash stripped, and paths of three bytes or fewer are never
		// prefixed. Behavior kept as-is; confirm whether this is intended.
		if len(cell) > 3 && !strings.HasPrefix(cell, "mnt") {
			cell = fmt.Sprintf("/mnt/scans/%s", cell)
		}
	}

	// " ; " separates multiple values in the sheet; the output uses "|".
	return column, strings.ReplaceAll(cell, " ; ", "|"), nil
}

// main converts the CSV named by -source into the CSV named by -target.
//
// Both flags are required. The output contains one column for every name that
// holds at least one value in any row, in sorted order so repeated runs give
// the same layout. Multiple values in a cell are joined with "|". The process
// exits with status 1 when reading, creating, writing, or closing fails, or
// when the source has no data rows.
func main() {
	slog.Info("OK")
	// Define the source and target flags
	source := flag.String("source", "", "Path to the source CSV file")
	target := flag.String("target", "", "Path to the target CSV file")
	flag.Parse()

	if *source == "" || *target == "" {
		fmt.Println("Source and target flags are required")
		flag.Usage()
		return
	}

	rows, err := readCSVWithJSONTags(*source)
	if err != nil {
		slog.Error("Failed to read CSV", "err", err)
		os.Exit(1)
	}
	// Check before creating the target so a header-only source neither panics
	// nor leaves an empty output file behind.
	if len(rows) == 0 {
		slog.Error("Failed to read CSV", "err", "no data rows found")
		os.Exit(1)
	}

	headers := populatedColumns(rows)

	file, err := os.Create(*target)
	if err != nil {
		slog.Error("Failed to create file", "err", err)
		os.Exit(1)
	}

	writer := csv.NewWriter(file)

	// write the header to the CSV
	if err := writer.Write(headers); err != nil {
		slog.Error("Failed to write record to CSV", "err", err)
		os.Exit(1)
	}

	// write the rows to the CSV
	for _, row := range rows {
		record := make([]string, 0, len(headers))
		for _, header := range headers {
			record = append(record, strings.Join(row[header], "|"))
		}
		if err := writer.Write(record); err != nil {
			slog.Error("Failed to write record to CSV", "err", err)
			os.Exit(1)
		}
	}

	// Flush and close explicitly: deferred calls do not run on os.Exit, and
	// buffered write errors only surface through writer.Error after a Flush.
	writer.Flush()
	if err := writer.Error(); err != nil {
		slog.Error("Failed to write record to CSV", "err", err)
		os.Exit(1)
	}
	if err := file.Close(); err != nil {
		slog.Error("Failed to close file", "err", err)
		os.Exit(1)
	}

	slog.Info("CSV file has been written successfully")
}

// populatedColumns returns, in sorted order, every column name that has at
// least one value in any of the rows.
func populatedColumns(rows []map[string][]string) []string {
	seen := map[string]struct{}{}
	var columns []string
	for _, row := range rows {
		for column, values := range row {
			if len(values) == 0 {
				continue
			}
			if _, ok := seen[column]; !ok {
				seen[column] = struct{}{}
				columns = append(columns, column)
			}
		}
	}
	slices.Sort(columns)
	return columns
}
Loading