Commit

Use openapi spec from go-islandora (#1)
joecorall authored Jul 9, 2024
1 parent 2c55557 commit 5df4fdd
Showing 7 changed files with 396 additions and 4 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/lint-test.yml
@@ -14,7 +14,7 @@ jobs:
      - name: golangci-lint
        uses: golangci/golangci-lint-action@v3
        with:
          version: v1.54
          version: v1.59.1

      - name: Install dependencies
        run: go get .
12 changes: 11 additions & 1 deletion README.md
@@ -4,6 +4,8 @@ Prepare a CSV to load via Islandora Workbench

This is a convenience utility that converts a more user-friendly spreadsheet into the format Islandora Workbench expects. It can be thought of as middleware between normal spreadsheet curation and Workbench ingest.

## Overview

```mermaid
sequenceDiagram
actor Alice
@@ -19,7 +21,15 @@ sequenceDiagram
Islandora Workbench->>Drupal: entity CUD
```

TODO
## Getting started

```sh
go install github.com/oapi-codegen/oapi-codegen/v2/cmd/oapi-codegen@latest
go generate ./api
```


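The converter in `main.go` reflects over the `workbench.SheetsCsv` struct from go-islandora: each field's `json` tag is matched against a spreadsheet header, and its `csv` tag names the output column. A minimal sketch of the shape that code assumes (field names and tags here are invented for illustration, not the actual generated code):

```go
// Illustrative only; the real struct lives in the
// github.com/lehigh-university-libraries/go-islandora/workbench package.
type SheetsCsv struct {
	Title    *string `json:"Title" csv:"title"`
	FilePath *string `json:"File Path" csv:"file"`
}
```

Once the code has been generated, the converter can be run against the fixture added in this commit, e.g. `go run . -source fixtures/sample1.csv -target converted.csv` (flag names come from `main.go`; the output path is arbitrary).
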
## TODO
- [ ] HTTP service to allow a Google Sheets Apps script to validate a spreadsheet
- [ ] Validator service
- [ ] CSV transform service
197 changes: 197 additions & 0 deletions fixtures/sample1.csv

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions fixtures/tmpl.csv
@@ -0,0 +1 @@
Upload ID,Page/Item Parent ID,Child Sort Order,Node ID,Parent Collection,Object Model,File Path,Add Coverpage (Y/N),Title,Full Title,Make Public (Y/N),Contributor Name 1,Contributor Relator 1,Contributor Type 1,ORCID Number 1,Contributor Status 1,Contributor Email 1,Contributor Institution 1,Related Department,Resource Type,Genre (Getty AAT),Creation Date,Season,Date Captured,Embargo Until Date,Publisher,Edition,Language,Physical Format (Getty AAT),File Format (MIME Type),Page Count,Dimensions,File Size,Run Time (HH:MM:SS),Digital Origin,Description,Abstract,Preferred-Citation (included only in Fritz Lab and Environmental reports),Capture Device,PPI,Archival Collection,Archival Box,Archival Series,Archival Folder,Local Restriction,Subject Topic (LCSH),Keyword,Subject Name (LCNAF),Subject Geographic (LCNAF),Subject Geographic (Local),Hierarchical Geographic (Getty TGN),Source Publication Title,Source Publication L-ISSN,Volume Number,Issue Number,Page Numbers,DOI,Catalog or ArchivesSpace URL,Call Number,Report Number (included only on ATLSS and Fritz Lab spreadsheet),Rights Statement,Access
2 changes: 2 additions & 0 deletions go.mod
@@ -1,3 +1,5 @@
module github.com/lehigh-university-libraries/fabricator

go 1.22.2

require github.com/lehigh-university-libraries/go-islandora v0.0.0-20240709193244-50f8e60d633d
2 changes: 2 additions & 0 deletions go.sum
@@ -0,0 +1,2 @@
github.com/lehigh-university-libraries/go-islandora v0.0.0-20240709193244-50f8e60d633d h1:bryeojGZvWZazzCdVsZB0Pi3LijX/Aq82KgDnjPkt0s=
github.com/lehigh-university-libraries/go-islandora v0.0.0-20240709193244-50f8e60d633d/go.mod h1:JDCARba/UJW608jcs6XyVuCsfp3LoDVDC++bnGAB47A=
184 changes: 182 additions & 2 deletions main.go
@@ -1,7 +1,187 @@
package main

import "log/slog"
import (
	"encoding/csv"
	"flag"
	"fmt"
	"log/slog"
	"os"
	"reflect"
	"strings"

	"github.com/lehigh-university-libraries/go-islandora/workbench"
)

func getJSONFieldName(tag string) string {
	if commaIndex := strings.Index(tag, ","); commaIndex != -1 {
		return tag[:commaIndex]
	}
	return tag
}
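
// For illustration (not part of this commit): getJSONFieldName("title,omitempty")
// returns "title", while a tag with no comma, such as "title", is returned unchanged.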

func readCSVWithJSONTags(filePath string) ([]map[string][]string, error) {
	file, err := os.Open(filePath)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	reader := csv.NewReader(file)
	headers, err := reader.Read()
	if err != nil {
		return nil, err
	}

	var rows []map[string][]string
	newCsv := &workbench.SheetsCsv{}
	for {
		record, err := reader.Read()
		if err != nil {
			break
		}

		row := map[string][]string{}
		v := reflect.ValueOf(newCsv).Elem()
		t := v.Type()

		// match each CSV header against the json tag of a SheetsCsv field, then
		// write the (possibly transformed) cell value under the column named by
		// that field's csv tag
		for i, header := range headers {
			for j := 0; j < t.NumField(); j++ {
				field := t.Field(j)
				jsonTag := getJSONFieldName(field.Tag.Get("json"))
				if jsonTag == header {
					value := v.FieldByName(field.Name)
					if value.IsValid() && value.CanSet() {
						str := record[i]
						column := getJSONFieldName(field.Tag.Get("csv"))
						switch header {
						case "field_linked_agent.rel_type":
							components := strings.Split(str, "|")
							str = components[1]
						case "field_extent.attr0=page",
							"field_extent.attr0=dimensions",
							"field_extent.attr0=bytes",
							"field_extent.attr0=minutes",
							"field_abstract.attr0=description",
							"field_abstract.attr0=abstract",
							"field_note.attr0=preferred-citation",
							"field_note.attr0=capture-device",
							"field_note.attr0=ppi",
							"field_note.attr0=collection",
							"field_note.attr0=box",
							"field_note.attr0=series",
							"field_note.attr0=folder",
							"field_part_detail.attr0=volume",
							"field_part_detail.attr0=issue",
							"field_part_detail.attr0=page",
							"field_identifier.attr0=doi",
							"field_identifier.attr0=uri",
							"field_identifier.attr0=call-number",
							"field_identifier.attr0=report-number":
							// fold the attr0 suffix into a JSON value on the base column
							components := strings.Split(header, ".attr0=")
							column = components[0]
							str = fmt.Sprintf(`{"value":"%s","attr0":"%s"}`, str, components[1])
						case "field_geographic_subject.vid=geographic_naf",
							"field_geographic_subject.vid=geographic_local":
							// prefix the value with its vocabulary ID
							components := strings.Split(header, ".vid=")
							column = components[0]
							str = fmt.Sprintf("%s:%s", components[1], str)
						/*
							case "field_related_item.title":
							case "field_related_item.identifier_type=issn":
							case "field_linked_agent.vid",
								"field_linked_agent.rel_type":
						*/
						case "File Path":
							// normalize Windows-style paths and anchor them under /mnt/scans
							str = strings.ReplaceAll(str, `\`, `/`)
							str = strings.TrimLeft(str, "/")
							if len(str) > 3 && str[0:3] != "mnt" {
								str = fmt.Sprintf("/mnt/scans/%s", str)
							}
						}

						// treat " ; " in the source cell as a multi-value delimiter and convert it to "|"
						str = strings.ReplaceAll(str, " ; ", "|")
						if str != "" {
							row[column] = append(row[column], str)
						}
					}
				}
			}
		}

		rows = append(rows, row)
	}

	return rows, nil
}
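
// Illustration of readCSVWithJSONTags above (not from the original commit): if a
// source column's header is literally "field_extent.attr0=page" and a cell holds
// "12", the row gains row["field_extent"] = []string{`{"value":"12","attr0":"page"}`},
// and a cell like "dogs ; cats" in any matched column is stored as "dogs|cats".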

func main() {
	slog.Info("OK")
	// Define the source and target flags
	source := flag.String("source", "", "Path to the source CSV file")
	target := flag.String("target", "", "Path to the target CSV file")
	flag.Parse()

	if *source == "" || *target == "" {
		fmt.Println("Source and target flags are required")
		flag.Usage()
		return
	}
	rows, err := readCSVWithJSONTags(*source)
	if err != nil {
		slog.Error("Failed to read CSV", "err", err)
		os.Exit(1)
	}

	file, err := os.Create(*target)
	if err != nil {
		slog.Error("Failed to create file", "err", err)
		os.Exit(1)
	}
	defer file.Close()

	writer := csv.NewWriter(file)
	defer writer.Flush()

	// get all possible headers in the CSV
	headers := []string{}
	for header := range rows[0] {
		headers = append(headers, header)
	}

	// check any columns that have no values
	includeColumns := map[string]bool{}
	for _, row := range rows {
		for _, header := range headers {
			if !includeColumns[header] && len(row[header]) > 0 {
				includeColumns[header] = true
			}
		}
	}

	// remove columns with no values from the header
	headers = []string{}
	for header, include := range includeColumns {
		if include {
			headers = append(headers, header)
		}
	}

	// finally, write the header to the CSV
	if err := writer.Write(headers); err != nil {
		slog.Error("Failed to write record to CSV", "err", err)
		os.Exit(1)
	}

	// write the rows to the CSV
	for _, row := range rows {
		record := []string{}
		for _, header := range headers {
			record = append(record, strings.Join(row[header], "|"))
		}
		if err := writer.Write(record); err != nil {
			slog.Error("Failed to write record to CSV", "err", err)
			os.Exit(1)
		}
	}

	slog.Info("CSV file has been written successfully")
}
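
// Worked example of the write-out above (hypothetical data; header order comes from
// Go map iteration and is therefore not stable): given
//   rows[0] = map[string][]string{"title": {"My Title"}, "field_subject": {"cats", "dogs"}}
// the output CSV gets a header row containing "title" and "field_subject" and the
// record "My Title,cats|dogs".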
