Skip to content

Commit

Permalink
fixup hierarchical geo checking and marshalling
Browse files Browse the repository at this point in the history
  • Loading branch information
joecorall committed Aug 27, 2024
1 parent b80751e commit 3f99f74
Show file tree
Hide file tree
Showing 4 changed files with 191 additions and 21 deletions.
26 changes: 6 additions & 20 deletions internal/handlers/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"strings"

"github.com/golang-jwt/jwt/v5"
"github.com/lehigh-university-libraries/fabricator/internal/tgn"
"github.com/lestrrat-go/jwx/jwk"
edtf "github.com/sfomuseum/go-edtf/parser"
)
Expand Down Expand Up @@ -111,7 +112,7 @@ func CheckMyWork(w http.ResponseWriter, r *http.Request) {
doiPattern := regexp.MustCompile(`^10\.\d{4,9}\/[-._;()/:A-Za-z0-9]+$`)
gettyTgnPattern := regexp.MustCompile(`^http://vocab\.getty\.edu/page/tgn/\d+$`)
datePattern := regexp.MustCompile(`^\d{4}(-\d{2}(-\d{2})?)?$`)

hierarchyChecked := map[string]bool{}
errors := map[string]string{}
requiredFields := []string{
"Title",
Expand Down Expand Up @@ -189,30 +190,15 @@ func CheckMyWork(w http.ResponseWriter, r *http.Request) {
errors[i] = "Invalid value. Must be Yes or No"
}
case "Hierarchical Geographic (Getty TGN)":
if !gettyTgnPattern.MatchString(cell) {
errors[i] = "Invalid Getty TGN URI"
}
hierarchyURL := strings.Replace(cell, "page", "hierarchy", 1)

req, err := http.NewRequest("GET", hierarchyURL, nil)
if err != nil {
if hierarchyChecked[cell] {
break
}
req.Header.Set("Accept", "application/json")

client := &http.Client{}
resp, err := client.Do(req)
hierarchyChecked[cell] = true
location, err := tgn.GetLocationFromTGN(tc.URI)
if err != nil {
slog.Error("Unable to request hierarchy URL", "url", hierarchyURL, "err", err)
errors[i] = "Unable to request hierarchical information"
break
errors[i] = "Unable to get TGN"
}
defer resp.Body.Close()

if resp.StatusCode != http.StatusOK {
slog.Error("Unable to get hierarchy URL", "url", hierarchyURL, "err", err)
errors[i] = "Unable to get hierarchical information"
}
}
}
}
Expand Down
128 changes: 128 additions & 0 deletions internal/tgn/tgn.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
package tgn

import (
	"encoding/json"
	"fmt"
	"io"
	"log/slog"
	"net/http"
	"time"
)

// Location represents the hierarchical location details stored in Islandora.
//
// It is the result type of GetLocationFromTGN. Each field is filled from the
// label of one record on a TGN place's part_of chain (see resolveHierarchy),
// and a field is left as the empty string when no record was assigned to it.
// The JSON tags give the lower-case keys used when the value is marshalled.
type Location struct {
// Country is the label of the top-most record on the chain, or of the
// record three levels above the starting place when the chain is longer.
Country string `json:"country"`
// State is the label of the record two levels above the starting place.
State string `json:"state"`
// County is the label of the record one level above the starting place.
County string `json:"county"`
// City is the label of the starting place itself, when it has a parent.
City string `json:"city"`
}

// Place represents the TGN data.
//
// Only the keys this package reads are declared: the record's own id and
// label, and the list of places it is part of. Any other keys in the JSON
// document are ignored by encoding/json when decoding into this type.
type Place struct {
// ID is decoded from the "id" key of the record.
ID string `json:"id"`
// Label is decoded from the "_label" key; it is the text copied into
// Location fields.
Label string `json:"_label"`
// PartOf is decoded from the "part_of" key. resolveHierarchy follows only
// the first entry, appending ".json" to its ID to build the URL to fetch.
PartOf []struct {
ID string `json:"id"`
Label string `json:"_label"`
} `json:"part_of"`
}

// GetLocationFromTGN fetches the TGN record at uri and resolves it into a
// Location by walking up the record's part_of chain.
//
// The request is sent with "Accept: application/json". An error is returned
// when the request cannot be built or sent, when the server answers with
// anything other than 200 OK, when the body cannot be read or decoded as
// JSON, or when resolving an ancestor fails. On error the returned *Location
// is nil. Underlying errors are wrapped with %w so callers can inspect them
// with errors.Is / errors.As.
func GetLocationFromTGN(uri string) (*Location, error) {
	req, err := http.NewRequest(http.MethodGet, uri, nil)
	if err != nil {
		slog.Error("Unable to create hierarchy request", "url", uri, "err", err)
		return nil, err
	}
	req.Header.Set("Accept", "application/json")

	// Bound the call so an unresponsive server cannot block the caller
	// forever; the zero-value client has no timeout at all.
	client := &http.Client{Timeout: 30 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("error fetching data: %w", err)
	}
	defer resp.Body.Close()

	// An error page may still carry a JSON body, which would otherwise be
	// decoded into a bogus Place and silently produce a wrong Location.
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("error fetching data: unexpected status %q from %s", resp.Status, uri)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("error reading response body: %w", err)
	}

	var place Place
	if err := json.Unmarshal(body, &place); err != nil {
		return nil, fmt.Errorf("error parsing JSON: %w", err)
	}

	// Depth 0 marks the requested place itself; ancestors are fetched and
	// assigned by resolveHierarchy.
	location := &Location{}
	if err := resolveHierarchy(place, location, 0); err != nil {
		return nil, err
	}

	return location, nil
}

// resolveHierarchy fills location from place and its chain of ancestors.
//
// Starting at place, it repeatedly fetches the first part_of entry (that
// entry's ID with ".json" appended) until it reaches a record that has no
// parent. The label of that top-most record is stored as Country. Every other
// record on the chain is then assigned according to its distance from the
// starting place, offset by depth: 0 is City, 1 is County, 2 is State and 3
// is Country (replacing the top-most label). Other distances are ignored.
//
// If any fetch fails, its error is returned and location is left untouched.
func resolveHierarchy(place Place, location *Location, depth int) error {
	// Labels of every record that has a parent, nearest first.
	var chain []string

	node := place
	for len(node.PartOf) > 0 {
		chain = append(chain, node.Label)

		parent, err := fetchPlaceData(node.PartOf[0].ID + ".json")
		if err != nil {
			return err
		}
		node = parent
	}

	// node is now the parentless record at the top of the chain. Record it
	// first so that a level-3 record, if present, takes precedence below.
	location.Country = node.Label

	for offset, label := range chain {
		switch depth + offset {
		case 0:
			location.City = label
		case 1:
			location.County = label
		case 2:
			location.State = label
		case 3:
			location.Country = label
		}
	}

	return nil
}

// fetchPlaceData fetches the JSON data for a given TGN URI and decodes it
// into a Place.
//
// The request is sent with "Accept: application/json". An error is returned
// when the request cannot be built or sent, when the server answers with
// anything other than 200 OK, or when the body cannot be read or decoded as
// JSON. On error the returned Place is the zero value. Underlying errors are
// wrapped with %w so callers can inspect them with errors.Is / errors.As.
func fetchPlaceData(uri string) (Place, error) {
	req, err := http.NewRequest(http.MethodGet, uri, nil)
	if err != nil {
		slog.Error("Unable to create hierarchy request", "url", uri, "err", err)
		return Place{}, err
	}
	req.Header.Set("Accept", "application/json")

	// Bound the call so an unresponsive server cannot block the caller
	// forever; the zero-value client has no timeout at all.
	client := &http.Client{Timeout: 30 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return Place{}, fmt.Errorf("error fetching data: %w", err)
	}
	defer resp.Body.Close()

	// An error page may still carry a JSON body, which would otherwise be
	// decoded into a bogus Place and end the hierarchy walk early.
	if resp.StatusCode != http.StatusOK {
		return Place{}, fmt.Errorf("error fetching data: unexpected status %q from %s", resp.Status, uri)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return Place{}, fmt.Errorf("error reading response body: %w", err)
	}

	var place Place
	if err := json.Unmarshal(body, &place); err != nil {
		return Place{}, fmt.Errorf("error parsing JSON: %w", err)
	}

	return place, nil
}
44 changes: 44 additions & 0 deletions internal/tgn/tgn_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package tgn

import (
"testing"
)

// TestGetLocationFromTGN checks that known TGN page URIs resolve to the
// expected country, state, county and city.
//
// NOTE(review): the cases pass real vocab.getty.edu URLs straight to
// GetLocationFromTGN, so this presumably needs network access — confirm
// before running it in an offline CI job.
func TestGetLocationFromTGN(t *testing.T) {
	type testCase struct {
		URI      string
		Expected *Location
	}

	cases := map[string]testCase{
		"Test Bethlehem": {
			URI: "http://vocab.getty.edu/page/tgn/7013416",
			Expected: &Location{
				Country: "United States",
				State:   "Pennsylvania",
				County:  "Northampton",
				City:    "Bethlehem",
			},
		},
		"Test Coplay": {
			URI: "http://vocab.getty.edu/page/tgn/2087483",
			Expected: &Location{
				Country: "United States",
				State:   "Pennsylvania",
				County:  "Lehigh",
				City:    "Coplay",
			},
		},
	}

	for label, c := range cases {
		t.Run(label, func(t *testing.T) {
			got, err := GetLocationFromTGN(c.URI)
			if err != nil {
				t.Fatalf("unexpected error: %v", err)
			}

			// Location holds only strings, so the dereferenced values can
			// be compared directly.
			if want := *c.Expected; *got != want {
				t.Errorf("expected %+v, got %+v", c.Expected, got)
			}
		})
	}
}
14 changes: 13 additions & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package main

import (
"encoding/csv"
"encoding/json"
"flag"
"fmt"
"log/slog"
Expand All @@ -11,6 +12,7 @@ import (
"strconv"
"strings"

"github.com/lehigh-university-libraries/fabricator/internal/tgn"
"github.com/lehigh-university-libraries/go-islandora/workbench"
)

Expand Down Expand Up @@ -84,7 +86,17 @@ func readCSVWithJSONTags(filePath string) (map[string]bool, []map[string][]strin
}
str = strings.TrimLeft(str, "0")
case "field_subject_hierarchical_geo":
str = `{"country":"United States","state":"Pennsylvania","county":"Lehigh","city":"Coplay"}`
tgn, err := tgn.GetLocationFromTGN(str)
if err != nil {
return nil, nil, fmt.Errorf("unknown TGN: %s %v", str, err)
}

locationJSON, err := json.Marshal(tgn)
if err != nil {
return nil, nil, fmt.Errorf("error marshalling TGN: %s %v", str, err)
}
str = string(locationJSON)

case "field_rights":
switch str {
case "IN COPYRIGHT":
Expand Down

0 comments on commit 3f99f74

Please sign in to comment.