From 3f99f744cd35af83b17bec6b312e55fbf923953b Mon Sep 17 00:00:00 2001 From: Joe Corall Date: Tue, 27 Aug 2024 16:11:06 -0400 Subject: [PATCH] fixup hierarchical geo checking and marshalling --- internal/handlers/check.go | 26 ++------ internal/tgn/tgn.go | 128 +++++++++++++++++++++++++++++++++++++ internal/tgn/tgn_test.go | 44 +++++++++++++ main.go | 14 +++- 4 files changed, 191 insertions(+), 21 deletions(-) create mode 100644 internal/tgn/tgn.go create mode 100644 internal/tgn/tgn_test.go diff --git a/internal/handlers/check.go b/internal/handlers/check.go index c76be11..c1ee0ad 100644 --- a/internal/handlers/check.go +++ b/internal/handlers/check.go @@ -13,6 +13,7 @@ import ( "strings" "github.com/golang-jwt/jwt/v5" + "github.com/lehigh-university-libraries/fabricator/internal/tgn" "github.com/lestrrat-go/jwx/jwk" edtf "github.com/sfomuseum/go-edtf/parser" ) @@ -111,7 +112,7 @@ func CheckMyWork(w http.ResponseWriter, r *http.Request) { doiPattern := regexp.MustCompile(`^10\.\d{4,9}\/[-._;()/:A-Za-z0-9]+$`) gettyTgnPattern := regexp.MustCompile(`^http://vocab\.getty\.edu/page/tgn/\d+$`) datePattern := regexp.MustCompile(`^\d{4}(-\d{2}(-\d{2})?)?$`) - + hierarchyChecked := map[string]bool{} errors := map[string]string{} requiredFields := []string{ "Title", @@ -189,30 +190,15 @@ func CheckMyWork(w http.ResponseWriter, r *http.Request) { errors[i] = "Invalid value. 
Must be Yes or No"
 				}
 			case "Hierarchical Geographic (Getty TGN)":
+				if hierarchyChecked[cell] { // each distinct URI is resolved only once
+					break
+				}
+				hierarchyChecked[cell] = true
 				if !gettyTgnPattern.MatchString(cell) {
 					errors[i] = "Invalid Getty TGN URI"
-				}
-				hierarchyURL := strings.Replace(cell, "page", "hierarchy", 1)
-
-				req, err := http.NewRequest("GET", hierarchyURL, nil)
-				if err != nil {
-					break
-				}
-				req.Header.Set("Accept", "application/json")
-
-				client := &http.Client{}
-				resp, err := client.Do(req)
-				if err != nil {
-					slog.Error("Unable to request hierarchy URL", "url", hierarchyURL, "err", err)
-					errors[i] = "Unable to request hierarchical information"
-					break
-				}
-				defer resp.Body.Close()
-
-				if resp.StatusCode != http.StatusOK {
-					slog.Error("Unable to get hierarchy URL", "url", hierarchyURL, "err", err)
-					errors[i] = "Unable to get hierarchical information"
+				} else if _, err := tgn.GetLocationFromTGN(cell); err != nil { // only validated URIs are fetched
+					errors[i] = "Unable to get TGN"
 				}
 			}
 		}
 	}
diff --git a/internal/tgn/tgn.go b/internal/tgn/tgn.go
new file mode 100644
index 0000000..f646283
--- /dev/null
+++ b/internal/tgn/tgn.go
@@ -0,0 +1,128 @@
+package tgn
+
+import (
+	"encoding/json"
+	"fmt"
+	"io"
+	"log/slog"
+	"net/http"
+)
+
+// Location holds the hierarchical location details stored in Islandora.
+type Location struct {
+	Country string `json:"country"`
+	State   string `json:"state"`
+	County  string `json:"county"`
+	City    string `json:"city"`
+}
+
+// Place is the subset of a TGN JSON record decoded here: id, _label and part_of.
+type Place struct {
+	ID     string `json:"id"`
+	Label  string `json:"_label"`
+	PartOf []struct {
+		ID    string `json:"id"`
+		Label string `json:"_label"`
+	} `json:"part_of"`
+}
+
+// GetLocationFromTGN fetches the location information from a TGN URI.
+func GetLocationFromTGN(uri string) (*Location, error) {
+	// fetchPlaceData performs the GET and JSON decoding that used to be
+	// duplicated inline here, so both entry points share one code path.
+	place, err := fetchPlaceData(uri)
+	if err != nil {
+		return nil, err
+	}
+
+	location := &Location{}
+	if err := resolveHierarchy(place, location, 0); err != nil {
+		return nil, err
+	}
+
+	return location, nil
+}
+
+// maxHierarchyDepth bounds how many part_of links resolveHierarchy follows.
+// Every level costs one HTTP request, so a cyclic or malformed record must
+// not be able to keep the recursion going forever.
+const maxHierarchyDepth = 32
+
+// resolveHierarchy walks the part_of chain upwards from place and fills in
+// location while the recursion unwinds.
+//
+// depth is the number of part_of links followed so far (0 for the place that
+// was originally requested). Labels are assigned purely by that distance:
+// 0 is the city, 1 the county, 2 the state and 3 the country; deeper levels
+// are not stored, apart from the parentless root handled below.
+//
+// NOTE(review): this mapping presumably expects a city-level starting record;
+// confirm that callers never pass a county, state or country URI.
+func resolveHierarchy(place Place, location *Location, depth int) error {
+	if depth > maxHierarchyDepth {
+		return fmt.Errorf("hierarchy of %q exceeds %d levels", place.ID, maxHierarchyDepth)
+	}
+
+	// A place without a parent is the top of the chain, not the city. Its label
+	// is stored as a provisional country; if the chain is deep enough, the
+	// depth 3 case below replaces it while unwinding.
+	if len(place.PartOf) == 0 {
+		location.Country = place.Label
+		return nil
+	}
+
+	// Only the first part_of entry is followed; it is fetched as "<id>.json".
+	parent := place.PartOf[0]
+	parentPlace, err := fetchPlaceData(parent.ID + ".json")
+	if err != nil {
+		return fmt.Errorf("fetching parent %q of %q: %w", parent.ID, place.ID, err)
+	}
+
+	if err := resolveHierarchy(parentPlace, location, depth+1); err != nil {
+		return err
+	}
+
+	// Assign after the recursive call so the provisional country written at
+	// the top of the chain can be overwritten by the depth 3 label.
+	switch depth {
+	case 0:
+		location.City = place.Label
+	case 1:
+		location.County = place.Label
+	case 2:
+		location.State = place.Label
+	case 3:
+		location.Country = place.Label
+	}
+
+	return nil
+}
+
+// fetchPlaceData fetches the JSON data for a given TGN URI.
+func fetchPlaceData(uri string) (Place, error) {
+	req, err := http.NewRequest(http.MethodGet, uri, nil)
+	if err != nil {
+		slog.Error("Unable to create hierarchy request", "url", uri, "err", err)
+		return Place{}, err
+	}
+	req.Header.Set("Accept", "application/json")
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return Place{}, fmt.Errorf("error fetching data: %w", err)
+	}
+	defer resp.Body.Close()
+	// A non-200 reply must not be decoded as if it were a place record.
+	if resp.StatusCode != http.StatusOK {
+		return Place{}, fmt.Errorf("error fetching data: %s returned %s", uri, resp.Status)
+	}
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return Place{}, fmt.Errorf("error reading response body: %w", err)
+	}
+	var place Place
+	if err := json.Unmarshal(body, &place); err != nil {
+		return Place{}, fmt.Errorf("error parsing JSON: %w", err)
+	}
+	return place, nil
+}
diff --git a/internal/tgn/tgn_test.go b/internal/tgn/tgn_test.go
new file mode 100644
index 0000000..e80f4b9
--- /dev/null
+++ b/internal/tgn/tgn_test.go
@@ -0,0 +1,44 @@
+package tgn
+
+import (
+	"testing"
+)
+
+func TestGetLocationFromTGN(t *testing.T) {
+	tests := map[string]struct {
+		URI      string
+		Expected *Location
+	}{
+		"Test Bethlehem": {
+			URI: "http://vocab.getty.edu/page/tgn/7013416",
+			Expected: &Location{
+				Country: "United States",
+				State:   "Pennsylvania",
+				County:  "Northampton",
+				City:    "Bethlehem",
+			},
+		},
+		"Test Coplay": {
+			URI: "http://vocab.getty.edu/page/tgn/2087483",
+			Expected: &Location{
+				Country: "United States",
+				State:   "Pennsylvania",
+				County:  "Lehigh",
+				City:    "Coplay",
+			},
+		},
+	}
+
+	for name, tc := range tests {
+		t.Run(name, func(t *testing.T) {
+			location, err := GetLocationFromTGN(tc.URI)
+			if err != nil {
+				t.Fatalf("unexpected error: %v", err)
+			}
+
+			if *location != *tc.Expected {
+				t.Errorf("expected %+v, got %+v", tc.Expected, location)
+			}
+		})
+	}
+}
diff --git a/main.go b/main.go
index c2587dd..626c244 100644
--- a/main.go
+++ b/main.go
@@ -2,6 +2,7 @@ package main
 
 import (
 	"encoding/csv"
+	"encoding/json"
 	"flag"
 	"fmt"
 	"log/slog"
@@ -11,6 +12,7 @@ import (
 	"strconv"
 	"strings"
 
+
"github.com/lehigh-university-libraries/fabricator/internal/tgn" "github.com/lehigh-university-libraries/go-islandora/workbench" ) @@ -84,7 +86,17 @@ func readCSVWithJSONTags(filePath string) (map[string]bool, []map[string][]strin } str = strings.TrimLeft(str, "0") case "field_subject_hierarchical_geo": - str = `{"country":"United States","state":"Pennsylvania","county":"Lehigh","city":"Coplay"}` + tgn, err := tgn.GetLocationFromTGN(str) + if err != nil { + return nil, nil, fmt.Errorf("unknown TGN: %s %v", str, err) + } + + locationJSON, err := json.Marshal(tgn) + if err != nil { + return nil, nil, fmt.Errorf("error marshalling TGN: %s %v", str, err) + } + str = string(locationJSON) + case "field_rights": switch str { case "IN COPYRIGHT":