diff --git a/mapreader/spot_text/runner_base.py b/mapreader/spot_text/runner_base.py index 352c7378..ccc299cf 100644 --- a/mapreader/spot_text/runner_base.py +++ b/mapreader/spot_text/runner_base.py @@ -500,7 +500,9 @@ def save_to_geojson( if centroid: geo_df["polygon"] = geo_df["geometry"].to_wkt() - geo_df["geometry"] = geo_df["geometry"].centroid + geo_df["geometry"] = ( + geo_df["geometry"].to_crs("27700").centroid.to_crs(geo_df.crs) + ) geo_df.to_file(path_save, driver="GeoJSON", engine="pyogrio") @@ -553,7 +555,9 @@ def save_to_csv( geo_df = self._dict_to_dataframe(self.geo_predictions) if centroid: geo_df["polygon"] = geo_df["geometry"] - geo_df["geometry"] = geo_df["geometry"].centroid + geo_df["geometry"] = ( + geo_df["geometry"].to_crs("27700").centroid.to_crs(geo_df.crs) + ) geo_df.to_csv(f"{path_save}/geo_predictions.csv") def show_predictions( @@ -751,15 +755,15 @@ def load_patch_predictions( "[ERROR] ``patch_preds`` must be a pandas DataFrame or path to a CSV file." ) - # if we have a polygon column, this implies the pixel_geometry column is the centroid - if "polygon" in patch_preds.columns: - patch_preds["pixel_geometry"] = patch_preds["polygon"] - patch_preds.drop(columns=["polygon"], inplace=True) + # if we have a polygon column, this implies the pixel_geometry column is the centroid + if "polygon" in patch_preds.columns: + patch_preds["pixel_geometry"] = patch_preds["polygon"] + patch_preds.drop(columns=["polygon"], inplace=True) - # convert pixel_geometry to shapely geometry - patch_preds["pixel_geometry"] = patch_preds["pixel_geometry"].apply( - lambda x: from_wkt(x) - ) + # convert pixel_geometry to shapely geometry + patch_preds["pixel_geometry"] = patch_preds["pixel_geometry"].apply( + lambda x: from_wkt(x) + ) self.patch_predictions = {} # reset patch predictions @@ -1068,6 +1072,8 @@ def save_search_results_to_geojson( if centroid: geo_df["polygon"] = geo_df["geometry"].to_wkt() - geo_df["geometry"] = geo_df["geometry"].centroid + geo_df["geometry"] = ( + geo_df["geometry"].to_crs("27700").centroid.to_crs(geo_df.crs) + ) geo_df.to_file(path_save, driver="GeoJSON", engine="pyogrio") diff --git a/tests/test_text_spotting/test_deepsolo_runner.py b/tests/test_text_spotting/test_deepsolo_runner.py index eb22c8ed..ea9ed6c2 100644 --- a/tests/test_text_spotting/test_deepsolo_runner.py +++ b/tests/test_text_spotting/test_deepsolo_runner.py @@ -5,11 +5,13 @@ import pickle import geopandas as gpd +import numpy as np import pandas as pd import pytest from deepsolo.config import get_cfg from detectron2.engine import DefaultPredictor from detectron2.structures.instances import Instances +from shapely import Polygon from mapreader import DeepSoloRunner from mapreader.load import MapImages @@ -44,6 +46,7 @@ def init_dataframes(sample_dir, tmp_path): maps.add_metadata(f"{sample_dir}/mapreader_text_metadata.csv") maps.patchify_all(patch_size=800, path_save=tmp_path) maps.check_georeferencing() + assert maps.georeferenced parent_df, patch_df = maps.convert_images() return parent_df, patch_df @@ -140,6 +143,80 @@ def test_deepsolo_init_tsv(init_dataframes, tmp_path): assert isinstance(runner.patch_df.iloc[0]["coordinates"], tuple) +def test_deepsolo_init_geojson(init_dataframes, tmp_path, mock_response): + parent_df, patch_df = init_dataframes + parent_df.to_file(f"{tmp_path}/parent_df.geojson", driver="GeoJSON") + patch_df.to_file(f"{tmp_path}/patch_df.geojson", driver="GeoJSON") + runner = DeepSoloRunner( + f"{tmp_path}/patch_df.geojson", + parent_df=f"{tmp_path}/parent_df.geojson", + cfg_file=f"{DEEPSOLO_PATH}/configs/R_50/IC15/finetune_150k_tt_mlt_13_15_textocr.yaml", + ) + assert isinstance(runner, DeepSoloRunner) + assert isinstance(runner.predictor, DefaultPredictor) + assert isinstance(runner.parent_df.iloc[0]["geometry"], Polygon) + out = runner.run_all() + assert isinstance(out, dict) + assert "patch-0-0-800-40-#mapreader_text.png#.png" in out.keys() + assert isinstance(out["patch-0-0-800-40-#mapreader_text.png#.png"], list) + assert isinstance( + out["patch-0-0-800-40-#mapreader_text.png#.png"][0], PatchPrediction + ) + + +def test_deepsolo_init_errors(init_dataframes): + parent_df, patch_df = init_dataframes + with pytest.raises(ValueError, match="path to a CSV/TSV/etc or geojson"): + DeepSoloRunner( + patch_df="fake_file.txt", + parent_df=parent_df, + cfg_file=f"{DEEPSOLO_PATH}/configs/R_50/IC15/finetune_150k_tt_mlt_13_15_textocr.yaml", + ) + with pytest.raises(ValueError, match="path to a CSV/TSV/etc or geojson"): + DeepSoloRunner( + patch_df=patch_df, + parent_df="fake_file.txt", + cfg_file=f"{DEEPSOLO_PATH}/configs/R_50/IC15/finetune_150k_tt_mlt_13_15_textocr.yaml", + ) + with pytest.raises(ValueError, match="path to a CSV/TSV/etc or geojson"): + DeepSoloRunner( + patch_df=np.array([1, 2, 3]), + parent_df=parent_df, + ) + with pytest.raises(ValueError, match="path to a CSV/TSV/etc or geojson"): + DeepSoloRunner( + patch_df=patch_df, + parent_df=np.array([1, 2, 3]), + ) + + +def test_check_georeferencing(init_dataframes): + parent_df, patch_df = init_dataframes + runner = DeepSoloRunner( + patch_df, + parent_df=parent_df, + cfg_file=f"{DEEPSOLO_PATH}/configs/R_50/IC15/finetune_150k_tt_mlt_13_15_textocr.yaml", + ) + runner.check_georeferencing() + assert runner.georeferenced + + runner = DeepSoloRunner( + patch_df, + parent_df=parent_df.drop(columns=["dlat", "dlon"]), + cfg_file=f"{DEEPSOLO_PATH}/configs/R_50/IC15/finetune_150k_tt_mlt_13_15_textocr.yaml", + ) + runner.check_georeferencing() + assert runner.georeferenced + + runner = DeepSoloRunner( + patch_df, + parent_df=parent_df.drop(columns=["coordinates"]), + cfg_file=f"{DEEPSOLO_PATH}/configs/R_50/IC15/finetune_150k_tt_mlt_13_15_textocr.yaml", + ) + runner.check_georeferencing() + assert not runner.georeferenced + + def test_deepsolo_run_all(init_runner, mock_response): runner = init_runner # dict @@ -246,6 +323,151 @@ def test_deepsolo_save_to_geojson(runner_run_all, tmp_path, mock_response): assert set(gdf.columns) == set( ["image_id", "patch_id", "pixel_geometry", "geometry", "crs", "text", "score"] ) + runner.save_to_geojson(f"{tmp_path}/text_centroid.geojson", centroid=True) + assert os.path.exists(f"{tmp_path}/text_centroid.geojson") + gdf_centroid = gpd.read_file(f"{tmp_path}/text_centroid.geojson") + assert isinstance(gdf_centroid, gpd.GeoDataFrame) + assert set(gdf_centroid.columns) == set( + [ + "image_id", + "patch_id", + "pixel_geometry", + "geometry", + "crs", + "text", + "score", + "polygon", + ] + ) + + +def test_deepsolo_load_geo_predictions(runner_run_all, tmp_path): + runner = runner_run_all + _ = runner.convert_to_coords() + runner.save_to_geojson(f"{tmp_path}/text.geojson") + runner.geo_predictions = {} + runner.load_geo_predictions(f"{tmp_path}/text.geojson") + assert len(runner.geo_predictions) + assert "mapreader_text.png" in runner.geo_predictions.keys() + assert isinstance(runner.geo_predictions["mapreader_text.png"], list) + assert isinstance(runner.geo_predictions["mapreader_text.png"][0], GeoPrediction) + + +def test_deepsolo_load_geo_predictions_errors(runner_run_all, tmp_path): + runner = runner_run_all + with pytest.raises(ValueError, match="must be a path to a geojson file"): + runner.load_geo_predictions("fakefile.csv") + + +def test_deepsolo_save_to_csv_polygon(runner_run_all, tmp_path, mock_response): + runner = runner_run_all + # patch + runner.save_to_csv(tmp_path) + assert os.path.exists(f"{tmp_path}/patch_predictions.csv") + # parent + _ = runner.convert_to_parent_pixel_bounds() + runner.save_to_csv(tmp_path) + assert os.path.exists(f"{tmp_path}/patch_predictions.csv") + assert os.path.exists(f"{tmp_path}/parent_predictions.csv") + # geo + _ = runner.convert_to_coords() + runner.save_to_csv(tmp_path) + assert os.path.exists(f"{tmp_path}/patch_predictions.csv") + assert os.path.exists(f"{tmp_path}/parent_predictions.csv") + assert os.path.exists(f"{tmp_path}/geo_predictions.csv") + + +def test_deepsolo_save_to_csv_centroid(runner_run_all, tmp_path, mock_response): + runner = runner_run_all + # patch + runner.save_to_csv(tmp_path, centroid=True) + assert os.path.exists(f"{tmp_path}/patch_predictions.csv") + # parent + _ = runner.convert_to_parent_pixel_bounds() + runner.save_to_csv(tmp_path, centroid=True) + assert os.path.exists(f"{tmp_path}/patch_predictions.csv") + assert os.path.exists(f"{tmp_path}/parent_predictions.csv") + # geo + _ = runner.convert_to_coords() + runner.save_to_csv(tmp_path, centroid=True) + assert os.path.exists(f"{tmp_path}/patch_predictions.csv") + assert os.path.exists(f"{tmp_path}/parent_predictions.csv") + assert os.path.exists(f"{tmp_path}/geo_predictions.csv") + + +def test_deepsolo_save_to_csv_errors(runner_run_all, tmp_path, mock_response): + runner = runner_run_all + runner.patch_predictions = {} + with pytest.raises(ValueError, match="No patch predictions found"): + runner.save_to_csv(tmp_path) + + +def test_deepsolo_load_patch_predictions(runner_run_all, tmp_path): + runner = runner_run_all + _ = runner.convert_to_coords() + assert len(runner.geo_predictions) # this will be empty after reloading + runner.save_to_csv(tmp_path) + runner.load_patch_predictions(f"{tmp_path}/patch_predictions.csv") + assert len(runner.patch_predictions) + assert len(runner.geo_predictions) == 0 + assert ( + "patch-0-0-800-40-#mapreader_text.png#.png" in runner.patch_predictions.keys() + ) + assert isinstance( + runner.patch_predictions["patch-0-0-800-40-#mapreader_text.png#.png"], list + ) + assert isinstance( + runner.patch_predictions["patch-0-0-800-40-#mapreader_text.png#.png"][0], + PatchPrediction, + ) + + +def test_deepsolo_load_patch_predictions_dataframe(runner_run_all): + runner = runner_run_all + patch_preds = runner._dict_to_dataframe(runner.patch_predictions) + _ = runner.convert_to_coords() + assert len(runner.geo_predictions) # this will be empty after reloading + runner.load_patch_predictions(patch_preds) + assert len(runner.patch_predictions) + assert len(runner.geo_predictions) == 0 + assert ( + "patch-0-0-800-40-#mapreader_text.png#.png" in runner.patch_predictions.keys() + ) + assert isinstance( + runner.patch_predictions["patch-0-0-800-40-#mapreader_text.png#.png"], list + ) + assert isinstance( + runner.patch_predictions["patch-0-0-800-40-#mapreader_text.png#.png"][0], + PatchPrediction, + ) + + +def test_deepsolo_load_patch_predictions_centroid(runner_run_all, tmp_path): + runner = runner_run_all + _ = runner.convert_to_coords() + assert len(runner.geo_predictions) + runner.save_to_csv(tmp_path, centroid=True) + runner.load_patch_predictions(f"{tmp_path}/patch_predictions.csv") + assert len(runner.patch_predictions) + assert len(runner.geo_predictions) == 0 + assert ( + "patch-0-0-800-40-#mapreader_text.png#.png" in runner.patch_predictions.keys() + ) + assert isinstance( + runner.patch_predictions["patch-0-0-800-40-#mapreader_text.png#.png"], list + ) + assert isinstance( + runner.patch_predictions["patch-0-0-800-40-#mapreader_text.png#.png"][0], + PatchPrediction, + ) + + +def test_deepsolo_load_patch_predictions_errors(runner_run_all, tmp_path): + runner = runner_run_all + with pytest.raises( + ValueError, match="must be a pandas DataFrame or path to a CSV file" + ): + runner.load_patch_predictions("fake_file.geojson") def test_deepsolo_search_preds(runner_run_all, mock_response):