Skip to content

Commit

Permalink
add tests deepsolo
Browse files Browse the repository at this point in the history
  • Loading branch information
rwood-97 committed Dec 4, 2024
1 parent a46f481 commit 13ba2fc
Show file tree
Hide file tree
Showing 2 changed files with 239 additions and 11 deletions.
28 changes: 17 additions & 11 deletions mapreader/spot_text/runner_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -500,7 +500,9 @@ def save_to_geojson(

if centroid:
geo_df["polygon"] = geo_df["geometry"].to_wkt()
geo_df["geometry"] = geo_df["geometry"].centroid
geo_df["geometry"] = (
geo_df["geometry"].to_crs("27700").centroid.to_crs(geo_df.crs)
)

geo_df.to_file(path_save, driver="GeoJSON", engine="pyogrio")

Expand Down Expand Up @@ -553,7 +555,9 @@ def save_to_csv(
geo_df = self._dict_to_dataframe(self.geo_predictions)
if centroid:
geo_df["polygon"] = geo_df["geometry"]
geo_df["geometry"] = geo_df["geometry"].centroid
geo_df["geometry"] = (
geo_df["geometry"].to_crs("27700").centroid.to_crs(geo_df.crs)
)
geo_df.to_csv(f"{path_save}/geo_predictions.csv")

def show_predictions(
Expand Down Expand Up @@ -751,15 +755,15 @@ def load_patch_predictions(
"[ERROR] ``patch_preds`` must be a pandas DataFrame or path to a CSV file."
)

# if we have a polygon column, this implies the pixel_geometry column is the centroid
if "polygon" in patch_preds.columns:
patch_preds["pixel_geometry"] = patch_preds["polygon"]
patch_preds.drop(columns=["polygon"], inplace=True)
# if we have a polygon column, this implies the pixel_geometry column is the centroid
if "polygon" in patch_preds.columns:
patch_preds["pixel_geometry"] = patch_preds["polygon"]
patch_preds.drop(columns=["polygon"], inplace=True)

# convert pixel_geometry to shapely geometry
patch_preds["pixel_geometry"] = patch_preds["pixel_geometry"].apply(
lambda x: from_wkt(x)
)
# convert pixel_geometry to shapely geometry
patch_preds["pixel_geometry"] = patch_preds["pixel_geometry"].apply(
lambda x: from_wkt(x)
)

self.patch_predictions = {} # reset patch predictions

Expand Down Expand Up @@ -1068,6 +1072,8 @@ def save_search_results_to_geojson(

if centroid:
geo_df["polygon"] = geo_df["geometry"].to_wkt()
geo_df["geometry"] = geo_df["geometry"].centroid
geo_df["geometry"] = (

Check warning on line 1075 in mapreader/spot_text/runner_base.py

View check run for this annotation

Codecov / codecov/patch

mapreader/spot_text/runner_base.py#L1074-L1075

Added lines #L1074 - L1075 were not covered by tests
geo_df["geometry"].to_crs("27700").centroid.to_crs(geo_df.crs)
)

geo_df.to_file(path_save, driver="GeoJSON", engine="pyogrio")
222 changes: 222 additions & 0 deletions tests/test_text_spotting/test_deepsolo_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@
import pickle

import geopandas as gpd
import numpy as np
import pandas as pd
import pytest
from deepsolo.config import get_cfg
from detectron2.engine import DefaultPredictor
from detectron2.structures.instances import Instances
from shapely import Polygon

from mapreader import DeepSoloRunner
from mapreader.load import MapImages
Expand Down Expand Up @@ -44,6 +46,7 @@ def init_dataframes(sample_dir, tmp_path):
maps.add_metadata(f"{sample_dir}/mapreader_text_metadata.csv")
maps.patchify_all(patch_size=800, path_save=tmp_path)
maps.check_georeferencing()
assert maps.georeferenced
parent_df, patch_df = maps.convert_images()
return parent_df, patch_df

Expand Down Expand Up @@ -140,6 +143,80 @@ def test_deepsolo_init_tsv(init_dataframes, tmp_path):
assert isinstance(runner.patch_df.iloc[0]["coordinates"], tuple)


def test_deepsolo_init_geojson(init_dataframes, tmp_path, mock_response):
    """Build a runner from GeoJSON files on disk and check the ``run_all`` output."""
    parent_df, patch_df = init_dataframes
    parent_path = f"{tmp_path}/parent_df.geojson"
    patch_path = f"{tmp_path}/patch_df.geojson"
    parent_df.to_file(parent_path, driver="GeoJSON")
    patch_df.to_file(patch_path, driver="GeoJSON")

    # both dataframes are handed over as file paths rather than in-memory objects
    runner = DeepSoloRunner(
        patch_path,
        parent_df=parent_path,
        cfg_file=f"{DEEPSOLO_PATH}/configs/R_50/IC15/finetune_150k_tt_mlt_13_15_textocr.yaml",
    )
    assert isinstance(runner, DeepSoloRunner)
    assert isinstance(runner.predictor, DefaultPredictor)
    first_parent = runner.parent_df.iloc[0]
    assert isinstance(first_parent["geometry"], Polygon)

    patch_id = "patch-0-0-800-40-#mapreader_text.png#.png"
    predictions = runner.run_all()
    assert isinstance(predictions, dict)
    assert patch_id in predictions
    assert isinstance(predictions[patch_id], list)
    assert isinstance(predictions[patch_id][0], PatchPrediction)


def test_deepsolo_init_errors(init_dataframes):
    """Check that unsupported ``patch_df``/``parent_df`` inputs raise ``ValueError``."""
    parent_df, patch_df = init_dataframes
    cfg = f"{DEEPSOLO_PATH}/configs/R_50/IC15/finetune_150k_tt_mlt_13_15_textocr.yaml"
    expected_msg = "path to a CSV/TSV/etc or geojson"

    invalid_kwargs = [
        # a path with an unsupported extension, for each dataframe in turn
        {"patch_df": "fake_file.txt", "parent_df": parent_df, "cfg_file": cfg},
        {"patch_df": patch_df, "parent_df": "fake_file.txt", "cfg_file": cfg},
        # an object that is neither a dataframe nor a path; no cfg_file is
        # passed for these two cases
        {"patch_df": np.array([1, 2, 3]), "parent_df": parent_df},
        {"patch_df": patch_df, "parent_df": np.array([1, 2, 3])},
    ]
    for kwargs in invalid_kwargs:
        with pytest.raises(ValueError, match=expected_msg):
            DeepSoloRunner(**kwargs)


def test_check_georeferencing(init_dataframes):
    """Check the ``georeferenced`` flag for three variants of the parent dataframe."""
    parent_df, patch_df = init_dataframes
    cfg = f"{DEEPSOLO_PATH}/configs/R_50/IC15/finetune_150k_tt_mlt_13_15_textocr.yaml"

    def checked_runner(parents):
        # build a fresh runner for this parent dataframe and run the check on it
        built = DeepSoloRunner(patch_df, parent_df=parents, cfg_file=cfg)
        built.check_georeferencing()
        return built

    # unmodified parent dataframe
    assert checked_runner(parent_df).georeferenced

    # still expected to be georeferenced without the dlat/dlon columns
    without_deltas = parent_df.drop(columns=["dlat", "dlon"])
    assert checked_runner(without_deltas).georeferenced

    # not expected to be georeferenced once the coordinates column is gone
    without_coords = parent_df.drop(columns=["coordinates"])
    assert not checked_runner(without_coords).georeferenced


def test_deepsolo_run_all(init_runner, mock_response):
runner = init_runner
# dict
Expand Down Expand Up @@ -246,6 +323,151 @@ def test_deepsolo_save_to_geojson(runner_run_all, tmp_path, mock_response):
assert set(gdf.columns) == set(
["image_id", "patch_id", "pixel_geometry", "geometry", "crs", "text", "score"]
)
runner.save_to_geojson(f"{tmp_path}/text_centroid.geojson", centroid=True)
assert os.path.exists(f"{tmp_path}/text_centroid.geojson")
gdf_centroid = gpd.read_file(f"{tmp_path}/text_centroid.geojson")
assert isinstance(gdf_centroid, gpd.GeoDataFrame)
assert set(gdf_centroid.columns) == set(
[
"image_id",
"patch_id",
"pixel_geometry",
"geometry",
"crs",
"text",
"score",
"polygon",
]
)


def test_deepsolo_load_geo_predictions(runner_run_all, tmp_path):
    """Round-trip geo predictions through a GeoJSON file and check what is reloaded."""
    runner = runner_run_all
    geojson_path = f"{tmp_path}/text.geojson"
    runner.convert_to_coords()
    runner.save_to_geojson(geojson_path)

    # clear the in-memory predictions so the assertions only see reloaded data
    runner.geo_predictions = {}
    runner.load_geo_predictions(geojson_path)

    image_id = "mapreader_text.png"
    assert len(runner.geo_predictions)
    assert image_id in runner.geo_predictions.keys()
    reloaded = runner.geo_predictions[image_id]
    assert isinstance(reloaded, list)
    assert isinstance(runner.geo_predictions[image_id][0], GeoPrediction)


def test_deepsolo_load_geo_predictions_errors(runner_run_all, tmp_path):
    """Check that a ``.csv`` path passed to ``load_geo_predictions`` raises ``ValueError``."""
    expected_msg = "must be a path to a geojson file"
    with pytest.raises(ValueError, match=expected_msg):
        runner_run_all.load_geo_predictions("fakefile.csv")


def test_deepsolo_save_to_csv_polygon(runner_run_all, tmp_path, mock_response):
    """Check the expected CSV files exist after each conversion step (default geometry)."""
    runner = runner_run_all
    patch_csv = f"{tmp_path}/patch_predictions.csv"
    parent_csv = f"{tmp_path}/parent_predictions.csv"
    geo_csv = f"{tmp_path}/geo_predictions.csv"

    # patch-level predictions only
    runner.save_to_csv(tmp_path)
    assert os.path.exists(patch_csv)

    # after converting to parent pixel bounds
    runner.convert_to_parent_pixel_bounds()
    runner.save_to_csv(tmp_path)
    for written in (patch_csv, parent_csv):
        assert os.path.exists(written)

    # after converting to geographic coordinates
    runner.convert_to_coords()
    runner.save_to_csv(tmp_path)
    for written in (patch_csv, parent_csv, geo_csv):
        assert os.path.exists(written)


def test_deepsolo_save_to_csv_centroid(runner_run_all, tmp_path, mock_response):
    """Check the expected CSV files exist after each conversion step with ``centroid=True``."""
    runner = runner_run_all
    patch_csv = f"{tmp_path}/patch_predictions.csv"
    parent_csv = f"{tmp_path}/parent_predictions.csv"
    geo_csv = f"{tmp_path}/geo_predictions.csv"

    # patch-level predictions only
    runner.save_to_csv(tmp_path, centroid=True)
    assert os.path.exists(patch_csv)

    # after converting to parent pixel bounds
    runner.convert_to_parent_pixel_bounds()
    runner.save_to_csv(tmp_path, centroid=True)
    for written in (patch_csv, parent_csv):
        assert os.path.exists(written)

    # after converting to geographic coordinates
    runner.convert_to_coords()
    runner.save_to_csv(tmp_path, centroid=True)
    for written in (patch_csv, parent_csv, geo_csv):
        assert os.path.exists(written)


def test_deepsolo_save_to_csv_errors(runner_run_all, tmp_path, mock_response):
    """Check that ``save_to_csv`` raises ``ValueError`` when patch predictions are empty."""
    runner = runner_run_all
    # empty the predictions so there is nothing to write
    runner.patch_predictions = {}
    expected_msg = "No patch predictions found"
    with pytest.raises(ValueError, match=expected_msg):
        runner.save_to_csv(tmp_path)


def test_deepsolo_load_patch_predictions(runner_run_all, tmp_path):
    """Reload patch predictions from CSV and check geo predictions are emptied."""
    runner = runner_run_all
    runner.convert_to_coords()
    # populated now; expected to be empty once patch predictions are reloaded
    assert len(runner.geo_predictions)

    runner.save_to_csv(tmp_path)
    runner.load_patch_predictions(f"{tmp_path}/patch_predictions.csv")

    assert len(runner.patch_predictions)
    assert len(runner.geo_predictions) == 0

    patch_id = "patch-0-0-800-40-#mapreader_text.png#.png"
    assert patch_id in runner.patch_predictions.keys()
    assert isinstance(runner.patch_predictions[patch_id], list)
    assert isinstance(runner.patch_predictions[patch_id][0], PatchPrediction)


def test_deepsolo_load_patch_predictions_dataframe(runner_run_all):
    """Reload patch predictions from an in-memory dataframe instead of a CSV path."""
    runner = runner_run_all
    # snapshot the current patch predictions as a dataframe before converting
    patch_preds = runner._dict_to_dataframe(runner.patch_predictions)
    runner.convert_to_coords()
    # populated now; expected to be empty once patch predictions are reloaded
    assert len(runner.geo_predictions)

    runner.load_patch_predictions(patch_preds)

    assert len(runner.patch_predictions)
    assert len(runner.geo_predictions) == 0

    patch_id = "patch-0-0-800-40-#mapreader_text.png#.png"
    assert patch_id in runner.patch_predictions.keys()
    assert isinstance(runner.patch_predictions[patch_id], list)
    assert isinstance(runner.patch_predictions[patch_id][0], PatchPrediction)


def test_deepsolo_load_patch_predictions_centroid(runner_run_all, tmp_path):
    """Reload patch predictions from a CSV that was saved with ``centroid=True``."""
    runner = runner_run_all
    runner.convert_to_coords()
    assert len(runner.geo_predictions)

    # save with centroid geometry, then reload from the resulting CSV
    runner.save_to_csv(tmp_path, centroid=True)
    runner.load_patch_predictions(f"{tmp_path}/patch_predictions.csv")

    assert len(runner.patch_predictions)
    assert len(runner.geo_predictions) == 0

    patch_id = "patch-0-0-800-40-#mapreader_text.png#.png"
    assert patch_id in runner.patch_predictions.keys()
    assert isinstance(runner.patch_predictions[patch_id], list)
    assert isinstance(runner.patch_predictions[patch_id][0], PatchPrediction)


def test_deepsolo_load_patch_predictions_errors(runner_run_all, tmp_path):
    """Check that a ``.geojson`` path passed to ``load_patch_predictions`` raises ``ValueError``."""
    expected_msg = "must be a pandas DataFrame or path to a CSV file"
    with pytest.raises(ValueError, match=expected_msg):
        runner_run_all.load_patch_predictions("fake_file.geojson")


def test_deepsolo_search_preds(runner_run_all, mock_response):
Expand Down

0 comments on commit 13ba2fc

Please sign in to comment.