From e3357bea44bf7237b3637f25143be7fdaf442dd2 Mon Sep 17 00:00:00 2001 From: Naushir Patuck Date: Mon, 12 Aug 2024 11:23:28 +0100 Subject: [PATCH 1/3] options: Add StreamRole::Viewfinder when enumerating for --list-cameras The change for rpi::ScalerCrops changes the behaviour of how crops are advertised - we now need a non-RAW output stream to be configured to return the available crop control. Signed-off-by: Naushir Patuck --- core/options.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/options.cpp b/core/options.cpp index 2d030c57..58805253 100644 --- a/core/options.cpp +++ b/core/options.cpp @@ -432,7 +432,8 @@ bool Options::Parse(int argc, char *argv[]) if (area) sensor_props << (*area)[0].size().toString() << " "; - std::unique_ptr config = cam->generateConfiguration({libcamera::StreamRole::Raw}); + std::unique_ptr config = + cam->generateConfiguration({ libcamera::StreamRole::Raw, libcamera::StreamRole::Viewfinder }); if (!config) throw std::runtime_error("failed to generate capture configuration"); const StreamFormats &formats = config->at(0).formats(); From f58bfe1bab898d10eaeb9a733f0d287107cb97bd Mon Sep 17 00:00:00 2001 From: Naushir Patuck Date: Tue, 6 Aug 2024 09:48:33 +0100 Subject: [PATCH 2/3] postprocess: hailo: Handle multiple scaler crops on the ISP outputs Add a HailoPostProcessingStage::ConvertInferenceCoordinates() helper that converts from the inference (low res) image co-ordinate space to the main image space, accounting for different scaler crops. Fixup the Yolo inference and pose estimation stages to use this new helper. Signed-off-by: Naushir Patuck --- .../hailo/hailo_postprocessing_stage.cpp | 31 +++++++++++ .../hailo/hailo_postprocessing_stage.hpp | 5 ++ .../hailo/hailo_yolo_inference.cpp | 38 ++++++++++---- .../hailo/hailo_yolov8_pose.cpp | 51 ++++++++++++------- 4 files changed, 96 insertions(+), 29 deletions(-) diff --git a/post_processing_stages/hailo/hailo_postprocessing_stage.cpp b/post_processing_stages/hailo/hailo_postprocessing_stage.cpp index 0ae3d7df..efe38a44 100644 --- a/post_processing_stages/hailo/hailo_postprocessing_stage.cpp +++ b/post_processing_stages/hailo/hailo_postprocessing_stage.cpp @@ -15,6 +15,9 @@ using namespace hailort; +using Rectangle = libcamera::Rectangle; +using Size = libcamera::Size; + Allocator::Allocator() { } @@ -255,3 +258,31 @@ HailoROIPtr HailoPostProcessingStage::MakeROI(const std::vector &outp return roi; } + +Rectangle HailoPostProcessingStage::ConvertInferenceCoordinates(const std::vector &coords, + const std::vector &scaler_crops) const +{ + if (coords.size() != 4 || scaler_crops.size() != 2) + return {}; + + // Convert the inference image co-ordinates into the final ISP output co-ordinates. 
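+	// Normalised inference co-ordinates are first mapped onto the low res
+	// scaler crop (scaler_crops[1]), moved into sensor co-ordinates, clipped
+	// and re-based to the main scaler crop (scaler_crops[0]), and finally
+	// scaled to the ISP output size.
+	//
+	// Illustrative example (assumed numbers only, not from a real sensor):
+	// with a main crop of (0, 0)/2000x1500, a low res crop of
+	// (250, 0)/1500x1500 and a 1920x1440 ISP output, a normalised box of
+	// (0.5, 0.5, 0.25, 0.25) becomes roughly (750, 750)/375x375 on the low
+	// res crop, (1000, 750)/375x375 in sensor co-ordinates and
+	// (960, 720)/360x360 on the ISP output.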
+ const Size &isp_output_size = output_stream_->configuration().size; + + // Object scaled to the full sensor resolution + Rectangle obj; + obj.x = std::round(coords[0] * (scaler_crops[1].width - 1)); + obj.y = std::round(coords[1] * (scaler_crops[1].height - 1)); + obj.width = std::round(coords[2] * (scaler_crops[1].width - 1)); + obj.height = std::round(coords[3] * (scaler_crops[1].height - 1)); + + // Object on the low res scaler crop -> translated by the start of the low res crop offset + const Rectangle obj_translated_l = obj.translatedBy(scaler_crops[1].topLeft()); + // -> bounded to the high res output crop + const Rectangle obj_bounded = obj_translated_l.boundedTo(scaler_crops[0]); + // -> translated to the start of the high res crop offset + const Rectangle obj_translated_h = obj_bounded.translatedBy(-scaler_crops[0].topLeft()); + // -> and scaled to the ISP output. + const Rectangle obj_scaled = obj_translated_h.scaledBy(isp_output_size, scaler_crops[0].size()); + + return obj_scaled; +} diff --git a/post_processing_stages/hailo/hailo_postprocessing_stage.hpp b/post_processing_stages/hailo/hailo_postprocessing_stage.hpp index bf2955a0..e38ae5f4 100644 --- a/post_processing_stages/hailo/hailo_postprocessing_stage.hpp +++ b/post_processing_stages/hailo/hailo_postprocessing_stage.hpp @@ -13,6 +13,8 @@ #include #include +#include + #include #include "hailo_objects.hpp" @@ -110,6 +112,9 @@ class HailoPostProcessingStage : public PostProcessingStage hailo_status DispatchJob(const uint8_t *input, hailort::AsyncInferJob &job, std::vector &output_tensors); HailoROIPtr MakeROI(const std::vector &output_tensors) const; + libcamera::Rectangle ConvertInferenceCoordinates(const std::vector &coords, + const std::vector &scaler_crops) const; + libcamera::Stream *low_res_stream_; libcamera::Stream *output_stream_; libcamera::Stream *raw_stream_; diff --git a/post_processing_stages/hailo/hailo_yolo_inference.cpp b/post_processing_stages/hailo/hailo_yolo_inference.cpp index 9720c1b2..85883755 100644 --- a/post_processing_stages/hailo/hailo_yolo_inference.cpp +++ b/post_processing_stages/hailo/hailo_yolo_inference.cpp @@ -13,6 +13,7 @@ #include #include +#include #include #include "core/rpicam_app.hpp" @@ -28,6 +29,8 @@ using PostProcFuncPtrNms = void (*)(HailoROIPtr); using InitFuncPtr = YoloParams *(*)(std::string, std::string); using FreeFuncPtr = void (*)(void *); +using Rectangle = libcamera::Rectangle; + namespace fs = std::filesystem; #define NAME "hailo_yolo_inference" @@ -49,7 +52,7 @@ class YoloInference : public HailoPostProcessingStage bool Process(CompletedRequestPtr &completed_request) override; private: - std::vector runInference(const uint8_t *frame); + std::vector runInference(const uint8_t *frame, const std::vector &scaler_crops); void filterOutputObjects(std::vector &objects); struct LtObject @@ -187,7 +190,23 @@ bool YoloInference::Process(CompletedRequestPtr &completed_request) return false; } - std::vector objects = runInference(input_ptr); + std::vector scaler_crops; + auto scaler_crop = completed_request->metadata.get(controls::ScalerCrop); + auto rpi_scaler_crop = completed_request->metadata.get(controls::rpi::ScalerCrops); + + if (rpi_scaler_crop) + { + for (unsigned int i = 0; i < rpi_scaler_crop->size(); i++) + scaler_crops.push_back(rpi_scaler_crop->data()[i]); + } + else if (scaler_crop) + { + // Push-back twice, once for main, once for low res. 
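+		// (With main and low res sharing the same crop,
+		// ConvertInferenceCoordinates() effectively reduces to scaling the
+		// normalised box straight to the ISP output size, matching the old
+		// single-crop behaviour.)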
+ scaler_crops.push_back(*scaler_crop); + scaler_crops.push_back(*scaler_crop); + } + + std::vector objects = runInference(input_ptr, scaler_crops); if (objects.size()) { if (temporal_filtering_) @@ -215,7 +234,7 @@ bool YoloInference::Process(CompletedRequestPtr &completed_request) return false; } -std::vector YoloInference::runInference(const uint8_t *frame) +std::vector YoloInference::runInference(const uint8_t *frame, const std::vector &scaler_crops) { hailort::AsyncInferJob job; std::vector output_tensors; @@ -265,7 +284,6 @@ std::vector YoloInference::runInference(const uint8_t *frame) // Translate results to the rpicam-apps Detection objects std::vector results; - Size output_size = output_stream_->configuration().size; for (auto const &d : detections) { if (d->get_confidence() < threshold_) @@ -273,12 +291,12 @@ std::vector YoloInference::runInference(const uint8_t *frame) // Extract bounding box co-ordinates in the output image co-ordinates. auto const &box = d->get_bbox(); - int x0 = std::round(std::max(box.xmin(), 0.0f) * (output_size.width - 1)); - int x1 = std::round(std::min(box.xmax(), 1.0f) * (output_size.width - 1)); - int y0 = std::round(std::max(box.ymin(), 0.0f) * (output_size.height - 1)); - int y1 = std::round(std::min(box.ymax(), 1.0f) * (output_size.height - 1)); - results.emplace_back(d->get_class_id(), d->get_label(), d->get_confidence(), x0, y0, - x1 - x0, y1 - y0); + const float x0 = std::max(box.xmin(), 0.0f); + const float x1 = std::min(box.xmax(), 1.0f); + const float y0 = std::max(box.ymin(), 0.0f); + const float y1 = std::min(box.ymax(), 1.0f); + libcamera::Rectangle r = ConvertInferenceCoordinates({ x0, y0, x1 - x0, y1 - y0 }, scaler_crops); + results.emplace_back(d->get_class_id(), d->get_label(), d->get_confidence(), r.x, r.y, r.width, r.height); LOG(2, "Object: " << results.back().toString()); if (--max_detections_ == 0) diff --git a/post_processing_stages/hailo/hailo_yolov8_pose.cpp b/post_processing_stages/hailo/hailo_yolov8_pose.cpp index 16a06897..72a10342 100644 --- a/post_processing_stages/hailo/hailo_yolov8_pose.cpp +++ b/post_processing_stages/hailo/hailo_yolov8_pose.cpp @@ -18,6 +18,7 @@ #include "core/rpicam_app.hpp" #include "hailo_postprocessing_stage.hpp" +using Rectangle = libcamera::Rectangle; using Size = libcamera::Size; using PostProcFuncPtr = std::pair, std::vector>(*)(HailoROIPtr); @@ -38,7 +39,7 @@ class YoloPose : public HailoPostProcessingStage bool Process(CompletedRequestPtr &completed_request) override; private: - void runInference(const uint8_t *input, uint32_t *output); + void runInference(const uint8_t *input, uint32_t *output, const std::vector &scaler_crops); PostProcessingLib postproc_; }; @@ -113,16 +114,32 @@ bool YoloPose::Process(CompletedRequestPtr &completed_request) return false; } + std::vector scaler_crops; + auto scaler_crop = completed_request->metadata.get(controls::ScalerCrop); + auto rpi_scaler_crop = completed_request->metadata.get(controls::rpi::ScalerCrops); + + if (rpi_scaler_crop) + { + for (unsigned int i = 0; i < rpi_scaler_crop->size(); i++) + scaler_crops.push_back(rpi_scaler_crop->data()[i]); + } + else if (scaler_crop) + { + // Push-back twice, once for main, once for low res. 
+		scaler_crops.push_back(*scaler_crop);
+		scaler_crops.push_back(*scaler_crop);
+	}
+
 	BufferWriteSync w(app_, completed_request->buffers[output_stream_]);
 	libcamera::Span buffer = w.Get()[0];
 	uint32_t *output = (uint32_t *)buffer.data();
-	runInference(input_ptr, output);
+	runInference(input_ptr, output, scaler_crops);
 	return false;
 }
-void YoloPose::runInference(const uint8_t *input, uint32_t *output)
+void YoloPose::runInference(const uint8_t *input, uint32_t *output, const std::vector &scaler_crops)
 {
 	hailort::AsyncInferJob job;
 	std::vector output_tensors;
@@ -159,30 +176,26 @@ void YoloPose::runInference(const uint8_t *input, uint32_t *output)
 			continue;
 		HailoBBox bbox = detection->get_bbox();
-
-		cv::rectangle(image,
-					  cv::Point2f(bbox.xmin() * float(output_stream_info_.width),
-								  bbox.ymin() * float(output_stream_info_.height)),
-					  cv::Point2f(bbox.xmax() * float(output_stream_info_.width),
-								  bbox.ymax() * float(output_stream_info_.height)),
+		const float x0 = std::max(bbox.xmin(), 0.0f);
+		const float x1 = std::min(bbox.xmax(), 1.0f);
+		const float y0 = std::max(bbox.ymin(), 0.0f);
+		const float y1 = std::min(bbox.ymax(), 1.0f);
+		Rectangle r = ConvertInferenceCoordinates({ x0, y0, x1 - x0, y1 - y0 }, scaler_crops);
+		cv::rectangle(image, cv::Point2f(r.x, r.y), cv::Point2f(r.x + r.width, r.y + r.height),
 					  cv::Scalar(0, 0, 255), 1);
 	}
 	for (auto &keypoint : keypoints_and_pairs.first)
 	{
-		cv::circle(image,
-				   cv::Point(keypoint.xs * float(output_stream_info_.width),
-							 keypoint.ys * float(output_stream_info_.height)),
-				   3, cv::Scalar(255, 0, 255), -1);
+		Rectangle r = ConvertInferenceCoordinates({ keypoint.xs, keypoint.ys, 1, 1 }, scaler_crops);
+		cv::circle(image, cv::Point(r.x, r.y), 3, cv::Scalar(255, 0, 255), -1);
	}
-	for (PairPairs &p : keypoints_and_pairs.second)
+	for (const PairPairs &p : keypoints_and_pairs.second)
 	{
-		auto pt1 =
-			cv::Point(p.pt1.first * float(output_stream_info_.width), p.pt1.second * float(output_stream_info_.height));
-		auto pt2 =
-			cv::Point(p.pt2.first * float(output_stream_info_.width), p.pt2.second * float(output_stream_info_.height));
-		cv::line(image, pt1, pt2, cv::Scalar(255, 0, 255), 3);
+		Rectangle r1 = ConvertInferenceCoordinates({ p.pt1.first, p.pt1.second, 1, 1 }, scaler_crops);
+		Rectangle r2 = ConvertInferenceCoordinates({ p.pt2.first, p.pt2.second, 1, 1 }, scaler_crops);
+		cv::line(image, cv::Point(r1.x, r1.y), cv::Point(r2.x, r2.y), cv::Scalar(255, 0, 255), 3);
 	}
 }

From 92938e90fcaa79ce382012e87409c385486f98ed Mon Sep 17 00:00:00 2001
From: Naushir Patuck
Date: Wed, 7 Aug 2024 10:48:02 +0100
Subject: [PATCH 3/3] core: Add aspect ratio control for lores stream

Add a new command line argument "--lores-par" that, when set, preserves the
1:1 aspect ratio of the low res stream. This is only possible on the PiSP
platform. The default behaviour is to have this switch disabled, allowing for
identical behaviour between VC4 and PiSP platforms. This requires the use of
the rpi::ScalerCrops vendor control.

This switch can also be triggered via the "rpicam-apps.lores.par" key in the
postprocessing JSON file.
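As an illustration only (the stream size below is assumed, not part of this
patch), the switch can be set on the command line:

    rpicam-hello --lores-width 640 --lores-height 640 --lores-par

or, assuming the usual "rpicam-apps" section layout of the post-processing
JSON file:

    "rpicam-apps": { "lores": { "width": 640, "height": 640, "par": true } }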
Signed-off-by: Naushir Patuck --- core/options.cpp | 7 +++++-- core/options.hpp | 1 + core/post_processor.cpp | 2 ++ core/rpicam_app.cpp | 38 ++++++++++++++++++++++++++++---------- 4 files changed, 36 insertions(+), 12 deletions(-) diff --git a/core/options.cpp b/core/options.cpp index 58805253..fdd1ef20 100644 --- a/core/options.cpp +++ b/core/options.cpp @@ -270,9 +270,11 @@ Options::Options() ("tuning-file", value(&tuning_file)->default_value("-"), "Name of camera tuning file to use, omit this option for libcamera default behaviour") ("lores-width", value(&lores_width)->default_value(0), - "Width of low resolution frames (use 0 to omit low resolution stream") + "Width of low resolution frames (use 0 to omit low resolution stream)") ("lores-height", value(&lores_height)->default_value(0), - "Height of low resolution frames (use 0 to omit low resolution stream") + "Height of low resolution frames (use 0 to omit low resolution stream)") + ("lores-par", value(&lores_par)->default_value(false)->implicit_value(true), + "Preserve the 1:1 pixel aspect ratio of the low res image (where possible) by applying a different crop on the stream.") ("mode", value(&mode_string), "Camera mode as W:H:bit-depth:packing, where packing is P (packed) or U (unpacked)") ("viewfinder-mode", value(&viewfinder_mode_string), @@ -695,6 +697,7 @@ void Options::Print() const std::cerr << " tuning-file: " << (tuning_file == "-" ? "(libcamera)" : tuning_file) << std::endl; std::cerr << " lores-width: " << lores_width << std::endl; std::cerr << " lores-height: " << lores_height << std::endl; + std::cerr << " lores-par: " << lores_par << std::endl; if (afMode_index != -1) std::cerr << " autofocus-mode: " << afMode << std::endl; if (afRange_index != -1) diff --git a/core/options.hpp b/core/options.hpp index d6b5cb16..624c14b4 100644 --- a/core/options.hpp +++ b/core/options.hpp @@ -151,6 +151,7 @@ struct Options bool qt_preview; unsigned int lores_width; unsigned int lores_height; + bool lores_par; unsigned int camera; std::string mode_string; Mode mode; diff --git a/core/post_processor.cpp b/core/post_processor.cpp index 5d796faf..638226c1 100644 --- a/core/post_processor.cpp +++ b/core/post_processor.cpp @@ -120,6 +120,7 @@ void PostProcessor::Read(std::string const &filename) unsigned int lores_width = node.get("lores.width"); unsigned int lores_height = node.get("lores.height"); + bool lores_par = node.get("lores.par", app_->GetOptions()->lores_par); std::string lores_format_str = node.get("lores.format", "yuv420"); libcamera::PixelFormat lores_format = libcamera::formats::YUV420; @@ -132,6 +133,7 @@ void PostProcessor::Read(std::string const &filename) app_->GetOptions()->lores_width = lores_width; app_->GetOptions()->lores_height = lores_height; + app_->GetOptions()->lores_par = lores_par; app_->lores_format_ = lores_format; LOG(1, "Postprocessing requested lores: " << lores_width << "x" << lores_height << " " << lores_format); diff --git a/core/rpicam_app.cpp b/core/rpicam_app.cpp index 16232b9e..7c1174fa 100644 --- a/core/rpicam_app.cpp +++ b/core/rpicam_app.cpp @@ -635,17 +635,35 @@ void RPiCamApp::StartCamera() // Build a list of initial controls that we must set in the camera before starting it. // We don't overwrite anything the application may have set before calling us. 
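	// Crop handling: crops[0] applies to the main output (the user ROI if
	// given, otherwise the default crop); if a low res stream is configured
	// without --lores-par, the same crop is repeated for it so both outputs
	// behave as before. All entries are passed through the rpi::ScalerCrops
	// vendor control.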
- if (!controls_.get(controls::ScalerCrop) && options_->roi_width != 0 && options_->roi_height != 0) + if (!controls_.get(controls::ScalerCrop) && !controls_.get(controls::rpi::ScalerCrops)) { - Rectangle sensor_area = camera_->controls().at(&controls::ScalerCrop).max().get(); - int x = options_->roi_x * sensor_area.width; - int y = options_->roi_y * sensor_area.height; - int w = options_->roi_width * sensor_area.width; - int h = options_->roi_height * sensor_area.height; - Rectangle crop(x, y, w, h); - crop.translateBy(sensor_area.topLeft()); - LOG(2, "Using crop " << crop.toString()); - controls_.set(controls::ScalerCrop, crop); + const Rectangle sensor_area = camera_->controls().at(&controls::ScalerCrop).max().get(); + const Rectangle default_crop = camera_->controls().at(&controls::ScalerCrop).def().get(); + std::vector crops; + + if (options_->roi_width != 0 && options_->roi_height != 0) + { + int x = options_->roi_x * sensor_area.width; + int y = options_->roi_y * sensor_area.height; + unsigned int w = options_->roi_width * sensor_area.width; + unsigned int h = options_->roi_height * sensor_area.height; + crops.push_back({ x, y, w, h }); + crops.back().translateBy(sensor_area.topLeft()); + } + else + { + crops.push_back(default_crop); + } + + LOG(2, "Using crop (main) " << crops.back().toString()); + + if (options_->lores_width != 0 && options_->lores_height != 0 && !options_->lores_par) + { + crops.push_back(crops.back()); + LOG(2, "Using crop (lores) " << crops.back().toString()); + } + + controls_.set(controls::rpi::ScalerCrops, libcamera::Span(crops.data(), crops.size())); } if (!controls_.get(controls::AfWindows) && !controls_.get(controls::AfMetering) && options_->afWindow_width != 0 &&