From e3357bea44bf7237b3637f25143be7fdaf442dd2 Mon Sep 17 00:00:00 2001 From: Naushir Patuck Date: Mon, 12 Aug 2024 11:23:28 +0100 Subject: [PATCH 1/3] options: Add StreamRole::Viewfinder when enumerating for --list-cameras The change for rpi::ScalerCrops changes the behaviour of how crops are advertised - we now need a non-RAW output stream to be configured to return the available crop control. Signed-off-by: Naushir Patuck --- core/options.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/options.cpp b/core/options.cpp index 2d030c57..58805253 100644 --- a/core/options.cpp +++ b/core/options.cpp @@ -432,7 +432,8 @@ bool Options::Parse(int argc, char *argv[]) if (area) sensor_props << (*area)[0].size().toString() << " "; - std::unique_ptr config = cam->generateConfiguration({libcamera::StreamRole::Raw}); + std::unique_ptr config = + cam->generateConfiguration({ libcamera::StreamRole::Raw, libcamera::StreamRole::Viewfinder }); if (!config) throw std::runtime_error("failed to generate capture configuration"); const StreamFormats &formats = config->at(0).formats(); From f58bfe1bab898d10eaeb9a733f0d287107cb97bd Mon Sep 17 00:00:00 2001 From: Naushir Patuck Date: Tue, 6 Aug 2024 09:48:33 +0100 Subject: [PATCH 2/3] postprocess: hailo: Handle multiple scaler crops on the ISP outputs Add a HailoPostProcessingStage::ConvertInferenceCoordinates() helper that converts from the inference (low res) image co-ordinate space to the main image space, accounting for different scaler crops. Fixup the Yolo inference and pose estimation stages to use this new helper. Signed-off-by: Naushir Patuck --- .../hailo/hailo_postprocessing_stage.cpp | 31 +++++++++++ .../hailo/hailo_postprocessing_stage.hpp | 5 ++ .../hailo/hailo_yolo_inference.cpp | 38 ++++++++++---- .../hailo/hailo_yolov8_pose.cpp | 51 ++++++++++++------- 4 files changed, 96 insertions(+), 29 deletions(-) diff --git a/post_processing_stages/hailo/hailo_postprocessing_stage.cpp b/post_processing_stages/hailo/hailo_postprocessing_stage.cpp index 0ae3d7df..efe38a44 100644 --- a/post_processing_stages/hailo/hailo_postprocessing_stage.cpp +++ b/post_processing_stages/hailo/hailo_postprocessing_stage.cpp @@ -15,6 +15,9 @@ using namespace hailort; +using Rectangle = libcamera::Rectangle; +using Size = libcamera::Size; + Allocator::Allocator() { } @@ -255,3 +258,31 @@ HailoROIPtr HailoPostProcessingStage::MakeROI(const std::vector &outp return roi; } + +Rectangle HailoPostProcessingStage::ConvertInferenceCoordinates(const std::vector &coords, + const std::vector &scaler_crops) const +{ + if (coords.size() != 4 || scaler_crops.size() != 2) + return {}; + + // Convert the inference image co-ordinates into the final ISP output co-ordinates. 
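+	// Normalised inference co-ordinates are first mapped onto the low res
+	// scaler crop (scaler_crops[1]), moved into sensor co-ordinates, clipped
+	// and re-based to the main scaler crop (scaler_crops[0]), and finally
+	// scaled to the ISP output size.
+	//
+	// Illustrative example (assumed numbers only, not from a real sensor):
+	// with a main crop of (0, 0)/2000x1500, a low res crop of
+	// (250, 0)/1500x1500 and a 1920x1440 ISP output, a normalised box of
+	// (0.5, 0.5, 0.25, 0.25) becomes roughly (750, 750)/375x375 on the low
+	// res crop, (1000, 750)/375x375 in sensor co-ordinates and
+	// (960, 720)/360x360 on the ISP output.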
+ const Size &isp_output_size = output_stream_->configuration().size; + + // Object scaled to the full sensor resolution + Rectangle obj; + obj.x = std::round(coords[0] * (scaler_crops[1].width - 1)); + obj.y = std::round(coords[1] * (scaler_crops[1].height - 1)); + obj.width = std::round(coords[2] * (scaler_crops[1].width - 1)); + obj.height = std::round(coords[3] * (scaler_crops[1].height - 1)); + + // Object on the low res scaler crop -> translated by the start of the low res crop offset + const Rectangle obj_translated_l = obj.translatedBy(scaler_crops[1].topLeft()); + // -> bounded to the high res output crop + const Rectangle obj_bounded = obj_translated_l.boundedTo(scaler_crops[0]); + // -> translated to the start of the high res crop offset + const Rectangle obj_translated_h = obj_bounded.translatedBy(-scaler_crops[0].topLeft()); + // -> and scaled to the ISP output. + const Rectangle obj_scaled = obj_translated_h.scaledBy(isp_output_size, scaler_crops[0].size()); + + return obj_scaled; +} diff --git a/post_processing_stages/hailo/hailo_postprocessing_stage.hpp b/post_processing_stages/hailo/hailo_postprocessing_stage.hpp index bf2955a0..e38ae5f4 100644 --- a/post_processing_stages/hailo/hailo_postprocessing_stage.hpp +++ b/post_processing_stages/hailo/hailo_postprocessing_stage.hpp @@ -13,6 +13,8 @@ #include #include +#include + #include #include "hailo_objects.hpp" @@ -110,6 +112,9 @@ class HailoPostProcessingStage : public PostProcessingStage hailo_status DispatchJob(const uint8_t *input, hailort::AsyncInferJob &job, std::vector &output_tensors); HailoROIPtr MakeROI(const std::vector &output_tensors) const; + libcamera::Rectangle ConvertInferenceCoordinates(const std::vector &coords, + const std::vector &scaler_crops) const; + libcamera::Stream *low_res_stream_; libcamera::Stream *output_stream_; libcamera::Stream *raw_stream_; diff --git a/post_processing_stages/hailo/hailo_yolo_inference.cpp b/post_processing_stages/hailo/hailo_yolo_inference.cpp index 9720c1b2..85883755 100644 --- a/post_processing_stages/hailo/hailo_yolo_inference.cpp +++ b/post_processing_stages/hailo/hailo_yolo_inference.cpp @@ -13,6 +13,7 @@ #include #include +#include #include #include "core/rpicam_app.hpp" @@ -28,6 +29,8 @@ using PostProcFuncPtrNms = void (*)(HailoROIPtr); using InitFuncPtr = YoloParams *(*)(std::string, std::string); using FreeFuncPtr = void (*)(void *); +using Rectangle = libcamera::Rectangle; + namespace fs = std::filesystem; #define NAME "hailo_yolo_inference" @@ -49,7 +52,7 @@ class YoloInference : public HailoPostProcessingStage bool Process(CompletedRequestPtr &completed_request) override; private: - std::vector runInference(const uint8_t *frame); + std::vector runInference(const uint8_t *frame, const std::vector &scaler_crops); void filterOutputObjects(std::vector &objects); struct LtObject @@ -187,7 +190,23 @@ bool YoloInference::Process(CompletedRequestPtr &completed_request) return false; } - std::vector objects = runInference(input_ptr); + std::vector scaler_crops; + auto scaler_crop = completed_request->metadata.get(controls::ScalerCrop); + auto rpi_scaler_crop = completed_request->metadata.get(controls::rpi::ScalerCrops); + + if (rpi_scaler_crop) + { + for (unsigned int i = 0; i < rpi_scaler_crop->size(); i++) + scaler_crops.push_back(rpi_scaler_crop->data()[i]); + } + else if (scaler_crop) + { + // Push-back twice, once for main, once for low res. 
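+		// (With main and low res sharing the same crop,
+		// ConvertInferenceCoordinates() effectively reduces to scaling the
+		// normalised box straight to the ISP output size, matching the old
+		// single-crop behaviour.)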
+ scaler_crops.push_back(*scaler_crop); + scaler_crops.push_back(*scaler_crop); + } + + std::vector objects = runInference(input_ptr, scaler_crops); if (objects.size()) { if (temporal_filtering_) @@ -215,7 +234,7 @@ bool YoloInference::Process(CompletedRequestPtr &completed_request) return false; } -std::vector YoloInference::runInference(const uint8_t *frame) +std::vector YoloInference::runInference(const uint8_t *frame, const std::vector &scaler_crops) { hailort::AsyncInferJob job; std::vector output_tensors; @@ -265,7 +284,6 @@ std::vector YoloInference::runInference(const uint8_t *frame) // Translate results to the rpicam-apps Detection objects std::vector results; - Size output_size = output_stream_->configuration().size; for (auto const &d : detections) { if (d->get_confidence() < threshold_) @@ -273,12 +291,12 @@ std::vector YoloInference::runInference(const uint8_t *frame) // Extract bounding box co-ordinates in the output image co-ordinates. auto const &box = d->get_bbox(); - int x0 = std::round(std::max(box.xmin(), 0.0f) * (output_size.width - 1)); - int x1 = std::round(std::min(box.xmax(), 1.0f) * (output_size.width - 1)); - int y0 = std::round(std::max(box.ymin(), 0.0f) * (output_size.height - 1)); - int y1 = std::round(std::min(box.ymax(), 1.0f) * (output_size.height - 1)); - results.emplace_back(d->get_class_id(), d->get_label(), d->get_confidence(), x0, y0, - x1 - x0, y1 - y0); + const float x0 = std::max(box.xmin(), 0.0f); + const float x1 = std::min(box.xmax(), 1.0f); + const float y0 = std::max(box.ymin(), 0.0f); + const float y1 = std::min(box.ymax(), 1.0f); + libcamera::Rectangle r = ConvertInferenceCoordinates({ x0, y0, x1 - x0, y1 - y0 }, scaler_crops); + results.emplace_back(d->get_class_id(), d->get_label(), d->get_confidence(), r.x, r.y, r.width, r.height); LOG(2, "Object: " << results.back().toString()); if (--max_detections_ == 0) diff --git a/post_processing_stages/hailo/hailo_yolov8_pose.cpp b/post_processing_stages/hailo/hailo_yolov8_pose.cpp index 16a06897..72a10342 100644 --- a/post_processing_stages/hailo/hailo_yolov8_pose.cpp +++ b/post_processing_stages/hailo/hailo_yolov8_pose.cpp @@ -18,6 +18,7 @@ #include "core/rpicam_app.hpp" #include "hailo_postprocessing_stage.hpp" +using Rectangle = libcamera::Rectangle; using Size = libcamera::Size; using PostProcFuncPtr = std::pair, std::vector>(*)(HailoROIPtr); @@ -38,7 +39,7 @@ class YoloPose : public HailoPostProcessingStage bool Process(CompletedRequestPtr &completed_request) override; private: - void runInference(const uint8_t *input, uint32_t *output); + void runInference(const uint8_t *input, uint32_t *output, const std::vector &scaler_crops); PostProcessingLib postproc_; }; @@ -113,16 +114,32 @@ bool YoloPose::Process(CompletedRequestPtr &completed_request) return false; } + std::vector scaler_crops; + auto scaler_crop = completed_request->metadata.get(controls::ScalerCrop); + auto rpi_scaler_crop = completed_request->metadata.get(controls::rpi::ScalerCrops); + + if (rpi_scaler_crop) + { + for (unsigned int i = 0; i < rpi_scaler_crop->size(); i++) + scaler_crops.push_back(rpi_scaler_crop->data()[i]); + } + else if (scaler_crop) + { + // Push-back twice, once for main, once for low res. 
+		scaler_crops.push_back(*scaler_crop);
+		scaler_crops.push_back(*scaler_crop);
+	}
+
 	BufferWriteSync w(app_, completed_request->buffers[output_stream_]);
 	libcamera::Span buffer = w.Get()[0];
 	uint32_t *output = (uint32_t *)buffer.data();
-	runInference(input_ptr, output);
+	runInference(input_ptr, output, scaler_crops);
 	return false;
 }
-void YoloPose::runInference(const uint8_t *input, uint32_t *output)
+void YoloPose::runInference(const uint8_t *input, uint32_t *output, const std::vector &scaler_crops)
 {
 	hailort::AsyncInferJob job;
 	std::vector output_tensors;
@@ -159,30 +176,26 @@ void YoloPose::runInference(const uint8_t *input, uint32_t *output)
 			continue;
 		HailoBBox bbox = detection->get_bbox();
-
-		cv::rectangle(image,
-					  cv::Point2f(bbox.xmin() * float(output_stream_info_.width),
-								  bbox.ymin() * float(output_stream_info_.height)),
-					  cv::Point2f(bbox.xmax() * float(output_stream_info_.width),
-								  bbox.ymax() * float(output_stream_info_.height)),
+		const float x0 = std::max(bbox.xmin(), 0.0f);
+		const float x1 = std::min(bbox.xmax(), 1.0f);
+		const float y0 = std::max(bbox.ymin(), 0.0f);
+		const float y1 = std::min(bbox.ymax(), 1.0f);
+		Rectangle r = ConvertInferenceCoordinates({ x0, y0, x1 - x0, y1 - y0 }, scaler_crops);
+		cv::rectangle(image, cv::Point2f(r.x, r.y), cv::Point2f(r.x + r.width, r.y + r.height),
 					  cv::Scalar(0, 0, 255), 1);
 	}
 	for (auto &keypoint : keypoints_and_pairs.first)
 	{
-		cv::circle(image,
-				   cv::Point(keypoint.xs * float(output_stream_info_.width),
-							 keypoint.ys * float(output_stream_info_.height)),
-				   3, cv::Scalar(255, 0, 255), -1);
+		Rectangle r = ConvertInferenceCoordinates({ keypoint.xs, keypoint.ys, 1, 1 }, scaler_crops);
+		cv::circle(image, cv::Point(r.x, r.y), 3, cv::Scalar(255, 0, 255), -1);
	}
-	for (PairPairs &p : keypoints_and_pairs.second)
+	for (const PairPairs &p : keypoints_and_pairs.second)
 	{
-		auto pt1 =
-			cv::Point(p.pt1.first * float(output_stream_info_.width), p.pt1.second * float(output_stream_info_.height));
-		auto pt2 =
-			cv::Point(p.pt2.first * float(output_stream_info_.width), p.pt2.second * float(output_stream_info_.height));
-		cv::line(image, pt1, pt2, cv::Scalar(255, 0, 255), 3);
+		Rectangle r1 = ConvertInferenceCoordinates({ p.pt1.first, p.pt1.second, 1, 1 }, scaler_crops);
+		Rectangle r2 = ConvertInferenceCoordinates({ p.pt2.first, p.pt2.second, 1, 1 }, scaler_crops);
+		cv::line(image, cv::Point(r1.x, r1.y), cv::Point(r2.x, r2.y), cv::Scalar(255, 0, 255), 3);
 	}
 }

From 92938e90fcaa79ce382012e87409c385486f98ed Mon Sep 17 00:00:00 2001
From: Naushir Patuck
Date: Wed, 7 Aug 2024 10:48:02 +0100
Subject: [PATCH 3/3] core: Add aspect ratio control for lores stream

Add a new command line argument "--lores-par" that, when set, preserves the
1:1 aspect ratio of the low res stream. This is only possible on the PiSP
platform. The default behaviour is to have this switch disabled, allowing for
identical behaviour between VC4 and PiSP platforms. This requires the use of
the rpi::ScalerCrops vendor control.

This switch can also be triggered via the "rpicam-apps.lores.par" key in the
postprocessing JSON file.
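As an illustration only (the stream size below is assumed, not part of this
patch), the switch can be set on the command line:

    rpicam-hello --lores-width 640 --lores-height 640 --lores-par

or, assuming the usual "rpicam-apps" section layout of the post-processing
JSON file:

    "rpicam-apps": { "lores": { "width": 640, "height": 640, "par": true } }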
Signed-off-by: Naushir Patuck --- core/options.cpp | 7 +++++-- core/options.hpp | 1 + core/post_processor.cpp | 2 ++ core/rpicam_app.cpp | 38 ++++++++++++++++++++++++++++---------- 4 files changed, 36 insertions(+), 12 deletions(-) diff --git a/core/options.cpp b/core/options.cpp index 58805253..fdd1ef20 100644 --- a/core/options.cpp +++ b/core/options.cpp @@ -270,9 +270,11 @@ Options::Options() ("tuning-file", value(&tuning_file)->default_value("-"), "Name of camera tuning file to use, omit this option for libcamera default behaviour") ("lores-width", value(&lores_width)->default_value(0), - "Width of low resolution frames (use 0 to omit low resolution stream") + "Width of low resolution frames (use 0 to omit low resolution stream)") ("lores-height", value(&lores_height)->default_value(0), - "Height of low resolution frames (use 0 to omit low resolution stream") + "Height of low resolution frames (use 0 to omit low resolution stream)") + ("lores-par", value(&lores_par)->default_value(false)->implicit_value(true), + "Preserve the 1:1 pixel aspect ratio of the low res image (where possible) by applying a different crop on the stream.") ("mode", value(&mode_string), "Camera mode as W:H:bit-depth:packing, where packing is P (packed) or U (unpacked)") ("viewfinder-mode", value(&viewfinder_mode_string), @@ -695,6 +697,7 @@ void Options::Print() const std::cerr << " tuning-file: " << (tuning_file == "-" ? "(libcamera)" : tuning_file) << std::endl; std::cerr << " lores-width: " << lores_width << std::endl; std::cerr << " lores-height: " << lores_height << std::endl; + std::cerr << " lores-par: " << lores_par << std::endl; if (afMode_index != -1) std::cerr << " autofocus-mode: " << afMode << std::endl; if (afRange_index != -1) diff --git a/core/options.hpp b/core/options.hpp index d6b5cb16..624c14b4 100644 --- a/core/options.hpp +++ b/core/options.hpp @@ -151,6 +151,7 @@ struct Options bool qt_preview; unsigned int lores_width; unsigned int lores_height; + bool lores_par; unsigned int camera; std::string mode_string; Mode mode; diff --git a/core/post_processor.cpp b/core/post_processor.cpp index 5d796faf..638226c1 100644 --- a/core/post_processor.cpp +++ b/core/post_processor.cpp @@ -120,6 +120,7 @@ void PostProcessor::Read(std::string const &filename) unsigned int lores_width = node.get("lores.width"); unsigned int lores_height = node.get("lores.height"); + bool lores_par = node.get("lores.par", app_->GetOptions()->lores_par); std::string lores_format_str = node.get("lores.format", "yuv420"); libcamera::PixelFormat lores_format = libcamera::formats::YUV420; @@ -132,6 +133,7 @@ void PostProcessor::Read(std::string const &filename) app_->GetOptions()->lores_width = lores_width; app_->GetOptions()->lores_height = lores_height; + app_->GetOptions()->lores_par = lores_par; app_->lores_format_ = lores_format; LOG(1, "Postprocessing requested lores: " << lores_width << "x" << lores_height << " " << lores_format); diff --git a/core/rpicam_app.cpp b/core/rpicam_app.cpp index 16232b9e..7c1174fa 100644 --- a/core/rpicam_app.cpp +++ b/core/rpicam_app.cpp @@ -635,17 +635,35 @@ void RPiCamApp::StartCamera() // Build a list of initial controls that we must set in the camera before starting it. // We don't overwrite anything the application may have set before calling us. 
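	// Crop handling: crops[0] applies to the main output (the user ROI if
	// given, otherwise the default crop); if a low res stream is configured
	// without --lores-par, the same crop is repeated for it so both outputs
	// behave as before. All entries are passed through the rpi::ScalerCrops
	// vendor control.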
- if (!controls_.get(controls::ScalerCrop) && options_->roi_width != 0 && options_->roi_height != 0) + if (!controls_.get(controls::ScalerCrop) && !controls_.get(controls::rpi::ScalerCrops)) { - Rectangle sensor_area = camera_->controls().at(&controls::ScalerCrop).max().get(); - int x = options_->roi_x * sensor_area.width; - int y = options_->roi_y * sensor_area.height; - int w = options_->roi_width * sensor_area.width; - int h = options_->roi_height * sensor_area.height; - Rectangle crop(x, y, w, h); - crop.translateBy(sensor_area.topLeft()); - LOG(2, "Using crop " << crop.toString()); - controls_.set(controls::ScalerCrop, crop); + const Rectangle sensor_area = camera_->controls().at(&controls::ScalerCrop).max().get(); + const Rectangle default_crop = camera_->controls().at(&controls::ScalerCrop).def().get(); + std::vector crops; + + if (options_->roi_width != 0 && options_->roi_height != 0) + { + int x = options_->roi_x * sensor_area.width; + int y = options_->roi_y * sensor_area.height; + unsigned int w = options_->roi_width * sensor_area.width; + unsigned int h = options_->roi_height * sensor_area.height; + crops.push_back({ x, y, w, h }); + crops.back().translateBy(sensor_area.topLeft()); + } + else + { + crops.push_back(default_crop); + } + + LOG(2, "Using crop (main) " << crops.back().toString()); + + if (options_->lores_width != 0 && options_->lores_height != 0 && !options_->lores_par) + { + crops.push_back(crops.back()); + LOG(2, "Using crop (lores) " << crops.back().toString()); + } + + controls_.set(controls::rpi::ScalerCrops, libcamera::Span(crops.data(), crops.size())); } if (!controls_.get(controls::AfWindows) && !controls_.get(controls::AfMetering) && options_->afWindow_width != 0 &&