postprocess: hailo: Handle multiple scaler crops on the ISP outputs
Add a HailoPostProcessingStage::ConvertInferenceCoordinates() helper
that converts from the inference (low res) image co-ordinate space
to the main image space, accounting for different scaler crops.

Fix up the Yolo inference and pose estimation stages to use this new
helper.

Signed-off-by: Naushir Patuck <naush@raspberrypi.com>
naushir committed Aug 29, 2024
1 parent e3357be commit f58bfe1
Showing 4 changed files with 96 additions and 29 deletions.
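
The following is a minimal, self-contained sketch (not part of the commit) of the mapping the new ConvertInferenceCoordinates() helper performs, using plain structs in place of the libcamera types; the function name mapToMain and the crop numbers in main() are made-up for illustration only.

#include <algorithm>
#include <cmath>
#include <iostream>

struct Rect { int x, y, width, height; };

// Map a normalised [0,1] box from the low-res (inference) stream into the
// main ISP output, given the scaler crops of the main stream (main_crop) and
// the low-res stream (lores_crop), both expressed in sensor coordinates.
// For simplicity this assumes the box overlaps the main crop.
Rect mapToMain(float nx, float ny, float nw, float nh, const Rect &main_crop, const Rect &lores_crop,
	       int isp_width, int isp_height)
{
	// Scale the normalised box onto the low-res scaler crop (sensor pixels).
	Rect obj;
	obj.x = std::lround(nx * (lores_crop.width - 1));
	obj.y = std::lround(ny * (lores_crop.height - 1));
	obj.width = std::lround(nw * (lores_crop.width - 1));
	obj.height = std::lround(nh * (lores_crop.height - 1));

	// Translate by the low-res crop offset -> absolute sensor coordinates.
	obj.x += lores_crop.x;
	obj.y += lores_crop.y;

	// Bound to the main stream's scaler crop.
	const int x0 = std::max(obj.x, main_crop.x);
	const int y0 = std::max(obj.y, main_crop.y);
	const int x1 = std::min(obj.x + obj.width, main_crop.x + main_crop.width);
	const int y1 = std::min(obj.y + obj.height, main_crop.y + main_crop.height);

	// Translate back to the main crop's origin and scale to the ISP output size.
	return { (x0 - main_crop.x) * isp_width / main_crop.width,
		 (y0 - main_crop.y) * isp_height / main_crop.height,
		 (x1 - x0) * isp_width / main_crop.width,
		 (y1 - y0) * isp_height / main_crop.height };
}

int main()
{
	// Example: both streams share the same 1920x1080 crop, ISP output is 1920x1080.
	const Rect crop{ 0, 0, 1920, 1080 };
	const Rect r = mapToMain(0.25f, 0.25f, 0.5f, 0.5f, crop, crop, 1920, 1080);
	std::cout << r.x << "," << r.y << " " << r.width << "x" << r.height << "\n"; // ~480,270 960x540
}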
31 changes: 31 additions & 0 deletions post_processing_stages/hailo/hailo_postprocessing_stage.cpp
@@ -15,6 +15,9 @@

using namespace hailort;

using Rectangle = libcamera::Rectangle;
using Size = libcamera::Size;

Allocator::Allocator()
{
}
@@ -255,3 +258,31 @@ HailoROIPtr HailoPostProcessingStage::MakeROI(const std::vector<OutTensor> &outp

return roi;
}

Rectangle HailoPostProcessingStage::ConvertInferenceCoordinates(const std::vector<float> &coords,
const std::vector<Rectangle> &scaler_crops) const
{
if (coords.size() != 4 || scaler_crops.size() != 2)
return {};

// Convert the inference image co-ordinates into the final ISP output co-ordinates.
const Size &isp_output_size = output_stream_->configuration().size;

// Object scaled to the full sensor resolution
Rectangle obj;
obj.x = std::round(coords[0] * (scaler_crops[1].width - 1));
obj.y = std::round(coords[1] * (scaler_crops[1].height - 1));
obj.width = std::round(coords[2] * (scaler_crops[1].width - 1));
obj.height = std::round(coords[3] * (scaler_crops[1].height - 1));

// Object on the low res scaler crop -> translated by the start of the low res crop offset
const Rectangle obj_translated_l = obj.translatedBy(scaler_crops[1].topLeft());
// -> bounded to the high res output crop
const Rectangle obj_bounded = obj_translated_l.boundedTo(scaler_crops[0]);
// -> translated to the start of the high res crop offset
const Rectangle obj_translated_h = obj_bounded.translatedBy(-scaler_crops[0].topLeft());
// -> and scaled to the ISP output.
const Rectangle obj_scaled = obj_translated_h.scaledBy(isp_output_size, scaler_crops[0].size());

return obj_scaled;
}
5 changes: 5 additions & 0 deletions post_processing_stages/hailo/hailo_postprocessing_stage.hpp
@@ -13,6 +13,8 @@
#include <string>
#include <vector>

#include <libcamera/geometry.h>

#include <hailo/hailort.hpp>
#include "hailo_objects.hpp"

@@ -110,6 +112,9 @@ class HailoPostProcessingStage : public PostProcessingStage
hailo_status DispatchJob(const uint8_t *input, hailort::AsyncInferJob &job, std::vector<OutTensor> &output_tensors);
HailoROIPtr MakeROI(const std::vector<OutTensor> &output_tensors) const;

libcamera::Rectangle ConvertInferenceCoordinates(const std::vector<float> &coords,
const std::vector<libcamera::Rectangle> &scaler_crops) const;

libcamera::Stream *low_res_stream_;
libcamera::Stream *output_stream_;
libcamera::Stream *raw_stream_;
38 changes: 28 additions & 10 deletions post_processing_stages/hailo/hailo_yolo_inference.cpp
@@ -13,6 +13,7 @@
#include <string>
#include <vector>

#include <libcamera/controls.h>
#include <libcamera/geometry.h>

#include "core/rpicam_app.hpp"
@@ -28,6 +29,8 @@ using PostProcFuncPtrNms = void (*)(HailoROIPtr);
using InitFuncPtr = YoloParams *(*)(std::string, std::string);
using FreeFuncPtr = void (*)(void *);

using Rectangle = libcamera::Rectangle;

namespace fs = std::filesystem;

#define NAME "hailo_yolo_inference"
Expand All @@ -49,7 +52,7 @@ class YoloInference : public HailoPostProcessingStage
bool Process(CompletedRequestPtr &completed_request) override;

private:
std::vector<Detection> runInference(const uint8_t *frame);
std::vector<Detection> runInference(const uint8_t *frame, const std::vector<libcamera::Rectangle> &scaler_crops);
void filterOutputObjects(std::vector<Detection> &objects);

struct LtObject
@@ -187,7 +190,23 @@ bool YoloInference::Process(CompletedRequestPtr &completed_request)
return false;
}

std::vector<Detection> objects = runInference(input_ptr);
std::vector<Rectangle> scaler_crops;
auto scaler_crop = completed_request->metadata.get(controls::ScalerCrop);
auto rpi_scaler_crop = completed_request->metadata.get(controls::rpi::ScalerCrops);

if (rpi_scaler_crop)
{
for (unsigned int i = 0; i < rpi_scaler_crop->size(); i++)
scaler_crops.push_back(rpi_scaler_crop->data()[i]);
}
else if (scaler_crop)
{
// Push-back twice, once for main, once for low res.
scaler_crops.push_back(*scaler_crop);
scaler_crops.push_back(*scaler_crop);
}

std::vector<Detection> objects = runInference(input_ptr, scaler_crops);
if (objects.size())
{
if (temporal_filtering_)
@@ -215,7 +234,7 @@ bool YoloInference::Process(CompletedRequestPtr &completed_request)
return false;
}

std::vector<Detection> YoloInference::runInference(const uint8_t *frame)
std::vector<Detection> YoloInference::runInference(const uint8_t *frame, const std::vector<Rectangle> &scaler_crops)
{
hailort::AsyncInferJob job;
std::vector<OutTensor> output_tensors;
@@ -265,20 +284,19 @@ std::vector<Detection> YoloInference::runInference(const uint8_t *frame)

// Translate results to the rpicam-apps Detection objects
std::vector<Detection> results;
Size output_size = output_stream_->configuration().size;
for (auto const &d : detections)
{
if (d->get_confidence() < threshold_)
continue;

// Extract bounding box co-ordinates in the output image co-ordinates.
auto const &box = d->get_bbox();
int x0 = std::round(std::max(box.xmin(), 0.0f) * (output_size.width - 1));
int x1 = std::round(std::min(box.xmax(), 1.0f) * (output_size.width - 1));
int y0 = std::round(std::max(box.ymin(), 0.0f) * (output_size.height - 1));
int y1 = std::round(std::min(box.ymax(), 1.0f) * (output_size.height - 1));
results.emplace_back(d->get_class_id(), d->get_label(), d->get_confidence(), x0, y0,
x1 - x0, y1 - y0);
const float x0 = std::max(box.xmin(), 0.0f);
const float x1 = std::min(box.xmax(), 1.0f);
const float y0 = std::max(box.ymin(), 0.0f);
const float y1 = std::min(box.ymax(), 1.0f);
libcamera::Rectangle r = ConvertInferenceCoordinates({ x0, y0, x1 - x0, y1 - y0 }, scaler_crops);
results.emplace_back(d->get_class_id(), d->get_label(), d->get_confidence(), r.x, r.y, r.width, r.height);
LOG(2, "Object: " << results.back().toString());

if (--max_detections_ == 0)
51 changes: 32 additions & 19 deletions post_processing_stages/hailo/hailo_yolov8_pose.cpp
@@ -18,6 +18,7 @@
#include "core/rpicam_app.hpp"
#include "hailo_postprocessing_stage.hpp"

using Rectangle = libcamera::Rectangle;
using Size = libcamera::Size;
using PostProcFuncPtr = std::pair<std::vector<KeyPt>, std::vector<PairPairs>>(*)(HailoROIPtr);

@@ -38,7 +39,7 @@ class YoloPose : public HailoPostProcessingStage
bool Process(CompletedRequestPtr &completed_request) override;

private:
void runInference(const uint8_t *input, uint32_t *output);
void runInference(const uint8_t *input, uint32_t *output, const std::vector<Rectangle> &scaler_crops);

PostProcessingLib postproc_;
};
@@ -113,16 +114,32 @@ bool YoloPose::Process(CompletedRequestPtr &completed_request)
return false;
}

std::vector<Rectangle> scaler_crops;
auto scaler_crop = completed_request->metadata.get(controls::ScalerCrop);
auto rpi_scaler_crop = completed_request->metadata.get(controls::rpi::ScalerCrops);

if (rpi_scaler_crop)
{
for (unsigned int i = 0; i < rpi_scaler_crop->size(); i++)
scaler_crops.push_back(rpi_scaler_crop->data()[i]);
}
else if (scaler_crop)
{
// Push-back twice, once for main, once for low res.
scaler_crops.push_back(*scaler_crop);
scaler_crops.push_back(*scaler_crop);
}

BufferWriteSync w(app_, completed_request->buffers[output_stream_]);
libcamera::Span<uint8_t> buffer = w.Get()[0];
uint32_t *output = (uint32_t *)buffer.data();

runInference(input_ptr, output);
runInference(input_ptr, output, scaler_crops);

return false;
}

void YoloPose::runInference(const uint8_t *input, uint32_t *output)
void YoloPose::runInference(const uint8_t *input, uint32_t *output, const std::vector<Rectangle> &scaler_crops)
{
hailort::AsyncInferJob job;
std::vector<OutTensor> output_tensors;
@@ -159,30 +176,26 @@ void YoloPose::runInference(const uint8_t *input, uint32_t *output)
continue;

HailoBBox bbox = detection->get_bbox();

cv::rectangle(image,
cv::Point2f(bbox.xmin() * float(output_stream_info_.width),
bbox.ymin() * float(output_stream_info_.height)),
cv::Point2f(bbox.xmax() * float(output_stream_info_.width),
bbox.ymax() * float(output_stream_info_.height)),
const float x0 = std::max(bbox.xmin(), 0.0f);
const float x1 = std::min(bbox.xmax(), 1.0f);
const float y0 = std::max(bbox.ymin(), 0.0f);
const float y1 = std::min(bbox.ymax(), 1.0f);
Rectangle r = ConvertInferenceCoordinates({ x0, y0, x1 - x0, y1 - y0 }, scaler_crops);
cv::rectangle(image, cv::Point2f(r.x, r.y), cv::Point2f(r.x + r.width, r.y + r.height),
cv::Scalar(0, 0, 255), 1);
}

for (auto &keypoint : keypoints_and_pairs.first)
{
cv::circle(image,
cv::Point(keypoint.xs * float(output_stream_info_.width),
keypoint.ys * float(output_stream_info_.height)),
3, cv::Scalar(255, 0, 255), -1);
Rectangle r = ConvertInferenceCoordinates({ keypoint.xs, keypoint.ys, 1, 1 }, scaler_crops);
cv::circle(image, cv::Point(r.x, r.y), 3, cv::Scalar(255, 0, 255), -1);
}

for (PairPairs &p : keypoints_and_pairs.second)
for (const PairPairs &p : keypoints_and_pairs.second)
{
auto pt1 =
cv::Point(p.pt1.first * float(output_stream_info_.width), p.pt1.second * float(output_stream_info_.height));
auto pt2 =
cv::Point(p.pt2.first * float(output_stream_info_.width), p.pt2.second * float(output_stream_info_.height));
cv::line(image, pt1, pt2, cv::Scalar(255, 0, 255), 3);
Rectangle r1 = ConvertInferenceCoordinates({ p.pt1.first, p.pt1.second, 1, 1 }, scaler_crops);
Rectangle r2 = ConvertInferenceCoordinates({ p.pt2.first, p.pt2.second, 1, 1 }, scaler_crops);
cv::line(image, cv::Point(r1.x, r1.y), cv::Point(r2.x, r2.y), cv::Scalar(255, 0, 255), 3);
}
}

