diff --git a/.github/workflows/config.yml b/.github/workflows/config.yml index 8be5cd9023..7d7abc3bb0 100644 --- a/.github/workflows/config.yml +++ b/.github/workflows/config.yml @@ -66,16 +66,29 @@ jobs: NOT_TEST_INSTALL : true USE_JENKINS: true DOCKER_IMAGE_JENKINS: ros-ubuntu:20.04-pcl - TEST_PKGS : 'jsk_recognition_msgs' # to skip test - BEFORE_SCRIPT : "sudo pip install numpy==1.16.6; sudo pip install fcn chainercv chainer-mask-rcnn==0.3.0 decorator==4.4.2 chainer==6.7.0 protobuf==3.18.0 cupy-cuda91 pytesseract==0.3.6 torch==1.4.0;" + BEFORE_SCRIPT : "sudo pip3 install numpy==1.17.5 scipy==1.3.3 scikit-image==0.16.2 PyWavelets==0.5.1 imageio==2.4.1 pandas==0.25.3; sudo pip3 install fcn chainercv chainer-mask-rcnn==0.3.0 decorator==4.4.2 chainer==7.8.1 protobuf==3.6.1 cupy-cuda91 pytesseract==0.3.10 Pillow==9.3.0 torch==1.4.0 torchvision==0.5.0;sudo dpkg -i /workspace/.ros/data/libpcl-features1.10_1.10.0+dfsg-5ubuntu1_amd64.deb;" + BUILD_PKGS: 'checkerboard_detector imagesift jsk_perception jsk_recognition_utils resized_image_transport' + EXTRA_DEB: ros-noetic-pr2-description experimental : false - ROS_DISTRO: noetic USE_DEB: false NOT_TEST_INSTALL : true USE_JENKINS: true DOCKER_IMAGE_JENKINS: ros-ubuntu:20.04-pcl - BEFORE_SCRIPT : "sudo pip install numpy==1.16.6; sudo pip install fcn chainercv chainer-mask-rcnn==0.3.0 decorator==4.4.2 chainer==6.7.0 protobuf==3.18.0 cupy-cuda91 pytesseract==0.3.6 torch==1.4.0;" - experimental : true + TEST_PKGS : 'jsk_pcl_ros_utils jsk_pcl_ros' + BEFORE_SCRIPT : "sudo pip3 install numpy==1.17.5 scipy==1.3.3 scikit-image==0.16.2 PyWavelets==0.5.1 imageio==2.4.1 pandas==0.25.3; sudo pip3 install fcn chainercv chainer-mask-rcnn==0.3.0 decorator==4.4.2 chainer==7.8.1 protobuf==3.6.1 cupy-cuda91 pytesseract==0.3.10 Pillow==9.3.0 torch==1.4.0 torchvision==0.5.0;sudo dpkg -i /workspace/.ros/data/libpcl-features1.10_1.10.0+dfsg-5ubuntu1_amd64.deb;" + BUILD_PKGS: 'jsk_pcl_ros_utils jsk_pcl_ros' + EXTRA_DEB: ros-noetic-pr2-description + experimental : false + - ROS_DISTRO: noetic + USE_DEB: false + NOT_TEST_INSTALL : true + USE_JENKINS: true + DOCKER_IMAGE_JENKINS: ros-ubuntu:20.04-pcl + TEST_PKGS : 'jsk_recognition_msgs' # to skip test + BEFORE_SCRIPT : "sudo pip3 install numpy==1.17.5 scipy==1.3.3 scikit-image==0.16.2 PyWavelets==0.5.1 imageio==2.4.1 pandas==0.25.3; sudo pip3 install fcn chainercv chainer-mask-rcnn==0.3.0 decorator==4.4.2 chainer==7.8.1 protobuf==3.6.1 cupy-cuda91 pytesseract==0.3.10 Pillow==9.3.0 torch==1.4.0 torchvision==0.5.0;sudo dpkg -i /workspace/.ros/data/libpcl-features1.10_1.10.0+dfsg-5ubuntu1_amd64.deb;" + EXTRA_DEB: ros-noetic-pr2-description + experimental : false steps: diff --git a/.github/workflows/python2.yml b/.github/workflows/python2.yml index 72f28a3229..3940014f6a 100644 --- a/.github/workflows/python2.yml +++ b/.github/workflows/python2.yml @@ -15,4 +15,4 @@ jobs: - name: Check Python2 run: | apt update -q && apt install -y -q python2 - python2 -m compileall . + python2 -m compileall -x `find docker -name "*.py"` . 
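For reference, the updated `python2.yml` step above only changes which files are byte-compiled: Python-3-only sources under `docker/` are excluded from the Python 2 syntax check via `compileall -x`. Below is a minimal sketch of an equivalent check using the standard-library `compileall` module; the `docker/` exclusion regex and the exit-code handling are illustrative assumptions, not part of the workflow itself.

```python
# Hedged sketch: byte-compile the tree with Python 2 while skipping docker/,
# similar in spirit to `python2 -m compileall -x ... .` in the workflow above.
import compileall
import re
import sys

# rx marks paths to exclude; docker/ is assumed to hold Python-3-only code.
ok = compileall.compile_dir('.', rx=re.compile(r'(^|/)docker/'), quiet=1)
sys.exit(0 if ok else 1)
```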
diff --git a/.travis.rosinstall.noetic b/.travis.rosinstall.noetic new file mode 100644 index 0000000000..62d2cb6127 --- /dev/null +++ b/.travis.rosinstall.noetic @@ -0,0 +1,4 @@ +- git: # https://github.com/ros-perception/image_transport_plugins/pull/64 + uri: https://github.com/ros-perception/image_transport_plugins.git + local-name: image_transport_plugins + version: noetic-devel diff --git a/audio_to_spectrogram/CMakeLists.txt b/audio_to_spectrogram/CMakeLists.txt index e2909950b0..f3828e9d70 100644 --- a/audio_to_spectrogram/CMakeLists.txt +++ b/audio_to_spectrogram/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(catkin REQUIRED COMPONENTS catkin_python_setup() generate_dynamic_reconfigure_options( - cfg/AudioAmplitudePlot.cfg + cfg/DataAmplitudePlot.cfg ) catkin_package() @@ -41,4 +41,10 @@ if(CATKIN_ENABLE_TESTING) find_package(catkin REQUIRED COMPONENTS rostest roslaunch) add_rostest(test/audio_to_spectrogram.test) roslaunch_add_file_check(launch/audio_to_spectrogram.launch) + if(NOT $ENV{ROS_DISTRO} STRLESS "kinetic") + # Under kinetic, eval cannot be used in launch files + # http://wiki.ros.org/roslaunch/XML#substitution_args + add_rostest(test/wrench_to_spectrogram.test) + roslaunch_add_file_check(launch/wrench_to_spectrogram.launch) + endif() endif() diff --git a/audio_to_spectrogram/README.md b/audio_to_spectrogram/README.md index 46070a99f8..321aac4cd7 100644 --- a/audio_to_spectrogram/README.md +++ b/audio_to_spectrogram/README.md @@ -1,6 +1,6 @@ # audio_to_spectrogram -This package converts audio data to spectrum and spectrogram data. +This package converts audio data (or other time-series data) to spectrum and spectrogram data. # Usage By following command, you can publish audio, spectrum and spectrogram topics. Please set correct args for your microphone configuration, such as mic\_sampling\_rate or bitdepth. @@ -9,6 +9,13 @@ By following command, you can publish audio, spectrum and spectrogram topics. Pl roslaunch audio_to_spectrogram audio_to_spectrogram.launch ``` +Its data conversion pipeline is as follows: +``` +audio_to_spectrum.py -> spectrum + -> normalized_half_spectrum + -> log_spectrum -> preprocess node(s) -> preprocessed spectrum -> spectrum_to_spectrogram.py -> spectrogram +``` + Here is an example using rosbag with 300Hz audio. ```bash roslaunch audio_to_spectrogram sample_audio_to_spectrogram.launch @@ -18,19 +25,48 @@ roslaunch audio_to_spectrogram sample_audio_to_spectrogram.launch |---|---|---| ||![](https://user-images.githubusercontent.com/19769486/82075694-9a7ac300-9717-11ea-899c-db6119a76d52.png)|![](https://user-images.githubusercontent.com/19769486/82075685-96e73c00-9717-11ea-9abc-e6e74104d666.png)| +You can also convert data other than audio to spectrum and spectrogram data using this package. +Here is an example using rosbag of a force torque sensor sensing drill vibration. +```bash +roslaunch audio_to_spectrogram sample_wrench_to_spectrogram.launch +``` + +|Z-axis Force Amplitude|Normalized Half Spectrum|Spectrogram Source Spectrum|Spectrogram| +|---|---|---|---| +||||| + # Scripts ## audio_to_spectrum.py + A script to convert audio to spectrum. - ### Publishing topics - - `~spectrum` (`jsk_recognition_msgs/Spectrum`) - Spectrum data calculated from audio by FFT. + Spectrum data calculated from audio by FFT. + It is usual "amplitude spectrum". + See https://ryo-iijima.com/fftresult/ for details. 
+ + - `~normalized_half_spectrum` (`jsk_recognition_msgs/Spectrum`) + + Spectrum data which is "half" (having non-negative frequencies (0Hz-Nyquist frequency)) and is "normalized" (consistent with the amplitude of the original signal). + See the following for details. + - https://ryo-iijima.com/fftresult/ + - https://stackoverflow.com/questions/63211851/why-divide-the-output-of-numpy-fft-by-n + - https://github.com/jsk-ros-pkg/jsk_recognition/issues/2761#issue-1550715400 + + - `~log_spectrum` (`jsk_recognition_msgs/Spectrum`) + + Log-scaled spectrum data. + It is calculated by applying log to the absolute value of the FFT result. + Usually, log is applied to "power spectrum", but we don't use it for simplicity. + See the following for details. + - https://github.com/jsk-ros-pkg/jsk_recognition/issues/2761#issuecomment-1445810380 + - http://makotomurakami.com/blog/2020/05/23/5266/ - ### Subscribing topics - - `audio` (`audio_common_msgs/AudioData`) + - `~audio` (`audio_common_msgs/AudioData`) Audio stream data from microphone. The audio format must be `wave`. @@ -55,15 +91,94 @@ roslaunch audio_to_spectrogram sample_audio_to_spectrogram.launch Number of bits per audio data. - - `~high_cut_freq` (`Int`, default: `800`) + - `~fft_exec_rate` (`Double`, default: `50`) + + Rate [Hz] to execute FFT and publish its results. + +## data_to_spectrum.py + + Generalized version of `audio_to_spectrum.py`. + This script can convert multiple message types to spectrum. + + - ### Publishing topics + + Same as `audio_to_spectrum.py`. + + - ### Subscribing topics + - `~input` (`AnyMsg`) + + Topic to which message including data you want to convert to spectrum is published. + + - ### Parameters + - `~expression_to_get_data` (`String`, default: `m.data`) + + Python expression to get data from the input message `m`. For example, if your input is `std_msgs/Float64`, it is `m.data`. + Just accessing a field of `m` is recommended. + If you want to do a complex calculation (e.g., using `numpy`), use `transform` of `topic_tools` before this node. + + - `~data_sampling_rate` (`Int`, default: `500`) + + Sampling rate [Hz] of input data. + + - `~fft_sampling_period` (`Double`, default: `0.3`) + + Period [s] to sample input data for one FFT. + + - `~fft_exec_rate` (`Double`, default: `50`) + + Rate [Hz] to execute FFT and publish its results. + + - `~is_integer` (`Bool`, default: `false`) + + Whether input data is integer or not. For example, if your input is `std_msgs/Float64`, it is `false`. + + - `~is_signed` (`Bool`, default: `true`) + + Whether input data is signed or not. For example, if your input is `std_msgs/Float64`, it is `true`. + + - `~bitdepth` (`Int`, default: `64`) + + Number of bits per input data. For example, if your input is `std_msgs/Float64`, it is `64`. + + - `~n_channel` (`Int`, default: `1`) + + If your input is scalar, it is `1`. + If your input is flattened 2D matrix, it is number of channel of original matrix. + + - `~target_channel` (`Int`, default: `0`) + + If your input is scalar, it is `0`. + If your input is flattened 2D matrix, it is target channel. + +## spectrum_filter.py + + A script to filter spectrum. + + - ### Publishing topics + - `~output` (`jsk_recognition_msgs/Spectrum`) + + Filtered spectrum data (`low_cut_freq`-`high_cut_freq`). + + - ### Subscribing topics + - `~input` (`jsk_recognition_msgs/Spectrum`) + + Original spectrum data. + + - ### Parameters + - `~data_sampling_rate` (`Int`, default: `500`) + + Sampling rate [Hz] of data used in generation of original spectrum data. 
+ + - `~high_cut_freq` (`Int`, default: `250`) Threshold to limit the maximum frequency of the output spectrum. - - `~low_cut_freq` (`Int`, default: `1`) + - `~low_cut_freq` (`Int`, default: `0`) Threshold to limit the minimum frequency of the output spectrum. ## spectrum_to_spectrogram.py + A script to convert spectrum to spectrogram. - ### Publishing topics @@ -128,7 +243,7 @@ roslaunch audio_to_spectrogram sample_audio_to_spectrogram.launch Number of bits per audio data. - - `~maximum_amplitude` (`Int`, default: `10000`) + - `~maximum_amplitude` (`Double`, default: `10000.0`) Maximum range of amplitude to plot. @@ -140,6 +255,66 @@ roslaunch audio_to_spectrogram sample_audio_to_spectrogram.launch Publish rate [Hz] of audio amplitude image topic. +## data_amplitude_plot.py + + Generalized version of `audio_amplitude_plot.py`. + + - ### Publishing topics + + - `~output/viz` (`sensor_msgs/Image`) + + Data amplitude plot image. + + - ### Subscribing topics + - `~input` (`AnyMsg`) + + Topic to which message including data whose amplitude you want to plot is published. + + - ### Parameters + - `~expression_to_get_data` (`String`, default: `m.data`) + + Python expression to get data from the input message `m`. For example, if your input is `std_msgs/Float64`, it is `m.data`. + Just accessing a field of `m` is recommended. + If you want to do a complex calculation (e.g., using `numpy`), use `transform` of `topic_tools` before this node. + + - `~data_sampling_rate` (`Int`, default: `500`) + + Sampling rate [Hz] of input data. + + - `~is_integer` (`Bool`, default: `false`) + + Whether input data is integer or not. For example, if your input is `std_msgs/Float64`, it is `false`. + + - `~is_signed` (`Bool`, default: `true`) + + Whether input data is signed or not. For example, if your input is `std_msgs/Float64`, it is `true`. + + - `~bitdepth` (`Int`, default: `64`) + + Number of bits per input data. For example, if your input is `std_msgs/Float64`, it is `64`. + + - `~n_channel` (`Int`, default: `1`) + + If your input is scalar, it is `1`. + If your input is flattened 2D matrix, it is number of channel of original matrix. + + - `~target_channel` (`Int`, default: `0`) + + If your input is scalar, it is `0`. + If your input is flattened 2D matrix, it is target channel. + + - `~maximum_amplitude` (`Double`, default: `10.0`) + + Maximum range of amplitude to plot. + + - `~window_size` (`Double`, default: `10.0`) + + Window size of input data to plot. + + - `~rate` (`Double`, default: `10.0`) + + Publish rate [Hz] of data amplitude image topic. + ## spectrum_plot.py A script to publish frequency vs amplitude plot image. @@ -157,3 +332,20 @@ roslaunch audio_to_spectrogram sample_audio_to_spectrogram.launch - `~spectrum` (`jsk_recognition_msgs/Spectrum`) Spectrum data calculated from audio by FFT. + + - ### Parameters + - `~min_amp` (`Double`, default: `0.0`) + + Minimum value of amplitude in plot. + + - `~max_amp` (`Double`, default: `20.0`) + + Maximum value of amplitude in plot. + + - `~queue_size` (`Int`, default: `1`) + + Queue size of spectrum subscriber. + + - `~max_rate` (`Double`, default: `-1`) + + Maximum publish rate [Hz] of frequency vs amplitude plot image. Setting this value low reduces CPU load. `-1` means no maximum limit. diff --git a/audio_to_spectrogram/cfg/AudioAmplitudePlot.cfg b/audio_to_spectrogram/cfg/AudioAmplitudePlot.cfg deleted file mode 100755 index a626a14603..0000000000 --- a/audio_to_spectrogram/cfg/AudioAmplitudePlot.cfg +++ /dev/null @@ -1,13 +0,0 @@ -#! 
/usr/bin/env python - -PACKAGE = 'audio_to_spectrogram' - -from dynamic_reconfigure.parameter_generator_catkin import * - - -gen = ParameterGenerator() - -gen.add("maximum_amplitude", int_t, 0, "Maximum range of amplitude to plot", 10000, 1, 100000) -gen.add("window_size", double_t, 0, "Window size of sound input to plot", 10.0, 0.1, 10000) - -exit(gen.generate(PACKAGE, PACKAGE, "AudioAmplitudePlot")) diff --git a/audio_to_spectrogram/cfg/DataAmplitudePlot.cfg b/audio_to_spectrogram/cfg/DataAmplitudePlot.cfg new file mode 100755 index 0000000000..9c2c962160 --- /dev/null +++ b/audio_to_spectrogram/cfg/DataAmplitudePlot.cfg @@ -0,0 +1,13 @@ +#! /usr/bin/env python + +PACKAGE = 'audio_to_spectrogram' + +from dynamic_reconfigure.parameter_generator_catkin import * + + +gen = ParameterGenerator() + +gen.add("maximum_amplitude", double_t, 0, "Maximum range of amplitude to plot", 10.0) +gen.add("window_size", double_t, 0, "Window size of data input to plot", 10.0) + +exit(gen.generate(PACKAGE, PACKAGE, "DataAmplitudePlot")) diff --git a/audio_to_spectrogram/docs/images/wrench_amplitude.jpg b/audio_to_spectrogram/docs/images/wrench_amplitude.jpg new file mode 100644 index 0000000000..fd1ae4dc0a Binary files /dev/null and b/audio_to_spectrogram/docs/images/wrench_amplitude.jpg differ diff --git a/audio_to_spectrogram/docs/images/wrench_normalized_half_spectrum.jpg b/audio_to_spectrogram/docs/images/wrench_normalized_half_spectrum.jpg new file mode 100644 index 0000000000..11065960ee Binary files /dev/null and b/audio_to_spectrogram/docs/images/wrench_normalized_half_spectrum.jpg differ diff --git a/audio_to_spectrogram/docs/images/wrench_spectrogram.jpg b/audio_to_spectrogram/docs/images/wrench_spectrogram.jpg new file mode 100644 index 0000000000..0e7b7679eb Binary files /dev/null and b/audio_to_spectrogram/docs/images/wrench_spectrogram.jpg differ diff --git a/audio_to_spectrogram/docs/images/wrench_spectrogram_source.jpg b/audio_to_spectrogram/docs/images/wrench_spectrogram_source.jpg new file mode 100644 index 0000000000..27e427484e Binary files /dev/null and b/audio_to_spectrogram/docs/images/wrench_spectrogram_source.jpg differ diff --git a/audio_to_spectrogram/launch/audio_to_spectrogram.launch b/audio_to_spectrogram/launch/audio_to_spectrogram.launch index 56f1c139aa..ae0eaf8551 100644 --- a/audio_to_spectrogram/launch/audio_to_spectrogram.launch +++ b/audio_to_spectrogram/launch/audio_to_spectrogram.launch @@ -2,20 +2,34 @@ + + + + + + + + + - + + + + + - @@ -28,66 +42,32 @@ - - - - n_channel: $(arg n_channel) - mic_sampling_rate: $(arg mic_sampling_rate) - fft_sampling_period: 0.3 - bitdepth: $(arg bitdepth) - high_cut_freq: $(arg high_cut_freq) - low_cut_freq: $(arg low_cut_freq) - fft_exec_rate: 50 - - + + + + + + + + + + + + + - - - - image_height: 300 - image_width: 300 - spectrogram_period: $(arg spectrogram_period) - - - - - - - - n_channel: $(arg n_channel) - mic_sampling_rate: $(arg mic_sampling_rate) - bitdepth: $(arg bitdepth) - - - - - - - - - - - - - - - - - - - - - - - - do_dynamic_scaling: true - - colormap: 2 - - - + + + + + + + + + + + + diff --git a/audio_to_spectrogram/launch/audio_to_spectrum.launch b/audio_to_spectrogram/launch/audio_to_spectrum.launch new file mode 100644 index 0000000000..fd2650fdee --- /dev/null +++ b/audio_to_spectrogram/launch/audio_to_spectrum.launch @@ -0,0 +1,52 @@ + + + + + + + + + + + + + + + + + + + n_channel: $(arg n_channel) + target_channel: $(arg target_channel) + mic_sampling_rate: $(arg mic_sampling_rate) + fft_sampling_period: $(arg 
fft_sampling_period) + bitdepth: $(arg bitdepth) + fft_exec_rate: $(arg fft_exec_rate) + + + + + + + + n_channel: $(arg n_channel) + target_channel: $(arg target_channel) + mic_sampling_rate: $(arg mic_sampling_rate) + bitdepth: $(arg bitdepth) + maximum_amplitude: $(arg audio_amp_plot_max_amp) + window_size: $(arg audio_amp_plot_window_size) + rate: $(arg audio_amp_plot_rate) + + + + + + + + + + diff --git a/audio_to_spectrogram/launch/data_to_spectrum.launch b/audio_to_spectrogram/launch/data_to_spectrum.launch new file mode 100644 index 0000000000..3acf38845f --- /dev/null +++ b/audio_to_spectrogram/launch/data_to_spectrum.launch @@ -0,0 +1,61 @@ + + + + + + + + + + + + + + + + + + + + + + expression_to_get_data: $(arg expression_to_get_data) + data_sampling_rate: $(arg data_sampling_rate) + fft_sampling_period: $(arg fft_sampling_period) + is_integer: $(arg is_integer) + is_signed: $(arg is_signed) + bitdepth: $(arg bitdepth) + n_channel: $(arg n_channel) + target_channel: $(arg target_channel) + fft_exec_rate: $(arg fft_exec_rate) + + + + + + + + expression_to_get_data: $(arg expression_to_get_data) + data_sampling_rate: $(arg data_sampling_rate) + is_integer: $(arg is_integer) + is_signed: $(arg is_signed) + bitdepth: $(arg bitdepth) + n_channel: $(arg n_channel) + target_channel: $(arg target_channel) + maximum_amplitude: $(arg data_amp_plot_max_amp) + window_size: $(arg data_amp_plot_window_size) + rate: $(arg data_amp_plot_rate) + + + + + + + + + + diff --git a/audio_to_spectrogram/launch/spectrum_plot.launch b/audio_to_spectrogram/launch/spectrum_plot.launch new file mode 100644 index 0000000000..1ec8c2cf14 --- /dev/null +++ b/audio_to_spectrogram/launch/spectrum_plot.launch @@ -0,0 +1,29 @@ + + + + + + + + + + + + + + + min_amp: $(arg min_amp) + max_amp: $(arg max_amp) + queue_size: $(arg queue_size) + max_rate: $(arg max_rate) + + + + + + + + + + diff --git a/audio_to_spectrogram/launch/spectrum_to_spectrogram.launch b/audio_to_spectrogram/launch/spectrum_to_spectrogram.launch new file mode 100644 index 0000000000..11cae6f7c2 --- /dev/null +++ b/audio_to_spectrogram/launch/spectrum_to_spectrogram.launch @@ -0,0 +1,65 @@ + + + + + + + + + + + + + + + + + + + + + + data_sampling_rate: $(arg data_sampling_rate) + high_cut_freq: $(arg high_cut_freq) + low_cut_freq: $(arg low_cut_freq) + + + + + + + + image_height: $(arg spectrogram_height) + image_width: $(arg spectrogram_width) + spectrogram_period: $(arg spectrogram_period) + + + + + + + + + + + + + + + + + + + + + do_dynamic_scaling: $(arg do_dynamic_scaling) + colormap: $(arg colormap) + + + + + diff --git a/audio_to_spectrogram/launch/wrench_to_spectrogram.launch b/audio_to_spectrogram/launch/wrench_to_spectrogram.launch new file mode 100644 index 0000000000..9c1ca7e144 --- /dev/null +++ b/audio_to_spectrogram/launch/wrench_to_spectrogram.launch @@ -0,0 +1,87 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/audio_to_spectrogram/package.xml b/audio_to_spectrogram/package.xml index 3d72033ad7..a1f5101094 100644 --- a/audio_to_spectrogram/package.xml +++ b/audio_to_spectrogram/package.xml @@ -15,13 +15,18 @@ audio_common_msgs cv_bridge dynamic_reconfigure + geometry_msgs image_view jsk_recognition_msgs jsk_topic_tools python-matplotlib python3-matplotlib + rostopic sensor_msgs + std_msgs + topic_tools + jsk_tools roslaunch rostest diff --git 
a/audio_to_spectrogram/sample/data/wrench_sensing_drill_vibration.bag b/audio_to_spectrogram/sample/data/wrench_sensing_drill_vibration.bag new file mode 100644 index 0000000000..cdf0cb60e8 Binary files /dev/null and b/audio_to_spectrogram/sample/data/wrench_sensing_drill_vibration.bag differ diff --git a/audio_to_spectrogram/sample/sample_wrench_to_spectrogram.launch b/audio_to_spectrogram/sample/sample_wrench_to_spectrogram.launch new file mode 100644 index 0000000000..702025ceee --- /dev/null +++ b/audio_to_spectrogram/sample/sample_wrench_to_spectrogram.launch @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + + + diff --git a/audio_to_spectrogram/scripts/audio_amplitude_plot.py b/audio_to_spectrogram/scripts/audio_amplitude_plot.py index 856b149fff..54dc338d3b 100755 --- a/audio_to_spectrogram/scripts/audio_amplitude_plot.py +++ b/audio_to_spectrogram/scripts/audio_amplitude_plot.py @@ -1,85 +1,22 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- -from __future__ import division - -import cv_bridge -from dynamic_reconfigure.server import Server -from jsk_topic_tools import ConnectionBasedTransport -import matplotlib.pyplot as plt -import numpy as np import rospy -import sensor_msgs.msg from audio_to_spectrogram import AudioBuffer -from audio_to_spectrogram.cfg import AudioAmplitudePlotConfig as Config -from audio_to_spectrogram import convert_matplotlib_to_img +from audio_to_spectrogram import DataAmplitudePlot -class AudioAmplitudePlot(ConnectionBasedTransport): +class AudioAmplitudePlot(DataAmplitudePlot): def __init__(self): - super(AudioAmplitudePlot, self).__init__() - - self.audio_buffer = AudioBuffer.from_rosparam() - - # Set matplotlib config - self.fig = plt.figure(figsize=(8, 5)) - self.ax = self.fig.add_subplot(1, 1, 1) - self.ax.grid(True) - self.ax.set_xlabel('Time [s]', fontsize=12) - self.ax.set_ylabel('Amplitude', fontsize=12) - self.line, = self.ax.plot([0, 0], label='Amplitude of Audio') - - # ROS dynamic reconfigure - self.srv = Server(Config, self.config_callback) - - self.pub_img = self.advertise( - '~output/viz', sensor_msgs.msg.Image, queue_size=1) - - def start_timer(self): - rate = rospy.get_param('~rate', 10) - if rate == 0: - rospy.logwarn('You cannot set 0 as the rate; change it to 10.') - rate = 10 - self.timer = rospy.Timer(rospy.Duration(1.0 / rate), self.timer_cb) - - def stop_timer(self): - self.timer.shutdown() - - def subscribe(self): - self.audio_buffer.subscribe() - self.start_timer() - - def unsubscribe(self): - self.audio_buffer.unsubscribe() - self.stop_timer() - - def config_callback(self, config, level): - self.maximum_amplitude = config.maximum_amplitude - self.window_size = config.window_size - self.audio_buffer.window_size = self.window_size - return config - - def timer_cb(self, timer): - window_size = self.window_size - amp = self.audio_buffer.read(window_size) - if len(amp) == 0: - return - times = np.linspace(-window_size, 0.0, len(amp)) - - # Plot audio amplitude. 
- self.line.set_data(times, amp) - self.ax.set_xlim((times.min(), times.max())) - self.ax.set_ylim((-self.maximum_amplitude, self.maximum_amplitude)) - - self.ax.legend(loc='upper right') - if self.pub_img.get_num_connections() > 0: - bridge = cv_bridge.CvBridge() - img = convert_matplotlib_to_img(self.fig) - img_msg = bridge.cv2_to_imgmsg(img, encoding='rgb8') - img_msg.header.stamp = rospy.Time.now() - self.pub_img.publish(img_msg) + # Overwrite dynamic_reconfigure's default values + param_name = '~maximum_amplitude' + if not rospy.has_param(param_name): + rospy.set_param(param_name, 10000) + + super(AudioAmplitudePlot, self).__init__( + data_buffer=AudioBuffer.from_rosparam(), + ) if __name__ == '__main__': diff --git a/audio_to_spectrogram/scripts/audio_to_spectrum.py b/audio_to_spectrogram/scripts/audio_to_spectrum.py index 920bc67c58..0fd85254cc 100755 --- a/audio_to_spectrogram/scripts/audio_to_spectrum.py +++ b/audio_to_spectrogram/scripts/audio_to_spectrum.py @@ -1,67 +1,19 @@ #!/usr/bin/env python -import numpy as np import rospy -from audio_common_msgs.msg import AudioData -from jsk_recognition_msgs.msg import Spectrum - from audio_to_spectrogram import AudioBuffer +from audio_to_spectrogram import DataToSpectrum # This node execute FFT to audio (audio_common_msgs/AudioData) # The format of audio topic is assumed to be wave. -class AudioToSpectrum(object): +class AudioToSpectrum(DataToSpectrum): def __init__(self): - super(AudioToSpectrum, self).__init__() - - self.audio_buffer = AudioBuffer.from_rosparam(auto_start=True) - fft_sampling_period = rospy.get_param('~fft_sampling_period', 0.3) - self.audio_buffer.window_size = fft_sampling_period - mic_sampling_rate = self.audio_buffer.input_sample_rate - - # fft config - window_function = np.arange( - 0.0, 1.0, 1.0 / self.audio_buffer.audio_buffer_len) - self.window_function = 0.54 - 0.46 * np.cos( - 2 * np.pi * window_function) - high_cut_freq = rospy.get_param('~high_cut_freq', 800) - if high_cut_freq > mic_sampling_rate / 2: - rospy.logerr('Set high_cut_freq lower than {} Hz'.format( - mic_sampling_rate / 2)) - low_cut_freq = rospy.get_param('~low_cut_freq', 1) # remove 0 Hz - self.freq = np.fft.fftfreq( - self.audio_buffer.audio_buffer_len, d=1./mic_sampling_rate) - self.cutoff_mask = np.where( - (low_cut_freq <= self.freq) & (self.freq <= high_cut_freq), - True, False) - # How many times fft is executed in one second - # fft_exec_rate equals to output spectrogram hz - self.fft_exec_rate = rospy.get_param('~fft_exec_rate', 50) - - # Publisher and Subscriber - self.pub_spectrum = rospy.Publisher( - '~spectrum', Spectrum, queue_size=1) - self.pub_spectrum_filtered = rospy.Publisher( - '~spectrum_filtered', Spectrum, queue_size=1) - rospy.Timer(rospy.Duration(1. 
/ self.fft_exec_rate), self.timer_cb) - - def timer_cb(self, timer): - if len(self.audio_buffer) != self.audio_buffer.audio_buffer_len: - return - audio_data = self.audio_buffer.read() - # Calc spectrum by fft - amplitude = np.fft.fft(audio_data * self.window_function) - amplitude = np.log(np.abs(amplitude)) - spectrum_msg = Spectrum() - spectrum_msg.header.stamp = rospy.Time.now() - spectrum_msg.amplitude = amplitude - spectrum_msg.frequency = self.freq - self.pub_spectrum.publish(spectrum_msg) - spectrum_msg.amplitude = amplitude[self.cutoff_mask] - spectrum_msg.frequency = self.freq[self.cutoff_mask] - self.pub_spectrum_filtered.publish(spectrum_msg) + super(AudioToSpectrum, self).__init__( + data_buffer=AudioBuffer.from_rosparam(auto_start=True), + ) if __name__ == '__main__': diff --git a/audio_to_spectrogram/scripts/data_amplitude_plot.py b/audio_to_spectrogram/scripts/data_amplitude_plot.py new file mode 100755 index 0000000000..e34ad7b97c --- /dev/null +++ b/audio_to_spectrogram/scripts/data_amplitude_plot.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python + +import rospy + +from audio_to_spectrogram import DataAmplitudePlot + + +if __name__ == '__main__': + rospy.init_node('data_amplitude_plot') + DataAmplitudePlot() + rospy.spin() diff --git a/audio_to_spectrogram/scripts/data_to_spectrum.py b/audio_to_spectrogram/scripts/data_to_spectrum.py new file mode 100755 index 0000000000..62f6e1b798 --- /dev/null +++ b/audio_to_spectrogram/scripts/data_to_spectrum.py @@ -0,0 +1,12 @@ +#!/usr/bin/env python + +import rospy + +from audio_to_spectrogram import DataToSpectrum + +# This node execute FFT to input data + +if __name__ == '__main__': + rospy.init_node('data_to_spectrum') + DataToSpectrum() + rospy.spin() diff --git a/audio_to_spectrogram/scripts/spectrum_filter.py b/audio_to_spectrogram/scripts/spectrum_filter.py new file mode 100755 index 0000000000..bfa26d7808 --- /dev/null +++ b/audio_to_spectrogram/scripts/spectrum_filter.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python + +from __future__ import division + +import numpy as np +import rospy + +from jsk_recognition_msgs.msg import Spectrum +from jsk_topic_tools import ConnectionBasedTransport + + +class SpectrumFilter(ConnectionBasedTransport): + + def __init__(self): + super(SpectrumFilter, self).__init__() + data_sampling_rate = rospy.get_param('~data_sampling_rate', 500) + self.high_cut_freq = rospy.get_param('~high_cut_freq', 250) + nyquist_freq = data_sampling_rate / 2.0 + if self.high_cut_freq > nyquist_freq: + rospy.logerr('Set high_cut_freq not more than {} Hz'.format( + nyquist_freq)) + self.low_cut_freq = rospy.get_param('~low_cut_freq', 0) + + self.pub = self.advertise('~output', Spectrum, queue_size=1) + + def subscribe(self): + self.sub = rospy.Subscriber('~input', Spectrum, self.input_cb) + + def unsubscribe(self): + self.sub.unregister() + + def input_cb(self, sub_msg): + amp = np.array(sub_msg.amplitude, dtype=np.float32) + freq = np.array(sub_msg.frequency, dtype=np.float32) + cutoff_mask = np.where( + (self.low_cut_freq <= freq) & (freq <= self.high_cut_freq), + True, False) + pub_msg = Spectrum() + pub_msg.header.stamp = sub_msg.header.stamp + pub_msg.amplitude = amp[cutoff_mask] + pub_msg.frequency = freq[cutoff_mask] + self.pub.publish(pub_msg) + + +if __name__ == '__main__': + rospy.init_node('spectrum_filter') + SpectrumFilter() + rospy.spin() diff --git a/audio_to_spectrogram/scripts/spectrum_plot.py b/audio_to_spectrogram/scripts/spectrum_plot.py index bf27a824f7..2486d192db 100755 --- 
a/audio_to_spectrogram/scripts/spectrum_plot.py +++ b/audio_to_spectrogram/scripts/spectrum_plot.py @@ -5,6 +5,9 @@ import cv_bridge from jsk_topic_tools import ConnectionBasedTransport +import matplotlib +from audio_to_spectrogram import check_matplotlib_version; check_matplotlib_version() +matplotlib.use('Agg') import matplotlib.pyplot as plt import numpy as np import rospy @@ -19,13 +22,28 @@ class SpectrumPlot(ConnectionBasedTransport): def __init__(self): super(SpectrumPlot, self).__init__() + self.min_amp = rospy.get_param('~min_amp', 0.0) + self.max_amp = rospy.get_param('~max_amp', 20.0) + self.queue_size = rospy.get_param('~queue_size', 1) + max_rate = rospy.get_param('~max_rate', -1) + if max_rate > 0: + self.min_period = 1.0 / max_rate + else: + self.min_period = -1 + self.prev_pub_tm = None # Set matplotlib config self.fig = plt.figure(figsize=(8, 5)) - self.fig.suptitle('Spectrum plot', size=12) self.fig.subplots_adjust(left=0.1, right=0.95, top=0.90, bottom=0.1, wspace=0.2, hspace=0.6) self.ax = self.fig.add_subplot(1, 1, 1) self.ax.grid(True) + # self.fig.suptitle('Spectrum plot', size=12) + self.ax.set_title('Spectrum plot', fontsize=12) + # Use self.ax.set_title() instead of + # self.fig.suptitle() to use self.fig.tight_layout() + # preventing characters from being cut off + # cf. https://tm23forest.com/contents/matplotlib-tightlayout-with-figure-suptitle + # https://matplotlib.org/2.2.4/tutorials/intermediate/tight_layout_guide.html self.ax.set_xlabel('Frequency [Hz]', fontsize=12) self.ax.set_ylabel('Amplitude', fontsize=12) self.line, = self.ax.plot([0, 0], label='Amplitude of Spectrum') @@ -35,19 +53,31 @@ def __init__(self): def subscribe(self): self.sub_spectrum = rospy.Subscriber( - '~spectrum', Spectrum, self._cb, queue_size=1000) + '~spectrum', Spectrum, self._cb, queue_size=self.queue_size) def unsubscribe(self): self.sub_spectrum.unregister() def _cb(self, msg): + # Keep max_rate + if self.min_period > 0: + curr_tm = rospy.Time.now().to_sec() + if self.prev_pub_tm is not None: + elapsed_tm = curr_tm - self.prev_pub_tm + if elapsed_tm < 0: + # Time moved backwards (e.g., rosbag play --loop) + pass + elif elapsed_tm < self.min_period: + return + self.prev_pub_tm = curr_tm # Plot spectrum self.amp = np.array(msg.amplitude) self.freq = np.array(msg.frequency) self.line.set_data(self.freq, self.amp) self.ax.set_xlim((self.freq.min(), self.freq.max())) - self.ax.set_ylim((0.0, 20)) + self.ax.set_ylim((self.min_amp, self.max_amp)) self.ax.legend(loc='upper right') + self.fig.tight_layout() if self.pub_img.get_num_connections() > 0: bridge = cv_bridge.CvBridge() img = convert_matplotlib_to_img(self.fig) diff --git a/audio_to_spectrogram/scripts/spectrum_to_spectrogram.py b/audio_to_spectrogram/scripts/spectrum_to_spectrogram.py index f5be653334..86d7c21451 100755 --- a/audio_to_spectrogram/scripts/spectrum_to_spectrogram.py +++ b/audio_to_spectrogram/scripts/spectrum_to_spectrogram.py @@ -1,9 +1,13 @@ #!/usr/bin/env python +from __future__ import division + import cv2 from cv_bridge import CvBridge import numpy as np import rospy +from distutils.version import LooseVersion +import pkg_resources from jsk_recognition_msgs.msg import Spectrum from sensor_msgs.msg import Image @@ -34,10 +38,18 @@ def __init__(self): '~spectrogram', Image, queue_size=1) publish_rate = rospy.get_param( '~publish_rate', float(self.image_width / self.spectrogram_period)) - rospy.Timer( - rospy.Duration( - 1.0 / publish_rate), - self.timer_cb) + + timer_kwargs = dict( + period=rospy.Duration(1.0 
/ publish_rate), + callback=self.timer_cb, + oneshot=False, + ) + if (LooseVersion(pkg_resources.get_distribution('rospy').version) >= + LooseVersion('1.12.0')) and rospy.get_param('/use_sim_time', None): + # on >=kinetic, it raises ROSTimeMovedBackwardsException + # when we use rosbag play --loop. + timer_kwargs['reset'] = True + self.timer = rospy.Timer(**timer_kwargs) self.bridge = CvBridge() def audio_cb(self, msg): diff --git a/audio_to_spectrogram/src/audio_to_spectrogram/__init__.py b/audio_to_spectrogram/src/audio_to_spectrogram/__init__.py index d8d91d0b47..657ce0c11c 100644 --- a/audio_to_spectrogram/src/audio_to_spectrogram/__init__.py +++ b/audio_to_spectrogram/src/audio_to_spectrogram/__init__.py @@ -1,2 +1,6 @@ -from .audio_buffer import AudioBuffer -from .convert_matplotlib import convert_matplotlib_to_img +from audio_to_spectrogram.audio_buffer import AudioBuffer +from audio_to_spectrogram.compat import check_matplotlib_version +from audio_to_spectrogram.convert_matplotlib import convert_matplotlib_to_img +from audio_to_spectrogram.data_amplitude_plot import DataAmplitudePlot +from audio_to_spectrogram.data_buffer import DataBuffer +from audio_to_spectrogram.data_to_spectrum import DataToSpectrum diff --git a/audio_to_spectrogram/src/audio_to_spectrogram/audio_buffer.py b/audio_to_spectrogram/src/audio_to_spectrogram/audio_buffer.py index 6c73c39c15..41f54ed618 100644 --- a/audio_to_spectrogram/src/audio_to_spectrogram/audio_buffer.py +++ b/audio_to_spectrogram/src/audio_to_spectrogram/audio_buffer.py @@ -1,14 +1,9 @@ -from __future__ import division - -import array -from threading import Lock - -from audio_common_msgs.msg import AudioData -import numpy as np import rospy +from audio_to_spectrogram.data_buffer import DataBuffer -class AudioBuffer(object): + +class AudioBuffer(DataBuffer): def __init__(self, topic_name='~audio', input_sample_rate=16000, @@ -18,44 +13,21 @@ def __init__(self, topic_name='~audio', get_latest_data=False, discard_data=False, auto_start=False): - self.is_subscribing = True - self.get_latest_data = get_latest_data - self.discard_data = discard_data - self._window_size = window_size - self.audio_buffer_len = int(self._window_size * input_sample_rate) - self.lock = Lock() - self.bitdepth = bitdepth - self.n_channel = n_channel - self.target_channel = min(self.n_channel - 1, max(0, target_channel)) - self.input_sample_rate = input_sample_rate - self.type_code = {} - for code in ['b', 'h', 'i', 'l']: - self.type_code[array.array(code).itemsize] = code - - self.dtype = self.type_code[self.bitdepth / 8] - self.audio_buffer = np.array([], dtype=self.dtype) - - self.max_value = 2 ** (self.bitdepth - 1) - 1 - - self.topic_name = topic_name - - if auto_start: - self.subscribe() - - def __len__(self): - return len(self.audio_buffer) - @property - def window_size(self): - return self._window_size - - @window_size.setter - def window_size(self, size): - with self.lock: - self._window_size = size - self.audio_buffer_len = int(self._window_size - * self.input_sample_rate) - self.audio_buffer = np.array([], dtype=self.dtype) + super(AudioBuffer, self).__init__( + topic_name=topic_name, + expr_to_get_data='m.data', # field of audio_common_msgs/AudioData + input_sample_rate=input_sample_rate, + window_size=window_size, + is_integer=True, + is_signed=True, + bitdepth=bitdepth, + n_channel=n_channel, + target_channel=target_channel, + get_latest_data=get_latest_data, + discard_data=discard_data, + auto_start=auto_start, + ) @staticmethod def from_rosparam(**kwargs): 
@@ -68,51 +40,3 @@ def from_rosparam(**kwargs): n_channel=n_channel, target_channel=target_channel, **kwargs) - - def subscribe(self): - self.audio_buffer = np.array([], dtype=self.dtype) - self.sub_audio = rospy.Subscriber( - self.topic_name, AudioData, self.audio_cb) - - def unsubscribe(self): - self.sub_audio.unregister() - - def _read(self, size, normalize=False): - with self.lock: - if self.get_latest_data: - audio_buffer = self.audio_buffer[-size:] - else: - audio_buffer = self.audio_buffer[:size] - if self.discard_data: - self.audio_buffer = self.audio_buffer[size:] - if normalize is True: - audio_buffer = audio_buffer / self.max_value - return audio_buffer - - def sufficient_data(self, size): - return len(self.audio_buffer) < size - - def read(self, size=None, wait=False, normalize=False): - if size is None: - size = self.audio_buffer_len - size = int(size * self.input_sample_rate) - while wait is True \ - and not rospy.is_shutdown() and len(self.audio_buffer) < size: - rospy.sleep(0.001) - return self._read(size, normalize=normalize) - - def close(self): - try: - self.sub_audio.unregister() - except Exception: - pass - self.audio_buffer = np.array([], dtype=self.dtype) - - def audio_cb(self, msg): - audio_buffer = np.frombuffer(msg.data, dtype=self.dtype) - audio_buffer = audio_buffer[self.target_channel::self.n_channel] - with self.lock: - self.audio_buffer = np.append( - self.audio_buffer, audio_buffer) - self.audio_buffer = self.audio_buffer[ - -self.audio_buffer_len:] diff --git a/audio_to_spectrogram/src/audio_to_spectrogram/compat.py b/audio_to_spectrogram/src/audio_to_spectrogram/compat.py new file mode 100644 index 0000000000..2a3cce1c82 --- /dev/null +++ b/audio_to_spectrogram/src/audio_to_spectrogram/compat.py @@ -0,0 +1,25 @@ +import os +import subprocess +import warnings + +import matplotlib + + +def check_matplotlib_version( + python_version=os.getenv('ROS_PYTHON_VERSION')): + if python_version == '2': + python = 'python' + else: + python = 'python{}'.format(python_version) + out = subprocess.check_output( + ['dpkg', '-L', '{}-matplotlib'.format(python)]) + if isinstance(out, bytes): + out = out.decode('utf-8') + matplotlib_filelists = out.split() + if os.path.dirname(matplotlib.__file__) in matplotlib_filelists: + return True + else: + warnings.warn( + "It looks like you are using pip's matplotlib. 
" + "Apt's matplotlib is recommended.") + return False diff --git a/audio_to_spectrogram/src/audio_to_spectrogram/convert_matplotlib.py b/audio_to_spectrogram/src/audio_to_spectrogram/convert_matplotlib.py index 0b11d6b801..4324cda94e 100644 --- a/audio_to_spectrogram/src/audio_to_spectrogram/convert_matplotlib.py +++ b/audio_to_spectrogram/src/audio_to_spectrogram/convert_matplotlib.py @@ -7,6 +7,7 @@ import numpy as np import PIL +import PIL.Image def convert_matplotlib_to_img(fig): diff --git a/audio_to_spectrogram/src/audio_to_spectrogram/data_amplitude_plot.py b/audio_to_spectrogram/src/audio_to_spectrogram/data_amplitude_plot.py new file mode 100644 index 0000000000..dd7311fc99 --- /dev/null +++ b/audio_to_spectrogram/src/audio_to_spectrogram/data_amplitude_plot.py @@ -0,0 +1,100 @@ +from __future__ import division + +from distutils.version import LooseVersion +import pkg_resources +import cv_bridge +from dynamic_reconfigure.server import Server +from jsk_topic_tools import ConnectionBasedTransport +import matplotlib +from audio_to_spectrogram.compat import check_matplotlib_version; check_matplotlib_version() +matplotlib.use('Agg') +import matplotlib.pyplot as plt +import numpy as np +import rospy +import sensor_msgs.msg + +from audio_to_spectrogram.cfg import DataAmplitudePlotConfig as Config +from audio_to_spectrogram.convert_matplotlib import convert_matplotlib_to_img +from audio_to_spectrogram.data_buffer import DataBuffer + + +class DataAmplitudePlot(ConnectionBasedTransport): + + def __init__(self, data_buffer=None): + super(DataAmplitudePlot, self).__init__() + + if data_buffer is None: + self.data_buffer = DataBuffer.from_rosparam() + else: + self.data_buffer = data_buffer + + # Set matplotlib config + self.fig = plt.figure(figsize=(8, 5)) + self.ax = self.fig.add_subplot(1, 1, 1) + self.ax.grid(True) + self.ax.set_xlabel('Time [s]', fontsize=12) + self.ax.set_ylabel('Amplitude', fontsize=12) + self.line, = self.ax.plot([0, 0], label='Amplitude of Input Data') + + # ROS dynamic reconfigure + self.srv = Server(Config, self.config_callback) + + self.pub_img = self.advertise( + '~output/viz', sensor_msgs.msg.Image, queue_size=1) + + def start_timer(self): + rate = rospy.get_param('~rate', 10) + if rate == 0: + rospy.logwarn('You cannot set 0 as the rate; change it to 10.') + rate = 10 + + timer_kwargs = dict( + period=rospy.Duration(1.0 / rate), + callback=self.timer_cb, + oneshot=False, + ) + if (LooseVersion(pkg_resources.get_distribution('rospy').version) >= + LooseVersion('1.12.0')) and rospy.get_param('/use_sim_time', None): + # on >=kinetic, it raises ROSTimeMovedBackwardsException + # when we use rosbag play --loop. + timer_kwargs['reset'] = True + self.timer = rospy.Timer(**timer_kwargs) + + def stop_timer(self): + self.timer.shutdown() + self.timer = None + + def subscribe(self): + self.data_buffer.subscribe() + self.start_timer() + + def unsubscribe(self): + self.data_buffer.unsubscribe() + self.stop_timer() + + def config_callback(self, config, level): + self.maximum_amplitude = config.maximum_amplitude + self.window_size = config.window_size + self.data_buffer.window_size = self.window_size + return config + + def timer_cb(self, timer): + window_size = self.window_size + amp = self.data_buffer.read(window_size) + if len(amp) == 0: + return + times = np.linspace(-window_size, 0.0, len(amp)) + + # Plot data amplitude. 
+ self.line.set_data(times, amp) + self.ax.set_xlim((times.min(), times.max())) + self.ax.set_ylim((-self.maximum_amplitude, self.maximum_amplitude)) + + self.ax.legend(loc='upper right') + self.fig.tight_layout() + if self.pub_img.get_num_connections() > 0: + bridge = cv_bridge.CvBridge() + img = convert_matplotlib_to_img(self.fig) + img_msg = bridge.cv2_to_imgmsg(img, encoding='rgb8') + img_msg.header.stamp = rospy.Time.now() + self.pub_img.publish(img_msg) diff --git a/audio_to_spectrogram/src/audio_to_spectrogram/data_buffer.py b/audio_to_spectrogram/src/audio_to_spectrogram/data_buffer.py new file mode 100644 index 0000000000..aef9609edf --- /dev/null +++ b/audio_to_spectrogram/src/audio_to_spectrogram/data_buffer.py @@ -0,0 +1,170 @@ +from __future__ import division + +import array +from threading import Lock + +import numpy as np +import rospy +import rostopic + + +class DataBuffer(object): + + def __init__(self, topic_name='~input', + expr_to_get_data='m.data', + input_sample_rate=500, + window_size=10.0, + is_integer=False, + is_signed=True, + bitdepth=64, + n_channel=1, target_channel=0, + get_latest_data=False, + discard_data=False, + auto_start=False): + self.is_subscribing = True + self.get_latest_data = get_latest_data + self.discard_data = discard_data + self._window_size = window_size + self.data_buffer_len = int(self._window_size * input_sample_rate) + self.lock = Lock() + self.bitdepth = bitdepth + self.n_channel = n_channel + self.target_channel = min(self.n_channel - 1, max(0, target_channel)) + self.input_sample_rate = input_sample_rate + self.type_code = {} + if is_integer: + if is_signed: + codes = ['b', 'h', 'i', 'l'] + else: + codes = ['B', 'H', 'I', 'L'] + else: + codes = ['f', 'd'] + for code in codes: + self.type_code[array.array(code).itemsize] = code + + self.dtype = self.type_code[self.bitdepth / 8] + self.data_buffer = np.array([], dtype=self.dtype) + + if is_integer: + self.max_value = 2 ** (self.bitdepth - 1) - 1 + else: + self.max_value = None + + self.topic_name = topic_name + self.expr_to_get_data = expr_to_get_data + + if auto_start: + self.subscribe() + + def __len__(self): + return len(self.data_buffer) + + @property + def window_size(self): + return self._window_size + + @window_size.setter + def window_size(self, size): + with self.lock: + self._window_size = size + self.data_buffer_len = int(self._window_size + * self.input_sample_rate) + self.data_buffer = np.array([], dtype=self.dtype) + + @staticmethod + def from_rosparam(**kwargs): + expr_to_get_data = rospy.get_param( + '~expression_to_get_data', 'm.data') + n_channel = rospy.get_param('~n_channel', 1) + target_channel = rospy.get_param('~target_channel', 0) + data_sampling_rate = rospy.get_param('~data_sampling_rate', 500) + is_integer = rospy.get_param('~is_integer', False) + is_signed = rospy.get_param('~is_signed', True) + bitdepth = rospy.get_param('~bitdepth', 64) + return DataBuffer(expr_to_get_data=expr_to_get_data, + input_sample_rate=data_sampling_rate, + is_integer=is_integer, + is_signed=is_signed, + bitdepth=bitdepth, + n_channel=n_channel, + target_channel=target_channel, + **kwargs) + + def subscribe(self): + self.data_buffer = np.array([], dtype=self.dtype) + # https://github.com/ros/ros_comm/blob/1.14.13/tools/topic_tools/scripts/transform#L86-L87 + input_class, input_topic, self.input_fn = rostopic.get_topic_class( + rospy.resolve_name(self.topic_name), blocking=True) + # https://github.com/ros/ros_comm/blob/1.14.13/tools/topic_tools/scripts/transform#L95 + self.sub = 
rospy.Subscriber( + input_topic, input_class, self.input_cb) + + def unsubscribe(self): + self.sub.unregister() + + def _read(self, size, normalize=False): + with self.lock: + if self.get_latest_data: + data_buffer = self.data_buffer[-size:] + else: + data_buffer = self.data_buffer[:size] + if self.discard_data: + self.data_buffer = self.data_buffer[size:] + if normalize is True: + if self.max_value is not None: + data_buffer = data_buffer / self.max_value + else: + rospy.logerr('normalize is disabled with current settings') + return data_buffer + + def sufficient_data(self, size): + return len(self.data_buffer) < size + + def read(self, size=None, wait=False, normalize=False): + if size is None: + size = self.data_buffer_len + size = int(size * self.input_sample_rate) + while wait is True \ + and not rospy.is_shutdown() and len(self.data_buffer) < size: + rospy.sleep(0.001) + return self._read(size, normalize=normalize) + + def close(self): + try: + self.sub.unregister() + except Exception: + pass + self.data_buffer = np.array([], dtype=self.dtype) + + def input_cb(self, m): + # https://github.com/ros/ros_comm/blob/1.14.13/tools/topic_tools/scripts/transform#L98-L99 + if self.input_fn is not None: + m = self.input_fn(m) + data = eval(self.expr_to_get_data) + data_type = type(data) + if data_type is bytes: + data_buffer = np.frombuffer(data, dtype=self.dtype) + elif data_type in [list, tuple]: + data_buffer = np.fromiter(data, dtype=self.dtype) + elif data_type in [int, float]: + data_buffer = np.array([data], dtype=self.dtype) + else: + rospy.logerr('Unsupported data type: {}'.format(data_type)) + return + # This division into cases assumes + # expr_to_get_data returns a Python built-in numeric or sequence type + # deserialized from a ROS msg by scripts generated by genpy. + # Other than uint8[], this deserialization is executed with + # struct.Struct.unpack() and it returns int or float (or its tuple). + # Against uint8[], this deserialization returns bytes. 
+ # /opt/ros/melodic/lib/python2.7/dist-packages/std_msgs/msg/_Float64.py + # /opt/ros/melodic/lib/python2.7/dist-packages/audio_common_msgs/msg/_AudioData.py + # https://docs.python.org/2.7/library/struct.html#format-characters + # https://github.com/ros/genpy/blob/0.6.16/src/genpy/generate_struct.py#L165 + + data_buffer = data_buffer[self.target_channel::self.n_channel] + with self.lock: + self.data_buffer = np.append( + self.data_buffer, data_buffer) + self.data_buffer = self.data_buffer[ + -self.data_buffer_len:] diff --git a/audio_to_spectrogram/src/audio_to_spectrogram/data_to_spectrum.py b/audio_to_spectrogram/src/audio_to_spectrogram/data_to_spectrum.py new file mode 100644 index 0000000000..adcd01f98d --- /dev/null +++ b/audio_to_spectrogram/src/audio_to_spectrogram/data_to_spectrum.py @@ -0,0 +1,103 @@ +from __future__ import division + +import numpy as np +import rospy +from distutils.version import LooseVersion +import pkg_resources + +from jsk_recognition_msgs.msg import Spectrum + +from audio_to_spectrogram.data_buffer import DataBuffer + + +class DataToSpectrum(object): + + def __init__(self, data_buffer=None): + super(DataToSpectrum, self).__init__() + + if data_buffer is None: + self.data_buffer = DataBuffer.from_rosparam(auto_start=True) + else: + self.data_buffer = data_buffer + fft_sampling_period = rospy.get_param('~fft_sampling_period', 0.3) + self.data_buffer.window_size = fft_sampling_period + data_sampling_rate = self.data_buffer.input_sample_rate + + # fft config + window_function = np.arange( + 0.0, 1.0, 1.0 / self.data_buffer.data_buffer_len) + self.window_function = 0.54 - 0.46 * np.cos( + 2 * np.pi * window_function) + self.freq = np.fft.fftfreq( + self.data_buffer.data_buffer_len, d=1./data_sampling_rate) + # How many times fft is executed in one second + # fft_exec_rate equals to output spectrogram hz + self.fft_exec_rate = rospy.get_param('~fft_exec_rate', 50) + + # Publisher and Subscriber + self.pub_spectrum = rospy.Publisher( + '~spectrum', Spectrum, queue_size=1) + self.pub_norm_half_spectrum = rospy.Publisher( + '~normalized_half_spectrum', Spectrum, queue_size=1) + self.pub_log_spectrum = rospy.Publisher( + '~log_spectrum', Spectrum, queue_size=1) + + timer_kwargs = dict( + period=rospy.Duration(1.0 / self.fft_exec_rate), + callback=self.timer_cb, + oneshot=False, + ) + if (LooseVersion(pkg_resources.get_distribution('rospy').version) >= + LooseVersion('1.12.0')) and rospy.get_param('/use_sim_time', None): + # on >=kinetic, it raises ROSTimeMovedBackwardsException + # when we use rosbag play --loop. + timer_kwargs['reset'] = True + self.timer = rospy.Timer(**timer_kwargs) + + def publish_spectrum(self, pub, stamp, amp, freq): + spectrum_msg = Spectrum() + spectrum_msg.header.stamp = stamp + spectrum_msg.amplitude = amp + spectrum_msg.frequency = freq + pub.publish(spectrum_msg) + + def timer_cb(self, timer): + if len(self.data_buffer) != self.data_buffer.data_buffer_len: + return + data = self.data_buffer.read() + stamp = rospy.Time.now() + # Calc spectrum by fft + fft = np.fft.fft(data * self.window_function) + self.publish_spectrum( + self.pub_spectrum, + stamp, + np.abs(fft), + self.freq, + # Usual "amplitude spectrum". 
+ # https://ryo-iijima.com/fftresult/ + ) + self.publish_spectrum( + self.pub_norm_half_spectrum, + stamp, + np.abs(fft[self.freq >= 0] / + (self.data_buffer.data_buffer_len / 2.0)), + self.freq[self.freq >= 0], + # Spectrum which is "half" + # (having non-negative frequencies (0Hz-Nyquist frequency)) + # and is "normalized" + # (consistent with the amplitude of the original signal) + # https://ryo-iijima.com/fftresult/ + # https://stackoverflow.com/questions/63211851/why-divide-the-output-of-numpy-fft-by-n + # https://github.com/jsk-ros-pkg/jsk_recognition/issues/2761#issue-1550715400 + ) + self.publish_spectrum( + self.pub_log_spectrum, + stamp, + np.log(np.abs(fft)), + self.freq, + # Usually, log is applied to "power spectrum" (np.abs(fft)**2): + # np.log(np.abs(fft)**2), 20*np.log10(np.abs(fft)), ... + # But we use the above equation for simplicity. + # https://github.com/jsk-ros-pkg/jsk_recognition/issues/2761#issuecomment-1445810380 + # http://makotomurakami.com/blog/2020/05/23/5266/ + ) diff --git a/audio_to_spectrogram/test/audio_to_spectrogram.test b/audio_to_spectrogram/test/audio_to_spectrogram.test index 671975f661..41a687e295 100644 --- a/audio_to_spectrogram/test/audio_to_spectrogram.test +++ b/audio_to_spectrogram/test/audio_to_spectrogram.test @@ -11,6 +11,10 @@ topic_0: /spectrum_to_spectrogram/spectrogram timeout_0: 30 + topic_1: /audio_amplitude_plot/output/viz + timeout_1: 30 + topic_2: /preprocessed_spectrogram_source_plot/output/viz + timeout_2: 30 diff --git a/audio_to_spectrogram/test/wrench_to_spectrogram.test b/audio_to_spectrogram/test/wrench_to_spectrogram.test new file mode 100644 index 0000000000..78281615c9 --- /dev/null +++ b/audio_to_spectrogram/test/wrench_to_spectrogram.test @@ -0,0 +1,21 @@ + + + + + + + + + topic_0: /spectrum_to_spectrogram/spectrogram + timeout_0: 30 + topic_1: /data_amplitude_plot/output/viz + timeout_1: 30 + topic_2: /preprocessed_spectrogram_source_plot/output/viz + timeout_2: 30 + + + + diff --git a/checkerboard_detector/src/objectdetection_tf_publisher.py b/checkerboard_detector/src/objectdetection_tf_publisher.py index f38782074f..bdcb339a28 100755 --- a/checkerboard_detector/src/objectdetection_tf_publisher.py +++ b/checkerboard_detector/src/objectdetection_tf_publisher.py @@ -16,9 +16,9 @@ def __init__(self): self.br = tf.TransformBroadcaster() self.subscriber = rospy.Subscriber("ObjectDetection", ObjectDetection, self.simple_callback); else: - self.init_object_messages() self.frame_id = rospy.get_param("~frame_id", "object") self.subscriber = rospy.Subscriber("ObjectDetection", ObjectDetection, self.callback); + self.init_object_messages() def init_object_messages(self): if rospy.has_param('~checker_board_params/position_x'): diff --git a/doc/install_chainer_gpu.rst b/doc/install_chainer_gpu.md similarity index 67% rename from doc/install_chainer_gpu.rst rename to doc/install_chainer_gpu.md index ce5fc43147..cd4debf0b4 100644 --- a/doc/install_chainer_gpu.rst +++ b/doc/install_chainer_gpu.md @@ -15,37 +15,37 @@ Requirements Version Compatibilities for 18.04 --------------------------------- -(Recommended) Use CUDA 9.1 from Official ubuntu repository (https://packages.ubuntu.com/bionic/nvidia-cuda-dev) +(Recommended) Use CUDA 9.1 from Official ubuntu repository [https://launchpad.net/ubuntu/bionic/+package/nvidia-cuda-dev](https://launchpad.net/ubuntu/bionic/+package/nvidia-cuda-dev) - Chainer - - chainer == 6.7.0 (last version supoprting python2. 
See https://github.com/chainer/chainer/releases/tag/v6.7.0) - - cupy-cuda91 == 6.7.0 (chainer v6.7.0 requires cupy/cudnn for hardware acceleration support https://docs.chainer.org/en/v6.7.0/install.html) + - chainer == 6.7.0 (last version supporting python2. See [https://github.com/chainer/chainer/releases/tag/v6.7.0](https://github.com/chainer/chainer/releases/tag/v6.7.0)) + - cupy-cuda91 == 6.7.0 (chainer v6.7.0 requires cupy/cudnn for hardware acceleration support [https://docs.chainer.org/en/v6.7.0/install.html](https://docs.chainer.org/en/v6.7.0/install.html)) - PyTorch - - pytorch == 1.1.0 (Latest pytorch version supporting CUDA 9.1 https://download.pytorch.org/whl/cu90/torch_stable.html) - - CUDA >= 9.0 (Minimum required version for PyTorch 1.1.0 https://pytorch.org/get-started/previous-versions/#v110) + - pytorch == 1.1.0 (Latest pytorch version supporting CUDA 9.1 [https://download.pytorch.org/whl/cu90/torch_stable.html](https://download.pytorch.org/whl/cu90/torch_stable.html)) + - CUDA >= 9.0 (Minimum required version for PyTorch 1.1.0 [https://pytorch.org/get-started/previous-versions/#v110](https://pytorch.org/get-started/previous-versions/#v110)) -(Experimental) Use CUDA 10.2 from Nvidia Developer's site (https://developer.nvidia.com/cuda-10.2-download-archive) +(Experimental) Use CUDA 10.2 from Nvidia Developer's site [https://developer.nvidia.com/cuda-10.2-download-archive](https://developer.nvidia.com/cuda-10.2-download-archive) - Chainer - - chainer == 6.7.0 (last version supoprting python2. See https://github.com/chainer/chainer/releases/tag/v6.7.0) - - cupy >=6.7.0,<7.0.0 (chainer v6.7.0 requires cupy/cudnn for hardware acceleration support https://docs.chainer.org/en/v6.7.0/install.html) + - chainer == 6.7.0 (last version supporting python2. See [https://github.com/chainer/chainer/releases/tag/v6.7.0](https://github.com/chainer/chainer/releases/tag/v6.7.0)) + - cupy >=6.7.0,<7.0.0 (chainer v6.7.0 requires cupy/cudnn for hardware acceleration support [https://docs.chainer.org/en/v6.7.0/install.html](https://docs.chainer.org/en/v6.7.0/install.html)) - cuDNN < 8 (cupy 6.7.0 requires cuDNN v5000= and <=v7999) - - CUDA 10.2 (cuDNN v7.6.5 requires CUDA 10.2 https://developer.nvidia.com/rdp/cudnn-archive) + - CUDA 10.2 (cuDNN v7.6.5 requires CUDA 10.2 [https://developer.nvidia.com/rdp/cudnn-archive](https://developer.nvidia.com/rdp/cudnn-archive)) - PyTorch - pytorch >= 1.4.0 - - CUDA >= 9.2 (Minimum required version for PyTorch https://pytorch.org/get-started/previous-versions/#v140) - - Driver Version >= 396.26 (From CUDA Toolkit and Corresponding Driver Versions in https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html) + - CUDA >= 9.2 (Minimum required version for PyTorch [https://pytorch.org/get-started/previous-versions/#v140](https://pytorch.org/get-started/previous-versions/#v140)) + - Driver Version >= 396.26 (From CUDA Toolkit and Corresponding Driver Versions in [https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html](https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html)) Install CUDA ------------ -- Ubuntu 14.04 : Download deb file from https://developer.nvidia.com/cuda-downloads?target_os=Linux:: +- Ubuntu 14.04 : Download deb file from [https://developer.nvidia.com/cuda-downloads?target_os=Linux](https://developer.nvidia.com/cuda-downloads?target_os=Linux): ```bash # If you'd like to use CUDA8.0 on Ubuntu 14.04.
@@ -56,7 +56,7 @@ Install CUDA sudo apt-get install cuda ``` - - Add below to your `~/.bashrc`:: + - Add below to your `~/.bashrc`: ```bash # setup cuda & cudnn @@ -76,7 +76,7 @@ Install CUDA ``` -- Ubuntu 16.04 : Download deb file from https://developer.nvidia.com/cuda-downloads?target_os=Linux:: +- Ubuntu 16.04 : Download deb file from [https://developer.nvidia.com/cuda-downloads?target_os=Linux](https://developer.nvidia.com/cuda-downloads?target_os=Linux): ```bash # If you'd like to use CUDA9.2 on Ubuntu 16.04. @@ -91,7 +91,7 @@ Install CUDA sudo apt install nvidia-cuda-toolkit sudo apt install nvidia-cuda-dev -- (Experimental) Ubuntu 18.04 : CUDA 10.2 is the latest version which supports `jsk_perception`. Download deb file from https://developer.nvidia.com/cuda-downloads?target_os=Linux:: +- (Experimental) Ubuntu 18.04 : CUDA 10.2 is the latest version which supports `jsk_perception`. Download deb file from https://developer.nvidia.com/cuda-downloads?target_os=Linux: ```bash # If you'd like to use CUDA10.2 on Ubuntu 18.04. @@ -124,11 +124,11 @@ Install CUDA Install CUDNN ------------- -- If you install `pip install cupy-cuda91`, you do not need to install CUDNN manually. (c.f. https://github.com/jsk-ros-pkg/jsk_visualization/issues/809). Thus, default 18.04 user can use CUDA 9.1 and `cupy-cuda91==6.7.0` for `chainer==6.7.0` and you can SKIP this section. +- If you install `pip install cupy-cuda91`, you do not need to install CUDNN manually. (c.f. [https://github.com/jsk-ros-pkg/jsk_visualization/issues/809](https://github.com/jsk-ros-pkg/jsk_visualization/issues/809)). Thus, default 18.04 user can use CUDA 9.1 and `cupy-cuda91==6.7.0` for `chainer==6.7.0` and you can SKIP this section. Installing CUDNN manually only requires for experimental user who install CUDA 10.2 manually. -- You need to login at https://developer.nvidia.com/cudnn +- You need to login at [https://developer.nvidia.com/cudnn](https://developer.nvidia.com/cudnn) - Go to cuDNN Download and choose version - Download deb files of cuDNN Runtime Library and cuDNN Developer Library @@ -156,7 +156,7 @@ Install Cupy - (Default) Chainer 6.7.0 requires CuPy 6.7.0 and if you have CUDA 9.1, you can use CuPy pre-compiled binary package. - - Pre-compiled Install Cupy for CUDA 9.1 :: + - Pre-compiled Install Cupy for CUDA 9.1 : ```bash sudo pip install cupy-cuda91==6.7.0 @@ -164,7 +164,7 @@ Install Cupy - (Experimental) If you have newer CUDA version. You need to install CuPy with source distribution. This requires CUDNN before you run `pip install cupy` . - - Source Install Cupy for CUDA 10.2 :: + - Source Install Cupy for CUDA 10.2 : ```bash sudo pip install -vvv cupy --no-cache-dir @@ -174,7 +174,7 @@ Install Cupy Install PyTorch --------------- -- 18.04 provides CUDA 9.1 by defualt. To install PyTorch compatible with this version, download following wheel from https://download.pytorch.org/whl/cu90/torch_stable.html, and install manually. +- 18.04 provides CUDA 9.1 by defualt. To install PyTorch compatible with this version, download following wheel from [https://download.pytorch.org/whl/cu90/torch_stable.html](https://download.pytorch.org/whl/cu90/torch_stable.html), and install manually. ```bash sudo pip install torch-1.1.0-cp27-cp27mu-linux_x86_64.whl @@ -187,12 +187,12 @@ Install PyTorch sudo pip install torch==1.4.0 ``` -- See https://github.com/jsk-ros-pkg/jsk_recognition/pull/2601#issuecomment-876948260 for more info. 
+- See [https://github.com/jsk-ros-pkg/jsk_recognition/pull/2601#issuecomment-876948260](https://github.com/jsk-ros-pkg/jsk_recognition/pull/2601#issuecomment-876948260) for more info. Try Chainer Samples ----------- -You can try to run samples to check if the installation succeeded:: +You can try to run samples to check if the installation succeeded: roslaunch jsk_perception sample_fcn_object_segmentation.launch gpu:=0 roslaunch jsk_perception sample_people_pose_estimation_2d.launch GPU:=0 @@ -201,7 +201,7 @@ You can try to run samples to check if the installation succeeded:: Try PyTorch Samples ----------- -You can try to run samples to check if the installation succeeded:: +You can try to run samples to check if the installation succeeded: roslaunch jsk_perception sample_hand_pose_estimation_2d.launch gpu:=0 diff --git a/doc/jsk_pcl_ros_utils/nodes/pointcloud_to_pcd.rst b/doc/jsk_pcl_ros_utils/nodes/pointcloud_to_pcd.rst index 40debd2693..77a78990b8 100644 --- a/doc/jsk_pcl_ros_utils/nodes/pointcloud_to_pcd.rst +++ b/doc/jsk_pcl_ros_utils/nodes/pointcloud_to_pcd.rst @@ -12,6 +12,12 @@ Subscribing Topic * ``~input`` (``sensor_msgs/PoinCloud2``) +Advertising Services +----------------- +* ``~save_pcd`` (``std_srvs/Empty``) + + Convert PointCloud2 to pcd, saved under `{prefix}{stamp}.pcd`. + Parameters ---------- @@ -37,3 +43,5 @@ Parameters * ``duration`` (Double, default: ``1.0``) Saving duration. You can change saving frequency with this parameter. + If the duration is greater than 0.0, the pcd data is stored continuously under the set duration. + When you want to use this as ROS service, set the duration to 0.0. diff --git a/doc/jsk_perception/nodes/aws_detect_faces.rst b/doc/jsk_perception/nodes/aws_detect_faces.rst index 1c23d9fc94..6bbd29abf4 100644 --- a/doc/jsk_perception/nodes/aws_detect_faces.rst +++ b/doc/jsk_perception/nodes/aws_detect_faces.rst @@ -22,7 +22,7 @@ Subscribing Topic Publishing Topic ---------------- -* ``~faces`` (``opencv_msgs/FaceArrayStamped``) +* ``~faces`` (``opencv_apps/FaceArrayStamped``) Detected face positions, facial landmarks, emotions, presence of beard, sunglasses, and so on. diff --git a/doc/jsk_perception/nodes/classification_node.md b/doc/jsk_perception/nodes/classification_node.md new file mode 100644 index 0000000000..3262618c48 --- /dev/null +++ b/doc/jsk_perception/nodes/classification_node.md @@ -0,0 +1,83 @@ +# classification_node.py + +![](images/clip.png) + +The ROS node for Classification with CLIP. + +## System Configuration +![](images/large_scale_vil_system.png) + +This node requires to work with the Docker Container for inference. Please build the container at first following Setup instruction. + +### Prerequisite +This node requires NVIDIA GPU and more than 4GB GRAM to work properly. +You have to install nvidia-container-toolkit for using GPU with docker. Please follow [official instruction](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html). 
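+
+(Optional) For reference, a quick way to check that a CUDA-capable GPU with enough memory is visible is shown below. This is only an illustrative snippet, assuming PyTorch is installed on the machine that will run the container; it is not part of this node.
+
+```python
+# Illustrative check only: verify that a CUDA GPU and its total memory are visible to PyTorch.
+import torch
+
+if not torch.cuda.is_available():
+    raise SystemExit("No CUDA-capable GPU is visible to PyTorch")
+total_gb = torch.cuda.get_device_properties(0).total_memory / (1024 ** 3)
+print("GPU 0: {}, {:.1f} GB total memory".format(torch.cuda.get_device_name(0), total_gb))
+```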
+ +### Build the docker image +You have to build the docker image of OFA + +```shell +roscd jsk_perception/docker +make +``` + +## Subscribing topic +* `~image` (`sensor_msgs/Image`) + + Input image + +## Publishing topic +* `~result` (`jsk_recognition_msgs/ClassificationResult`) + + Classification result + +* `~result/image` (`sensor_msgs/Image`) + + Images used for inference + +* `~visualize` (`std_msgs/String`) + + Classification result to visualize + +## Action topic +* `~inference_server/goal` (`jsk_recognition_msgs/ClassificationTaskActionGoal`) + + Classification request with custom categories and image + +* `~inference_server/result` (`jsk_recognition_msgs/ClassificationTaskActionResult`) + + Clasification result of `~inference_server/goal` + +## Parameters +* `~host` (String, default: `localhost`) + + The host name or IP of inference container + +* `~port` (Integer, default: `8080`) + + The HTTP port of inference container + +## Dynamic Reconfigure Parameters +* `~queries` (string, default: `human;kettle;cup;glass`) + + Default categories used for subscribing image topic. + + You can send multiple queries with separating semicolon. + +### Run inference container on another host or another terminal +In the remote GPU machine, +```shell +cd jsk_recognition/jsk_perception/docker +./run_jsk_vil_api clip --port (Your vacant port) +``` + +In the ROS machine, +```shell +roslaunch jsk_perception classification.launch port:=(Your inference container port) host:=(Your inference container host) CLASSIFICATION_INPUT_IMAGE:=(Your image topic name) gui:=true +``` + + +### Run both inference container and ros node in single host +``` +roslaunch jsk_perception classification.launch run_api:=true CLASSIFICATION_INPUT_IMAGE:=(Your image topic name) gui:=true +``` diff --git a/doc/jsk_perception/nodes/images/clip.png b/doc/jsk_perception/nodes/images/clip.png new file mode 100644 index 0000000000..059b31e32b Binary files /dev/null and b/doc/jsk_perception/nodes/images/clip.png differ diff --git a/doc/jsk_perception/nodes/images/invert_mask_image.jpg b/doc/jsk_perception/nodes/images/invert_mask_image.jpg new file mode 100644 index 0000000000..041079262a Binary files /dev/null and b/doc/jsk_perception/nodes/images/invert_mask_image.jpg differ diff --git a/doc/jsk_perception/nodes/images/large_scale_vil_system.png b/doc/jsk_perception/nodes/images/large_scale_vil_system.png new file mode 100644 index 0000000000..4856b85d09 Binary files /dev/null and b/doc/jsk_perception/nodes/images/large_scale_vil_system.png differ diff --git a/doc/jsk_perception/nodes/images/point_pose_extractor_mouse.png b/doc/jsk_perception/nodes/images/point_pose_extractor_mouse.png new file mode 100644 index 0000000000..fd1cde2b7b Binary files /dev/null and b/doc/jsk_perception/nodes/images/point_pose_extractor_mouse.png differ diff --git a/doc/jsk_perception/nodes/images/vqa.png b/doc/jsk_perception/nodes/images/vqa.png new file mode 100644 index 0000000000..132e32784d Binary files /dev/null and b/doc/jsk_perception/nodes/images/vqa.png differ diff --git a/doc/jsk_perception/nodes/invert_mask_image.md b/doc/jsk_perception/nodes/invert_mask_image.md new file mode 100644 index 0000000000..30be57a1a0 --- /dev/null +++ b/doc/jsk_perception/nodes/invert_mask_image.md @@ -0,0 +1,29 @@ +# InvertMaskImage + +![](images/invert_mask_image.jpg) + +Inverts the input mask image. + +## Subscribing Topic + +* `~input` (`sensor_msgs/Image`) + + Input mask image. 
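+
+The inversion is conceptually equivalent to flipping every pixel of a `mono8` mask (0 becomes 255 and vice versa). The following is a minimal sketch of that operation with `cv_bridge` and OpenCV, for illustration only; in practice, use the nodelet itself.
+
+```python
+# Illustrative sketch: what "inverting" a mono8 mask image means.
+import cv2
+from cv_bridge import CvBridge
+
+bridge = CvBridge()
+
+def invert(mask_msg):
+    mask = bridge.imgmsg_to_cv2(mask_msg, desired_encoding='mono8')
+    inverted = cv2.bitwise_not(mask)  # 0 <-> 255
+    return bridge.cv2_to_imgmsg(inverted, encoding='mono8')
+```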
+ +## Publishing Topic + +* `~output` (`sensor_msgs/Image`) + + Inverted mask image. + +## Parameters + +* `~queue_size` (Int, default: `1`) + + How many messages you allow about the subscriber to keep in the queue. + +## Sample + +```bash +roslaunch jsk_perception sample_invert_mask_image.launch +``` diff --git a/doc/jsk_perception/nodes/point_pose_extractor.md b/doc/jsk_perception/nodes/point_pose_extractor.md index b7a7996917..bb28a99962 100644 --- a/doc/jsk_perception/nodes/point_pose_extractor.md +++ b/doc/jsk_perception/nodes/point_pose_extractor.md @@ -8,7 +8,8 @@ - `/ImageFeature0D` (`posedetection_msgs::ImageFeature0D`) - Image, camera and tempalte feature information. + Image, camera and template feature information. + You can use [ImageShift](https://jsk-docs.readthedocs.io/projects/jsk_recognition/en/latest/imagesift/nodes/imagesift.html) to get it. ## Publishing Topics @@ -30,62 +31,75 @@ - `/tf` (`tf2_msgs/TFMessage`) - Detected Object Frame. + Detected Object Frame when `~publish_tf` is set to true. ## Parameters -- `~template_filename` (str default: "/sample/opencv-logo2.png") - + path to template image -- `~object_width` (float) - + Width of template image -- `~object_height` (float) - + Height of template image -- `~reprojection_threshold` (float default: 3.0) -- `~distanceratio_threshold` (float default: 0.49) -- `~error_threshold` (float default: 50.0) -- `~theta_step` (float default: 5.0) -- `~phi_step` (float default: 5.0) -- `~viewer_window` (bool default: true) -- `~window_name` (str default: "sample1") -- `~autosize` (bool default: false) -- `~publish_null_object_detection` (bool default: false) -- `~publish_tf` (bool defaut: false) - + If set to true, detected object pose is also broadcasted as tf frame. -- `~child_frame_id` (string default: "matching") - + frame_id of detected object when `~broadcast_tf` is set to true. +- `~template_filename` (str default: `"/sample/opencv-logo2.png"`) + + path to template image + +- `~object_width` (float default: `0.06`) + + Width of template image + +- `~object_height` (float default: `0.0739`) + + Height of template image + +- `~relative_pose` (str default: `"0 0 0 0 0 0 1"`) + + Coordinate of the object relative to the texture + +- `~reprojection_threshold` (float default: `3.0`) +- `~distanceratio_threshold` (float default: `0.49`) +- `~error_threshold` (float default: `50.0`) +- `~theta_step` (float default: `5.0`) +- `~phi_step` (float default: `5.0`) +- `~viewer_window` (bool default: `true`) +- `~window_name` (str default: `"sample1"`) +- `~autosize` (bool default: `false`) + + The window size is automatically adjusted to fit the displayed image, and you cannot change the window size manually. + +- `~publish_null_object_detection` (bool default: `false`) +- `~publish_tf` (bool defaut: `false`) + + If set to true, detected object pose is also broadcasted as tf frame. + +- `~child_frame_id` (string default: `"matching"`) + + frame_id of detected object when `~publish_tf` is set to true. ## Service -- `/SetTemplate` +- `/SetTemplate` (`SetTemplate`) Used to add another template. +## Mouse Event + +![](images/point_pose_extractor_mouse.png) + + Set template from viewer window. + + To specify the range of template, left-click four corners clockwise from upper left. Selected points are reset by right-clicking. + + After all four points are selected, you can input template's width, height and filename. The filename should have an extention. 
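+
+When `~publish_tf` is set to true, the detected object pose described above can also be looked up from tf. Below is a minimal sketch, assuming the default `~child_frame_id` of `matching`; `camera_rgb_optical_frame` is a hypothetical parent frame, so replace it with the frame_id of your camera image.
+
+```python
+# Illustrative sketch: look up the object frame broadcast when ~publish_tf is true.
+# "camera_rgb_optical_frame" is a hypothetical frame name; use your camera's frame_id.
+import rospy
+import tf
+
+rospy.init_node('lookup_matching_frame')
+listener = tf.TransformListener()
+rate = rospy.Rate(1.0)
+while not rospy.is_shutdown():
+    try:
+        trans, rot = listener.lookupTransform(
+            'camera_rgb_optical_frame', 'matching', rospy.Time(0))
+        rospy.loginfo('object at %s (quaternion %s)', trans, rot)
+    except (tf.LookupException, tf.ConnectivityException, tf.ExtrapolationException):
+        pass
+    rate.sleep()
+```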
+ ## Sample `sample_point_pose_extractor.launch` -``` - - - - - - - - -``` ``` -roslaunch sample_point_pose_extractor.launch +roslaunch jsk_perception sample_point_pose_extractor.launch rostopic echo /ObjectDetection ``` ### Example of how to run set_template service + `client.call` is available only after the node receives `ImageFeature0D`. + ``` #!/usr/bin/env python # -*- coding: utf-8 -*- diff --git a/doc/jsk_perception/nodes/rect_array_in_panorama_to_bounding_box_array.md b/doc/jsk_perception/nodes/rect_array_in_panorama_to_bounding_box_array.md index e32930dfac..adfa13e358 100644 --- a/doc/jsk_perception/nodes/rect_array_in_panorama_to_bounding_box_array.md +++ b/doc/jsk_perception/nodes/rect_array_in_panorama_to_bounding_box_array.md @@ -7,10 +7,6 @@ Sample is `sample_rect_array_in_panorama_to_bounding_box_array.launch` ## Subscribing Topics -* `~panorama_image` (`sensor_msgs/Image`) - -Panorama image topic. - * `~panorama_info` (`jsk_recognition_msgs/PanoramaInfo`) Panorama info topic. diff --git a/doc/jsk_perception/nodes/vqa_node.md b/doc/jsk_perception/nodes/vqa_node.md new file mode 100644 index 0000000000..f31f0c0498 --- /dev/null +++ b/doc/jsk_perception/nodes/vqa_node.md @@ -0,0 +1,96 @@ +# vqa_node.py +![](images/vqa.png) + +The ROS node for Visual Question Answering(VQA). + +## System Configuration +![](images/large_scale_vil_system.png) + +This node requires to work with the Docker Container for inference. Please build the container at first following Setup instruction. + +## Setup + +### Prerequisite +This node requires NVIDIA GPU and more than 4GB GRAM to work properly. +You have to install nvidia-container-toolkit for using GPU with docker. Please follow [official instruction](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html). + +### Build the docker image +You have to build the docker image of OFA + +```shell +roscd jsk_perception/docker +make +``` + +## Subscribing topic +* `~image` (`sensor_msgs/Image`) + + Input image + +## Publishing topic +* `~result` (`jsk_recognition_msgs/VQAResult`) + + VQA result + +* `~result/image` (`sensor_msgs/Image`) + + Images used for inference + +* `~visualize` (`std_msgs/String`) + + VQA question and answer to visualize + +## Action topic +* `~inference_server/goal` (`jsk_recognition_msgs/VQATaskActionGoal`) + + VQA request with custom questions and image + +* `~inference_server/result` (`jsk_recognition_msgs/VQATaskActionResult`) + + VQA result of `~inference_server/goal` + +## Parameters +* `~host` (String, default: `localhost`) + + The host name or IP of inference container + +* `~port` (Integer, default: `8080`) + + The HTTP port of inference container + +## Dynamic Reconfigure Parameters +* `~questions` (string, default: `what does this image describe?`) + + Default questions used for subscribing image topic. + + You can send multiple questions with separating semicolon like the below. + ``` + What does this image describe?;What kinds of objects exists? + ``` + +## Sample + +### Run inference container on another host or another terminal +In the remote GPU machine, +```shell +cd jsk_recognition/jsk_perception/docker +./run_jsk_vil_api --port (Your vacant port) --ofa_task caption --ofa_model_scale huge +``` + + +`--ofa_task` should be `caption` or `vqa`. Empirically, the output results are more natural for VQA tasks with the Caption model than with the VQA model in OFA. + + +If you encounter GPU ram shortage, please make `ofa_model_scale` `large` . 
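+
+(Optional) Once the inference container is up, you can check it directly over HTTP before starting the ROS node. The following is a minimal sketch, assuming the container maps its API to port 8888 on localhost and was started with `--ofa_task caption`; `test.jpg` is a placeholder image file, and the endpoint should be `/vqa_gen` instead of `/caption` if you started the container with the VQA task. The JSON format follows the container's `server.py`.
+
+```python
+# Illustrative client only: POST an image and questions directly to the OFA container.
+# Adjust host, port and the endpoint ("/caption" or "/vqa_gen") to your own setup.
+import base64
+import json
+import requests
+
+with open('test.jpg', 'rb') as f:  # placeholder image file
+    image_b64 = base64.b64encode(f.read()).decode('utf-8')
+
+payload = {'image': image_b64,
+           'queries': ['what does this image describe?']}
+res = requests.post('http://localhost:8888/caption', data=json.dumps(payload))
+print(res.json()['results'])  # e.g. [{"question": ..., "answer": ...}]
+```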
+ + +In the ROS machine, +```shell +roslaunch jsk_perception vqa.launch port:=(Your inference container port) host:=(Your inference container host) VQA_INPUT_IMAGE:=(Your image topic name) gui:=true +``` + + +### Run both inference container and ros node in single host +``` +roslaunch jsk_perception vqa.launch run_api:=true VQA_INPUT_IMAGE:=(Your image topic name) gui:=true +``` diff --git a/imagesift/test/test_imagesift.test b/imagesift/test/test_imagesift.test index 2ca00099cf..929c19e6a5 100644 --- a/imagesift/test/test_imagesift.test +++ b/imagesift/test/test_imagesift.test @@ -6,7 +6,8 @@ + pkg="jsk_tools" type="test_topic_published.py" + retry="3"> topic_0: /Feature0D timeout_0: 10 diff --git a/jsk_pcl_ros/CMakeLists.txt b/jsk_pcl_ros/CMakeLists.txt index 49b1b37a93..34cd2d65d9 100644 --- a/jsk_pcl_ros/CMakeLists.txt +++ b/jsk_pcl_ros/CMakeLists.txt @@ -142,7 +142,7 @@ generate_dynamic_reconfigure_options( find_package(OpenCV REQUIRED core imgproc) -include_directories(include ${catkin_INCLUDE_DIRS} ${OpenCV_INCLUDE_DIRS} ${PCL_INCLUDE_DIRS}) +include_directories(include ${catkin_INCLUDE_DIRS} ${OpenCV_INCLUDE_DIRS} ${PCL_INCLUDE_DIRS} ${ml_classifiers_INCLUDE_DIRS}) link_directories(${catkin_LIBRARY_DIRS}) if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -z defs") @@ -507,10 +507,34 @@ install(TARGETS ${jsk_pcl_ros_install_libraries} LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} ) -install(DIRECTORY config euslisp launch plugins sample scripts test +install(DIRECTORY config euslisp launch plugins test DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} USE_SOURCE_PERMISSIONS ) +if($ENV{ROS_DISTRO} STREQUAL "indigo") # on noetic it needs catkin_install_python to support Python3 and it does not work on indigo for some reason... 
+install(DIRECTORY sample scripts + DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} + USE_SOURCE_PERMISSIONS + ) +else() +install(DIRECTORY sample scripts + DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} + USE_SOURCE_PERMISSIONS + PATTERN "*" + PATTERN "*/*.py" EXCLUDE +) + +file(GLOB SCRIPT_PROGRAMS scripts/*.py) +catkin_install_python( + PROGRAMS ${SCRIPT_PROGRAMS} + DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}/scripts +) +file(GLOB SAMPLE_SCRIPT_PROGRAMS sample/*.py) +catkin_install_python( + PROGRAMS ${SAMPLE_SCRIPT_PROGRAMS} + DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}/sample +) +endif() if (CATKIN_ENABLE_TESTING) find_package(jsk_perception REQUIRED) diff --git a/jsk_pcl_ros/package.xml b/jsk_pcl_ros/package.xml index 51635bba71..b4ef3165d4 100644 --- a/jsk_pcl_ros/package.xml +++ b/jsk_pcl_ros/package.xml @@ -91,6 +91,8 @@ visualization_msgs yaml-cpp + compressed_depth_image_transport + compressed_image_transport jsk_perception jsk_tools roslaunch diff --git a/jsk_pcl_ros/sample/include/play_rosbag_baxter_realsense_l515.xml b/jsk_pcl_ros/sample/include/play_rosbag_baxter_realsense_l515.xml index b77e1cb643..f88073eacd 100644 --- a/jsk_pcl_ros/sample/include/play_rosbag_baxter_realsense_l515.xml +++ b/jsk_pcl_ros/sample/include/play_rosbag_baxter_realsense_l515.xml @@ -6,11 +6,12 @@ + + args="--rate $(arg RATE) $(find jsk_pcl_ros)/sample/data/baxter_realsense_l515.bag --clock --loop"/> diff --git a/jsk_pcl_ros/sample/sample_incremental_model_registration.launch b/jsk_pcl_ros/sample/sample_incremental_model_registration.launch index a5d35e1ea8..4e43474db5 100644 --- a/jsk_pcl_ros/sample/sample_incremental_model_registration.launch +++ b/jsk_pcl_ros/sample/sample_incremental_model_registration.launch @@ -3,7 +3,7 @@ - + - + + + diff --git a/jsk_pcl_ros/scripts/check_depth_error/depth_error_calibration.py b/jsk_pcl_ros/scripts/check_depth_error/depth_error_calibration.py index 5e464e0745..f18e6d01f0 100755 --- a/jsk_pcl_ros/scripts/check_depth_error/depth_error_calibration.py +++ b/jsk_pcl_ros/scripts/check_depth_error/depth_error_calibration.py @@ -348,7 +348,7 @@ def updatePlot(): def generateFrequencyMap(): global width, height # bgr - img = np.tile(np.uint8([0,0,0]), (height / 10, width / 10, 1)) + img = np.tile(np.uint8([0,0,0]), (height // 10, width // 10, 1)) frequency = dict() for (u, v) in value_cache.keys(): min_color = np.uint8([255, 0, 0]) diff --git a/jsk_pcl_ros/src/incremental_model_registration_nodelet.cpp b/jsk_pcl_ros/src/incremental_model_registration_nodelet.cpp index 8999bbd05b..abb16d9b84 100644 --- a/jsk_pcl_ros/src/incremental_model_registration_nodelet.cpp +++ b/jsk_pcl_ros/src/incremental_model_registration_nodelet.cpp @@ -207,6 +207,7 @@ namespace jsk_pcl_ros std_srvs::Empty::Request& req, std_srvs::Empty::Response& res) { + boost::mutex::scoped_lock lock(mutex_); if (samples_.size() <= 1) { ROS_ERROR("no enough samples"); return false; @@ -251,6 +252,7 @@ namespace jsk_pcl_ros nonregistered_ros_cloud.header.stamp = ros::Time::now(); nonregistered_ros_cloud.header.frame_id = frame_id_; pub_cloud_non_registered_.publish(nonregistered_ros_cloud); + return true; } } diff --git a/jsk_pcl_ros/test/test_cluster_point_indices_decomposer_bbox.test b/jsk_pcl_ros/test/test_cluster_point_indices_decomposer_bbox.test index eb0e57e0b7..378be8ad4e 100644 --- a/jsk_pcl_ros/test/test_cluster_point_indices_decomposer_bbox.test +++ b/jsk_pcl_ros/test/test_cluster_point_indices_decomposer_bbox.test @@ -92,7 +92,7 @@ + time-limit="30" retry="3"> check_times: 10 @@ -102,7 
+102,7 @@ + time-limit="30" retry="3"> check_times: 10 @@ -112,7 +112,7 @@ + time-limit="30" retry="3"> check_times: 10 diff --git a/jsk_pcl_ros/test/test_convex_connected_voxels.test b/jsk_pcl_ros/test/test_convex_connected_voxels.test index 6f632d3796..e22143f7d6 100644 --- a/jsk_pcl_ros/test/test_convex_connected_voxels.test +++ b/jsk_pcl_ros/test/test_convex_connected_voxels.test @@ -12,7 +12,8 @@ + pkg="jsk_tools" type="test_topic_published.py" + retry="3" time-limit="120"> topic_0: /sample_convex_connected_voxels/output/indices timeout_0: 100 diff --git a/jsk_pcl_ros/test/test_organized_statistical_outlier_removal.test b/jsk_pcl_ros/test/test_organized_statistical_outlier_removal.test index bd3a095e4d..56519753ae 100644 --- a/jsk_pcl_ros/test/test_organized_statistical_outlier_removal.test +++ b/jsk_pcl_ros/test/test_organized_statistical_outlier_removal.test @@ -2,15 +2,16 @@ + + retry="3" time-limit="120"> topic_0: /statistical_outlier_removal/output - timeout_0: 30 + timeout_0: 100 diff --git a/jsk_pcl_ros_utils/include/jsk_pcl_ros_utils/pointcloud_to_pcd.h b/jsk_pcl_ros_utils/include/jsk_pcl_ros_utils/pointcloud_to_pcd.h index 5720025844..78ae3d4172 100644 --- a/jsk_pcl_ros_utils/include/jsk_pcl_ros_utils/pointcloud_to_pcd.h +++ b/jsk_pcl_ros_utils/include/jsk_pcl_ros_utils/pointcloud_to_pcd.h @@ -41,6 +41,7 @@ #include #include +#include #include #include @@ -55,6 +56,8 @@ namespace jsk_pcl_ros_utils virtual void onInit(); virtual void timerCallback (const ros::TimerEvent& event); virtual void configCallback(Config &config, uint32_t level); + void savePCD(); + bool savePCDCallback(std_srvs::Empty::Request& req, std_srvs::Empty::Response& res); boost::mutex mutex_; boost::shared_ptr > srv_; ros::Timer timer_; @@ -66,6 +69,7 @@ namespace jsk_pcl_ros_utils bool compressed_; std::string fixed_frame_; tf::TransformListener* tf_listener_; + ros::ServiceServer srv_save_pcd_server_; private: }; } diff --git a/jsk_pcl_ros_utils/scripts/cloud_on_plane_info.py b/jsk_pcl_ros_utils/scripts/cloud_on_plane_info.py index 818d38d66d..ce05f59536 100755 --- a/jsk_pcl_ros_utils/scripts/cloud_on_plane_info.py +++ b/jsk_pcl_ros_utils/scripts/cloud_on_plane_info.py @@ -6,6 +6,27 @@ from jsk_rviz_plugins.overlay_text_interface import OverlayTextInterface from jsk_recognition_msgs.msg import BoolStamped +# workaround until https://github.com/jsk-ros-pkg/jsk_visualization/pull/864 is merged and released. 
+class OverlayTextInterface_fix(OverlayTextInterface): + def publish(self, text): + msg = OverlayText() + msg.text = text + msg.width = int(self.config.width) + msg.height = int(self.config.height) + msg.top = int(self.config.top) + msg.left = int(self.config.left) + msg.fg_color.a = self.config.fg_alpha + msg.fg_color.r = self.config.fg_red + msg.fg_color.g = self.config.fg_green + msg.fg_color.b = self.config.fg_blue + msg.bg_color.a = self.config.bg_alpha + msg.bg_color.r = self.config.bg_red + msg.bg_color.g = self.config.bg_green + msg.bg_color.b = self.config.bg_blue + msg.text_size = self.config.text_size + self.pub.publish(msg) + + g_lock = Lock() g_msg = None def callback(msg): @@ -23,7 +44,7 @@ def publish_text(event): if __name__ == "__main__": rospy.init_node("cloud_on_plane_info") - text_interface = OverlayTextInterface("~text") + text_interface = OverlayTextInterface_fix("~text") sub = rospy.Subscriber("~input", BoolStamped, callback) rospy.Timer(rospy.Duration(0.1), publish_text) rospy.spin() diff --git a/jsk_pcl_ros_utils/src/pointcloud_to_pcd_nodelet.cpp b/jsk_pcl_ros_utils/src/pointcloud_to_pcd_nodelet.cpp index 294a7d4bbc..fe9be68586 100644 --- a/jsk_pcl_ros_utils/src/pointcloud_to_pcd_nodelet.cpp +++ b/jsk_pcl_ros_utils/src/pointcloud_to_pcd_nodelet.cpp @@ -49,7 +49,16 @@ namespace jsk_pcl_ros_utils timer_.stop(); } - void PointCloudToPCD::timerCallback (const ros::TimerEvent& event) + void PointCloudToPCD::timerCallback (const ros::TimerEvent& event) { + savePCD(); + } + + bool PointCloudToPCD::savePCDCallback(std_srvs::Empty::Request& req, std_srvs::Empty::Response& res) { + savePCD(); + return true; + } + + void PointCloudToPCD::savePCD() { pcl::PCLPointCloud2::ConstPtr cloud; cloud = ros::topic::waitForMessage("input", *pnh_); @@ -58,7 +67,7 @@ namespace jsk_pcl_ros_utils return; } - ROS_INFO ("Received %d data points in frame %s with the following fields: %s", + NODELET_INFO ("Received %d data points in frame %s with the following fields: %s", (int)cloud->width * cloud->height, cloud->header.frame_id.c_str (), pcl::getFieldsList (*cloud).c_str ()); @@ -67,7 +76,7 @@ namespace jsk_pcl_ros_utils Eigen::Quaternionf q = Eigen::Quaternionf::Identity (); if (!fixed_frame_.empty ()) { if (!tf_listener_->waitForTransform (fixed_frame_, cloud->header.frame_id, pcl_conversions::fromPCL (cloud->header).stamp, ros::Duration (duration_))) { - ROS_WARN("Could not get transform!"); + NODELET_WARN("Could not get transform!"); return; } tf::StampedTransform transform_stamped; @@ -81,7 +90,7 @@ namespace jsk_pcl_ros_utils std::stringstream ss; ss << prefix_ << cloud->header.stamp << ".pcd"; - ROS_INFO ("Data saved to %s", ss.str ().c_str ()); + NODELET_INFO ("Data saved to %s", ss.str ().c_str ()); pcl::PCDWriter writer; if(binary_) @@ -107,21 +116,22 @@ namespace jsk_pcl_ros_utils srv_ = boost::make_shared > (*pnh_); dynamic_reconfigure::Server::CallbackType f = boost::bind(&PointCloudToPCD::configCallback, this, _1, _2); srv_->setCallback (f); + srv_save_pcd_server_ = pnh_->advertiseService("save_pcd", &PointCloudToPCD::savePCDCallback, this); tf_listener_ = jsk_recognition_utils::TfListenerSingleton::getInstance(); if(binary_) { if(compressed_) { - ROS_INFO_STREAM ("Saving as binary compressed PCD"); + NODELET_INFO("Saving as binary compressed PCD") ; } else { - ROS_INFO_STREAM ("Saving as binary PCD"); + NODELET_INFO("Saving as binary PCD"); } } else { - ROS_INFO_STREAM ("Saving as binary PCD"); + NODELET_INFO("Saving as ASCII PCD"); } } @@ -133,7 +143,10 @@ namespace 
jsk_pcl_ros_utils compressed_ = config.compressed; fixed_frame_ = config.fixed_frame; duration_ = config.duration; - timer_ = pnh_->createTimer(ros::Duration(duration_), boost::bind(&PointCloudToPCD::timerCallback, this, _1)); + timer_.stop(); + if (duration_ != 0) { + timer_ = pnh_->createTimer(ros::Duration(duration_), boost::bind(&PointCloudToPCD::timerCallback, this, _1)); + } } } diff --git a/jsk_perception/CMakeLists.txt b/jsk_perception/CMakeLists.txt index 6b568589ae..428e6c8a98 100644 --- a/jsk_perception/CMakeLists.txt +++ b/jsk_perception/CMakeLists.txt @@ -43,9 +43,7 @@ find_package(catkin REQUIRED COMPONENTS find_package(roseus) -if(NOT $ENV{ROS_DISTRO} STREQUAL "indigo") # on noetic it needs catkin_install_python to support Python3 and it does not work on indigo for some reason... - catkin_python_setup() -endif() +catkin_python_setup() find_package(Boost REQUIRED COMPONENTS filesystem system) @@ -155,6 +153,9 @@ generate_dynamic_reconfigure_options( cfg/VirtualCameraMono.cfg cfg/AWSDetectFaces.cfg cfg/RemoveBlurredFrames.cfg + cfg/VQA.cfg + cfg/Classification.cfg + cfg/VideoToScene.cfg ) message(WARNING "DEPRECATION WARNING: srv files in jsk_pcl_ros are deprecated." @@ -238,6 +239,7 @@ jsk_add_nodelet(src/sparse_image_decoder.cpp "jsk_perception/SparseImageDecoder" jsk_add_nodelet(src/color_histogram.cpp "jsk_perception/ColorHistogram" "color_histogram") jsk_add_nodelet(src/background_substraction_nodelet.cpp "jsk_perception/BackgroundSubstraction" "background_substraction") jsk_add_nodelet(src/fisheye_to_panorama.cpp "jsk_perception/FisheyeToPanorama" "fisheye_to_panorama") +jsk_add_nodelet(src/point_pose_extractor.cpp "jsk_perception/PointPoseExtractor" "point_pose_extractor") # See https://github.com/drNoob13/fisheyeStitcher#pre-requisite if("${CMAKE_CXX_COMPILER_VERSION}" VERSION_GREATER "7.0" AND # >= 7.1 "${OpenCV_VERSION}" VERSION_GREATER "2.9.9" AND # >= 3.0.0 @@ -260,6 +262,7 @@ jsk_add_nodelet(src/overlay_image_color_on_mono.cpp "jsk_perception/OverlayImage jsk_add_nodelet(src/grid_label.cpp "jsk_perception/GridLabel" "grid_label") jsk_add_nodelet(src/color_histogram_label_match.cpp "jsk_perception/ColorHistogramLabelMatch" "color_histogram_label_match") jsk_add_nodelet(src/apply_mask_image.cpp "jsk_perception/ApplyMaskImage" "apply_mask_image") +jsk_add_nodelet(src/invert_mask_image.cpp "jsk_perception/InvertMaskImage" "invert_mask_image") jsk_add_nodelet(src/unapply_mask_image.cpp "jsk_perception/UnapplyMaskImage" "unapply_mask_image") jsk_add_nodelet(src/single_channel_histogram.cpp "jsk_perception/SingleChannelHistogram" "single_channel_histogram") jsk_add_nodelet(src/blob_detector.cpp "jsk_perception/BlobDetector" "blob_detector") @@ -294,7 +297,9 @@ jsk_add_nodelet(src/gaussian_blur.cpp "jsk_perception/GaussianBlur" "gaussian_bl jsk_add_nodelet(src/kmeans.cpp "jsk_perception/KMeans" "kmeans") jsk_add_nodelet(src/draw_rects.cpp "jsk_perception/DrawRects" "draw_rects") jsk_add_nodelet(src/remove_blurred_frames.cpp "jsk_perception/RemoveBlurredFrames" "remove_blurred_frames") +jsk_add_nodelet(src/split_image.cpp "jsk_perception/SplitImage" "split_image") if("${OpenCV_VERSION}" VERSION_GREATER "2.9.9") # >= 3.0.0 + jsk_add_nodelet(src/video_to_scene.cpp "jsk_perception/VideoToScene" "video_to_scene") jsk_add_nodelet(src/bing.cpp "jsk_perception/Bing" "bing") endif() jsk_add_nodelet(src/tabletop_color_difference_likelihood.cpp "jsk_perception/TabletopColorDifferenceLikelihood" "tabletop_color_difference_likelihood") @@ -352,7 +357,6 @@ 
target_link_libraries(oriented_gradient ${catkin_LIBRARIES} ${Boost_LIBRARIES}) jsk_add_node(src/camshiftdemo.cpp camshiftdemo) jsk_add_node(src/linemod.cpp linemod) -jsk_add_node(src/point_pose_extractor.cpp point_pose_extractor) jsk_add_node(src/white_balance_converter.cpp white_balance_converter) jsk_add_node(src/rectangle_detector.cpp rectangle_detector) jsk_add_node(src/calc_flow.cpp calc_flow) @@ -449,9 +453,17 @@ catkin_install_python( PROGRAMS ${NODE_SCRIPT_PROGRAMS} DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}/node_scripts ) +if(CATKIN_DEVEL_PREFIX) + add_custom_target(link_dir_mkdir + COMMAND ${CMAKE_COMMAND} -E make_directory ${CATKIN_DEVEL_PREFIX}/${CATKIN_PACKAGE_SHARE_DESTINATION}/node_scripts) + foreach(SUB_DIR craft deep_sort hmr openpose) + add_custom_target(link_dir_${SUB_DIR} ALL + COMMAND ${CMAKE_COMMAND} -E create_symlink ${PROJECT_SOURCE_DIR}/node_scripts/${SUB_DIR} ${CATKIN_DEVEL_PREFIX}/${CATKIN_PACKAGE_SHARE_DESTINATION}/node_scripts/${SUB_DIR}) + add_dependencies(link_dir_${SUB_DIR} link_dir_mkdir) + endforeach() +endif(CATKIN_DEVEL_PREFIX) endif() - # ------------------------------------------------------------------------------------ # Test # ------------------------------------------------------------------------------------ @@ -547,6 +559,8 @@ if(CATKIN_ENABLE_TESTING) jsk_add_rostest(test/overlay_image_color_on_mono.test) jsk_add_rostest(test/people_pose_estimation_2d.test) jsk_add_rostest(test/human_mesh_recovery.test) + jsk_add_rostest(test/point_pose_extractor.test) + jsk_add_rostest(test/point_pose_extractor_group_ns.test) jsk_add_rostest(test/pointit.test) jsk_add_rostest(test/polygon_array_color_histogram.test) jsk_add_rostest(test/polygon_array_color_likelihood.test) @@ -586,6 +600,9 @@ if(CATKIN_ENABLE_TESTING) jsk_add_rostest(test/ycc_decomposer.test) jsk_add_rostest(test/depth_image_filter.test) jsk_add_rostest(test/remove_blurred_frames.test) + if(TARGET JSK_NODELET_${PROJECT_NAME}_video_to_scene) + jsk_add_rostest(test/video_to_scene.test) + endif() if(TARGET JSK_NODELET_${PROJECT_NAME}_bing) jsk_add_rostest(test/bing.test) endif() diff --git a/jsk_perception/cfg/Classification.cfg b/jsk_perception/cfg/Classification.cfg new file mode 100755 index 0000000000..83a569377c --- /dev/null +++ b/jsk_perception/cfg/Classification.cfg @@ -0,0 +1,10 @@ +#!/usr/bin/env python +PACKAGE = "jsk_perception" + +from dynamic_reconfigure.parameter_generator_catkin import * + +gen = ParameterGenerator() + +gen.add("queries", str_t, 0, "A queries of Caption Task. The sentences are separated with ;", "human;kettle;cup;glass") + +exit(gen.generate(PACKAGE, "jsk_perception", "Classification")) diff --git a/jsk_perception/cfg/VQA.cfg b/jsk_perception/cfg/VQA.cfg new file mode 100755 index 0000000000..2f97b15c61 --- /dev/null +++ b/jsk_perception/cfg/VQA.cfg @@ -0,0 +1,12 @@ +#!/usr/bin/env python +PACKAGE = "jsk_perception" + +from dynamic_reconfigure.parameter_generator_catkin import * + +gen = ParameterGenerator() + +gen.add("questions", str_t, 0, + "Default questions of VQA. 
You can split questions by ;", + "what does this image describe?") + +exit(gen.generate(PACKAGE, "jsk_perception", "VQA")) diff --git a/jsk_perception/cfg/VideoToScene.cfg b/jsk_perception/cfg/VideoToScene.cfg new file mode 100755 index 0000000000..617e99ffca --- /dev/null +++ b/jsk_perception/cfg/VideoToScene.cfg @@ -0,0 +1,12 @@ +#!/usr/bin/env python + +PACKAGE='jsk_perception' + +from dynamic_reconfigure.parameter_generator_catkin import * + +gen = ParameterGenerator() + +gen.add("min_percent", int_t, 0, "The percentage of forgrand when the motion has stopped and publish image", 5, 0, 100) +gen.add("max_percent", int_t, 0, "The percentage of forgrand when the scene is not settled", 20, 0, 100) + +exit(gen.generate(PACKAGE, "jsk_perception", "VideoToScene")) diff --git a/jsk_perception/docker/Makefile b/jsk_perception/docker/Makefile new file mode 100644 index 0000000000..83eb2800ab --- /dev/null +++ b/jsk_perception/docker/Makefile @@ -0,0 +1,51 @@ +# +# Author: Yoshiki Obinata +# + +# api directories +OFAPROJECT = ofa +CLIPPROJECT = clip +# image names +OFAIMAGE = jsk-ofa-server +CLIPIMAGE = jsk-clip-server +# commands +BUILDIMAGE = docker build +REMOVEIMAGE = docker rmi +# configuration directories +ifdef ROS_HOME +CONFIGDIR = $(ROS_HOME)/data/jsk_perception +else +CONFIGDIR = $(HOME)/.ros/data/jsk_perception +endif +PARAMDIR = $(CONFIGDIR)/vil_params +# Download files +PARAMURLS = parameter_urls.txt +# OFA parameters +OFAPARAMFILES = $(foreach param, $(OFAPARAMS), $(PARAMDIR)/$(param)) + +all: ofa clip + +# TODO check command wget exists, nvidia-driver version + +# TODO add clip +$(PARAMDIR)/.download: + @if [ ! -d $(PARAMDIR) ]; then\ + mkdir -p $(PARAMDIR); \ + fi + wget -c -i $(PARAMURLS) -P $(PARAMDIR) + touch $(PARAMDIR)/.download + +ofa: $(PARAMDIR)/.download + $(BUILDIMAGE) $(OFAPROJECT) -t $(OFAIMAGE) -f $(OFAPROJECT)/Dockerfile + +clip: $(PARAMDIR)/.download + $(BUILDIMAGE) $(CLIPPROJECT) -t $(CLIPIMAGE) -f $(CLIPPROJECT)/Dockerfile + +# TODO add clip, glip +clean: + @$(REMOVEIMAGE) $(OFAIMAGE) + +wipe: clean + rm -fr $(PARAMDIR) + +.PHONY: clean wipe ofa clip diff --git a/jsk_perception/docker/clip/Dockerfile b/jsk_perception/docker/clip/Dockerfile new file mode 100644 index 0000000000..f824e1c377 --- /dev/null +++ b/jsk_perception/docker/clip/Dockerfile @@ -0,0 +1,16 @@ +FROM pytorch/pytorch:1.7.1-cuda11.0-cudnn8-devel +ARG DEBIAN_FRONTEND=noninteractive +RUN apt -o Acquire::AllowInsecureRepositories=true update \ + && apt-get install -y \ + curl \ + git \ + libopencv-dev \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* +# use requirements.txt generetaed by `pip freeze | grep ==` in the container created on 2023-03-31 +COPY requirements.txt /tmp/requirements.txt +RUN pip install -r /tmp/requirements.txt +RUN pip install git+https://github.com/openai/CLIP.git +RUN mkdir -p /workspace/Clip +COPY server.py /workspace/Clip +ENTRYPOINT cd /workspace/Clip && python server.py \ No newline at end of file diff --git a/jsk_perception/docker/clip/requirements.txt b/jsk_perception/docker/clip/requirements.txt new file mode 100644 index 0000000000..74c63fe7c0 --- /dev/null +++ b/jsk_perception/docker/clip/requirements.txt @@ -0,0 +1,31 @@ +backcall==0.2.0 +brotlipy==0.7.0 +certifi==2020.12.5 +click==8.1.3 +conda==4.9.2 +conda-build==3.21.4 +decorator==4.4.2 +dnspython==2.1.0 +Flask==2.2.3 +ftfy==6.1.1 +importlib-metadata==6.1.0 +itsdangerous==2.1.2 +Jinja2==3.1.2 +MarkupSafe==2.1.2 +mkl-fft==1.2.0 +mkl-random==1.1.1 +mkl-service==2.3.0 +olefile==0.46 +opencv-python==4.7.0.72 
+parso==0.7.0 +pkginfo==1.7.0 +pycosat==0.6.3 +python-etcd==0.4.5 +PyYAML==5.3.1 +regex==2023.3.23 +ruamel-yaml==0.15.87 +torch==1.7.1 +torchelastic==0.2.1 +torchvision==0.8.2 +Werkzeug==2.2.3 +zipp==3.15.0 diff --git a/jsk_perception/docker/clip/server.py b/jsk_perception/docker/clip/server.py new file mode 100644 index 0000000000..c26d32db62 --- /dev/null +++ b/jsk_perception/docker/clip/server.py @@ -0,0 +1,67 @@ +import clip +import cv2 +import numpy as np +import os +from PIL import Image as PLImage +import torch + +# web server +from flask import Flask, request, Response +import json +import base64 + + +def apply_half(t): + if t.dtype is torch.float32: + return t.to(dtype=torch.half) + return t + +class Inference: + def __init__(self, gpu_id=None): + self.gpu_id = gpu_id + self.device = "cuda" if torch.cuda.is_available() else "cpu" + self.model, self.preprocess = clip.load('ViT-B/32', self.device) + + def infer(self, img, texts): + # get cv2 image + image = cv2.resize(img, dsize=(640, 480)) # NOTE forcely + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + image = PLImage.fromarray(image) + image_input = self.preprocess(image).unsqueeze(0).to(self.device) + text_inputs = torch.cat([clip.tokenize(c) for c in texts]).to(self.device) + with torch.no_grad(): + image_features = self.model.encode_image(image_input) + text_features = self.model.encode_text(text_inputs) + image_features /= image_features.norm(dim=-1, keepdim=True) + text_features /= text_features.norm(dim=-1, keepdim=True) + probability = (100.0 * image_features @ text_features.T).softmax(dim=-1) + similarity = (text_features.cpu().numpy() @ image_features.cpu().numpy().T).T[0] # cosine similarity + values, indices = probability[0].topk(len(texts)) + results = {} + for value, index in zip(values, indices): + results[texts[index]] = (value.item(), float(similarity[index])) + return results + +# run +if __name__ == "__main__": + app = Flask(__name__) + infer = Inference() + + @app.route("/inference", methods=['POST']) + def caption_request(): + data = request.data.decode("utf-8") + data_json = json.loads(data) + # process image + image_b = data_json['image'] + image_dec = base64.b64decode(image_b) + data_np = np.fromstring(image_dec, dtype='uint8') + img = cv2.imdecode(data_np, 1) + # get text + texts = data_json['queries'] + infer_results = infer.infer(img, texts) + results = [] + for q in infer_results: + results.append({"question": q, "probability": infer_results[q][0], "similarity": infer_results[q][1]}) + return Response(response=json.dumps({"results": results}), status=200) + + app.run("0.0.0.0", 8080, threaded=True) diff --git a/jsk_perception/docker/ofa/Dockerfile b/jsk_perception/docker/ofa/Dockerfile new file mode 100644 index 0000000000..31ff077074 --- /dev/null +++ b/jsk_perception/docker/ofa/Dockerfile @@ -0,0 +1,21 @@ +FROM pytorch/pytorch:1.8.1-cuda11.1-cudnn8-devel +ARG DEBIAN_FRONTEND=noninteractive +RUN apt -o Acquire::AllowInsecureRepositories=true update \ + && apt-get install -y \ + curl \ + git \ + libopencv-dev \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* +# use requirements.txt generetaed by `pip freeze | grep ==` in the container created on 2023-03-31 +COPY requirements.txt /tmp/requirements.txt +RUN sed -i 's/pycocotools==2\.0\.4/pycocotools==2.0.6/g' /tmp/requirements.txt && pip install -r /tmp/requirements.txt +RUN git clone https://github.com/OFA-Sys/OFA \ + && mkdir -p /var/mount +RUN cd OFA \ + && git checkout 1809b55 \ + && sed -i 's/pycocotools==2\.0\.4/pycocotools==2.0.6/g' 
requirements.txt \ + && pip install -r requirements.txt \ + && pip install "protobuf<3.20.0" "numpy>=1.20.3,<1.24" "pytorch-lightning==1.7.7" "torch==1.8.1" flask +COPY server.py /workspace/OFA +ENTRYPOINT cd /workspace/OFA && python server.py diff --git a/jsk_perception/docker/ofa/requirements.txt b/jsk_perception/docker/ofa/requirements.txt new file mode 100644 index 0000000000..25f0bd0144 --- /dev/null +++ b/jsk_perception/docker/ofa/requirements.txt @@ -0,0 +1,96 @@ +absl-py==1.4.0 +aiohttp==3.8.4 +aiosignal==1.3.1 +antlr4-python3-runtime==4.8 +async-timeout==4.0.2 +attrs==22.2.0 +bitarray==2.7.3 +brotlipy==0.7.0 +cachetools==5.3.0 +certifi==2020.12.5 +charset-normalizer==3.1.0 +click==8.1.3 +colorama==0.4.6 +conda==4.9.2 +conda-build==3.21.4 +contourpy==1.0.7 +cycler==0.11.0 +Cython==0.29.33 +datasets==2.11.0 +dill==0.3.6 +dnspython==2.1.0 +einops==0.6.0 +Flask==2.2.3 +fonttools==4.39.3 +frozenlist==1.3.3 +fsspec==2023.3.0 +ftfy==6.0.3 +google-auth==2.17.1 +google-auth-oauthlib==0.4.6 +grpcio==1.53.0 +huggingface-hub==0.13.3 +hydra-core==1.0.7 +importlib-metadata==6.1.0 +importlib-resources==5.12.0 +itsdangerous==2.1.2 +jedi==0.17.0 +Jinja2==3.1.2 +joblib==1.2.0 +kiwisolver==1.4.4 +lightning-utilities==0.8.0 +lxml==4.9.2 +Markdown==3.4.3 +MarkupSafe==2.1.2 +matplotlib==3.7.1 +mkl-fft==1.3.0 +mkl-random==1.1.1 +mkl-service==2.3.0 +multidict==6.0.4 +multiprocess==0.70.14 +nltk==3.8.1 +numpy==1.23.5 +oauthlib==3.2.2 +olefile==0.46 +omegaconf==2.0.6 +opencv-python==4.7.0.72 +packaging==23.0 +pandas==1.5.3 +pkginfo==1.7.0 +portalocker==2.7.0 +protobuf==3.19.6 +pyarrow==11.0.0 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pycocoevalcap==1.2 +pycocotools==2.0.4 +pycosat==0.6.3 +pyDeprecate==0.3.2 +pyparsing==3.0.9 +python-dateutil==2.8.2 +python-etcd==0.4.5 +pytorch-lightning==1.7.7 +PyYAML==5.4.1 +regex==2023.3.23 +requests-oauthlib==1.3.1 +responses==0.18.0 +rouge-score==0.1.2 +rsa==4.9 +ruamel-yaml==0.15.87 +sacrebleu==2.3.1 +tabulate==0.9.0 +tensorboard==2.12.0 +tensorboard-data-server==0.7.0 +tensorboard-plugin-wit==1.8.1 +tensorboardX==2.4.1 +timm==0.6.13 +torch==1.8.1 +torchelastic==0.2.2 +torchmetrics==0.11.4 +torchtext==0.9.1 +torchvision==0.9.1 +tqdm==4.65.0 +typing-extensions==4.5.0 +Werkzeug==2.2.3 +xxhash==3.2.0 +yarl==1.8.2 +zipp==3.15.0 diff --git a/jsk_perception/docker/ofa/server.py b/jsk_perception/docker/ofa/server.py new file mode 100644 index 0000000000..6c667e3328 --- /dev/null +++ b/jsk_perception/docker/ofa/server.py @@ -0,0 +1,278 @@ +# sys +import os +# inference +import torch +import numpy as np +import cv2 +from fairseq import utils, tasks +from fairseq import checkpoint_utils +from fairseq import options +from fairseq.dataclass.utils import convert_namespace_to_omegaconf +from utils.eval_utils import eval_step +from utils.zero_shot_utils import zero_shot_step +from tasks.mm_tasks.caption import CaptionTask +from tasks.mm_tasks.refcoco import RefcocoTask +from tasks.mm_tasks.vqa_gen import VqaGenTask +from models.ofa import OFAModel +from torchvision import transforms +from PIL import Image +# web server +from flask import Flask, request, Response +import json +import base64 + +PARAM_DIR = "/var/mount/params" +# OFA_PARAM[TASK][SCALE] +OFA_PARAM = { + "caption":{ + "large":"caption_large_best_clean.pt", + "huge":"caption_huge_best.pt" + }, + "refcoco":{ + "large":"refcocog_large_best.pt", + "huge":"refcocog_large_best.pt" + }, + "vqa_gen":{ + "large":"vqa_large_best.pt", + "huge":"vqa_large_best.pt" + } +} + +def apply_half(t): + if t.dtype is torch.float32: + return 
t.to(dtype=torch.half) + return t + +class Inference: + def __init__(self, task, model_scale): + self.use_cuda = torch.cuda.is_available() + self.use_fp16 = False + + # set params + param = OFA_PARAM[task][model_scale] + param_path = os.path.join(PARAM_DIR, param) + overrides={"bpe_dir":"utils/BPE", "eval_cider":False, "beam":5, + "max_len_b":16, "no_repeat_ngram_size":3, "seed":7} + + self.task_name = task + if task == "caption": + tasks.register_task(task, CaptionTask) + self.models, self.cfg, self.task = checkpoint_utils.load_model_ensemble_and_task( + utils.split_paths(param_path), + arg_overrides=overrides) + elif task == "refcoco": + tasks.register_task(self.task, RefcocoTask) + self.models, self.cfg, self.task = checkpoint_utils.load_model_ensemble_and_task( + utils.split_paths(param_path), + arg_overrides=overrides) + self.cfg.common.seed = 7 + self.cfg.generation.beam = 5 + self.cfg.generation.min_len = 4 + self.cfg.generation.max_len_a = 0 + self.cfg.generation.max_len_b = 4 + self.cfg.generation.no_repeat_ngram_size = 3 + if self.cfg.common.seed is not None and not self.cfg.generation.no_seed_provided: + np.random.seed(self.cfg.common.seed) + utils.set_torch_seed(self.cfg.common.seed) + elif task == "vqa_gen": + tasks.register_task('vqa_gen', VqaGenTask) + parser = options.get_generation_parser() + input_args = ["", "--task=vqa_gen", "--beam=100", "--unnormalized", "--path={}".format(param_path), "--bpe-dir=utils/BPE"] + args = options.parse_args_and_arch(parser, input_args) + cfg = convert_namespace_to_omegaconf(args) + self.task = tasks.setup_task(cfg.task) + self.models, self.cfg = checkpoint_utils.load_model_ensemble( + utils.split_paths(cfg.common_eval.path), + task=self.task) + else: + raise RuntimeError("Please select models from caption, refcoco, vqa_gen") + return + + # Move models to GPU + for model in self.models: + model.eval() + if self.use_fp16: + model.half() + if self.use_cuda and not self.cfg.distributed_training.pipeline_model_parallel: + model.cuda() + model.prepare_for_inference_(self.cfg) + + # Image transform + self.generator = self.task.build_generator(self.models, self.cfg.generation) + mean = [0.5, 0.5, 0.5] + std = [0.5, 0.5, 0.5] + self.patch_resize_transform = transforms.Compose([ + lambda image: image.convert("RGB"), + transforms.Resize((self.cfg.task.patch_image_size, self.cfg.task.patch_image_size), interpolation=Image.BICUBIC), + transforms.ToTensor(), + transforms.Normalize(mean=mean, std=std), + ]) + + self.pad_idx = self.task.src_dict.pad() + + def visual_grounding(self, Image, Text): + sample = self.construct_sample(Image, Text.lower()) + sample = utils.move_to_cuda(sample) if self.use_cuda else sample + sample = utils.apply_to_sample(apply_half, sample) if self.use_fp16 else sample + with torch.no_grad(): + result, scores = eval_step(self.task, self.generator, self.models, sample) + img = np.asarray(Image) + cv2.rectangle( + img, + (int(result[0]["box"][0]), int(result[0]["box"][1])), + (int(result[0]["box"][2]), int(result[0]["box"][3])), + (0, 255, 0), 3) + return img + + def encode_text(self, text, length=None, append_bos=False, append_eos=False): + bos_item = torch.LongTensor([self.task.src_dict.bos()]) + eos_item = torch.LongTensor([self.task.src_dict.eos()]) + # pad_idx = self.task.src_dict.pad() + s = self.task.tgt_dict.encode_line( + line=self.task.bpe.encode(text), + add_if_not_exist=False, + append_eos=False).long() + if length is not None: + s = s[:length] + if append_bos: + s = torch.cat([bos_item, s]) + if append_eos: + s = 
torch.cat([s, eos_item]) + return s + + def construct_sample(self, image, text): + if self.task_name == "caption" or self.task_name == "vqa_gen": + patch_image = self.patch_resize_transform(image).unsqueeze(0) + patch_mask = torch.tensor([True]) + src_text = self.encode_text(" " + text, append_bos=True, append_eos=True).unsqueeze(0) + src_length = torch.LongTensor([s.ne(self.pad_idx).long().sum() for s in src_text]) + if self.task_name == "caption": + sample = { + "id":np.array(['42']), + "net_input": { + "src_tokens": src_text, + "src_lengths": src_length, + "patch_images": patch_image, + "patch_masks": patch_mask + } + } + elif self.task_name == "vqa_gen": + ref_dict = np.array([{'yes': 1.0}]) # just placeholder + sample = { + "id":np.array(['42']), + "net_input": { + "src_tokens": src_text, + "src_lengths": src_length, + "patch_images": patch_image, + "patch_masks": patch_mask + }, + "ref_dict": ref_dict, + } + return sample + elif self.task_name == "refcoco": + patch_image_size = self.cfg.task.patch_image_size + w, h = image.size + w_resize_ratio = torch.tensor(patch_image_size / w).unsqueeze(0) + h_resize_ratio = torch.tensor(patch_image_size / h).unsqueeze(0) + patch_image = self.patch_resize_transform(image).unsqueeze(0) + patch_mask = torch.tensor([True]) + src_text = self.encode_text(' which region does the text " {} " describe?'.format(text), append_bos=True, + append_eos=True).unsqueeze(0) + src_length = torch.LongTensor([s.ne(self.pad_idx).long().sum() for s in src_text]) + sample = { + "id": np.array(['42']), + "net_input": { + "src_tokens": src_text, + "src_lengths": src_length, + "patch_images": patch_image, + "patch_masks": patch_mask, + }, + "w_resize_ratios": w_resize_ratio, + "h_resize_ratios": h_resize_ratio, + "region_coords": torch.randn(1, 4) + } + return sample + + def infer(self, img, text): + # get cv2 image + if self.task_name == "caption" or self.task_name == "vqa_gen": + image = cv2.resize(img, dsize=(640, 480)) # NOTE forcely + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + image = Image.fromarray(image) + # Construct input sample & preprocess for GPU if cuda available + sample = self.construct_sample(image, text) + sample = utils.move_to_cuda(sample) if self.use_cuda else sample + sample = utils.apply_to_sample(apply_half, sample) if self.use_fp16 else sample + if self.task_name == "caption": + with torch.no_grad(): + result, scores = eval_step(self.task, self.generator, self.models, sample) + text = result[0]['caption'] + return text + elif self.task_name == "vqa_gen": + with torch.no_grad(): + result, scores = zero_shot_step(self.task, self.generator, self.models, sample) + text = result[0]['answer'] + return text + elif self.task_name == "refcoco": + pass + +# run +if __name__ == "__main__": + app = Flask(__name__) + ofa_task = os.environ["OFA_TASK"] + ofa_model_scale = os.environ["OFA_MODEL_SCALE"] + # TODO add refcoco + if ofa_task == "all": + caption_infer = Inference("caption", ofa_model_scale) + vqa_infer = Inference("vqa_gen", ofa_model_scale) + + elif ofa_task == "caption": + caption_infer = Inference("caption", ofa_model_scale) + + elif ofa_task == "vqa_gen": + vqa_infer = Inference("vqa_gen", ofa_model_scale) + + else: + raise RuntimeError("No application is available") + + try: + @app.route("/caption", methods=['POST']) + def caption_request(): + data = request.data.decode("utf-8") + data_json = json.loads(data) + # process image + image_b = data_json['image'] + image_dec = base64.b64decode(image_b) + data_np = np.fromstring(image_dec, 
dtype='uint8') + img = cv2.imdecode(data_np, 1) + # get text + texts = data_json['queries'] + results = [] + for text in texts: + answer = caption_infer.infer(img, text) + results.append({"question": text, "answer": answer}) + return Response(response=json.dumps({"results": results}), status=200) + except NameError: + print("Skipping create caption app") + + try: + @app.route("/vqa_gen", methods=['POST']) + def vqa_request(): + data = request.data.decode("utf-8") + data_json = json.loads(data) + # process image + image_b = data_json['image'] + image_dec = base64.b64decode(image_b) + data_np = np.fromstring(image_dec, dtype='uint8') + img = cv2.imdecode(data_np, 1) + # get text + texts = data_json['queries'] + results = [] + for text in texts: + answer = vqa_infer.infer(img, text) + results.append({"question": text, "answer": answer}) + return Response(response=json.dumps({"results": results}), status=200) + except NameError: + print("Skipping create vqa_gen app") + + app.run("0.0.0.0", 8080, threaded=True) diff --git a/jsk_perception/docker/parameter_urls.txt b/jsk_perception/docker/parameter_urls.txt new file mode 100644 index 0000000000..361481ee8c --- /dev/null +++ b/jsk_perception/docker/parameter_urls.txt @@ -0,0 +1,4 @@ +https://ofa-beijing.oss-cn-beijing.aliyuncs.com/checkpoints/caption_huge_best.pt +https://ofa-beijing.oss-cn-beijing.aliyuncs.com/checkpoints/caption_large_best_clean.pt +https://ofa-beijing.oss-cn-beijing.aliyuncs.com/checkpoints/refcocog_large_best.pt +https://ofa-beijing.oss-cn-beijing.aliyuncs.com/checkpoints/vqa_large_best.pt diff --git a/jsk_perception/docker/run_jsk_vil_api b/jsk_perception/docker/run_jsk_vil_api new file mode 100755 index 0000000000..acef636280 --- /dev/null +++ b/jsk_perception/docker/run_jsk_vil_api @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 +import argparse +import os +try: + import rospy + import_rospy = True +except ImportError: + import_rospy = False +import subprocess +import sys + +CONTAINERS = {"ofa": "jsk-ofa-server", + "clip": "jsk-clip-server"} +OFA_MODEL_SCALES = ["base", "large", "huge"] + +parser = argparse.ArgumentParser(description="JSK Vision and Language API runner") +parser.add_argument("model", choices=CONTAINERS.keys(), + help="Vision and Language model you want to run", type=str) +parser.add_argument("-p", "--port", default=8888, help="Port of the API server", type=int) +parser.add_argument("--ofa_task", default="caption", + help="Tasks to be loaded in OFA. OFA loads all tasks by default and requires large amount of GPU RAM") +parser.add_argument("--ofa_model_scale", default="huge", choices=OFA_MODEL_SCALES, + help="Scale of parameters to be read in OFA. 
Use huge by default") + +args, unknown = parser.parse_known_args() + +if __name__ == "__main__": + if import_rospy and len(sys.argv) != len(rospy.myargv()): # roslaunch + rospy.init_node("jsk_vil_docker_manager") + port_option = "{}:8080".format(str(args.port)) + _ros_home = os.getenv("ROS_HOME", os.path.join(os.environ["HOME"],".ros")) + mount_option = "{}:/var/mount/params".format(os.path.join(_ros_home, "data", "jsk_perception", "vil_params")) + docker_image = "{}:latest".format(CONTAINERS[args.model]) + + cmd = ["docker", "run", "--rm", "-it", "--gpus", "all", + "-p", port_option, + "-v", mount_option] + + # OFA + if args.model == "ofa": + ofa_option = ["-e", "OFA_TASK={}".format(args.ofa_task), + "-e", "OFA_MODEL_SCALE={}".format(args.ofa_model_scale)] + cmd += ofa_option + + # Execute + print("Run {} container on port {}".format(docker_image, args.port)) + cmd.append(docker_image) + subprocess.run(cmd) diff --git a/jsk_perception/include/jsk_perception/invert_mask_image.h b/jsk_perception/include/jsk_perception/invert_mask_image.h new file mode 100644 index 0000000000..6d6ce4b03f --- /dev/null +++ b/jsk_perception/include/jsk_perception/invert_mask_image.h @@ -0,0 +1,66 @@ +// -*- mode: c++ -*- +/********************************************************************* + * Software License Agreement (BSD License) + * + * Copyright (c) 2022, JSK Lab + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of the JSK Lab nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ *********************************************************************/ + + +#ifndef JSK_PERCEPTION_INVERT_MASK_IMAGE_H_ +#define JSK_PERCEPTION_INVERT_MASK_IMAGE_H_ + +#include +#include +#include + + +namespace jsk_perception +{ + class InvertMaskImage: public jsk_topic_tools::DiagnosticNodelet + { + public: + InvertMaskImage(): DiagnosticNodelet("InvertMaskImage") {} + virtual ~InvertMaskImage(); + protected: + + virtual void onInit(); + virtual void subscribe(); + virtual void unsubscribe(); + virtual void invert(const sensor_msgs::Image::ConstPtr& mask_msg); + + boost::mutex mutex_; + ros::Subscriber sub_mask_; + ros::Publisher pub_mask_; + private: + }; +} + +#endif diff --git a/jsk_perception/include/jsk_perception/point_pose_extractor.h b/jsk_perception/include/jsk_perception/point_pose_extractor.h new file mode 100644 index 0000000000..ddcdfd00ed --- /dev/null +++ b/jsk_perception/include/jsk_perception/point_pose_extractor.h @@ -0,0 +1,706 @@ +// -*- mode: c++ -*- +/********************************************************************* + * Software License Agreement (BSD License) + * + * Copyright (c) 2015, JSK Lab + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of the JSK Lab nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
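The InvertMaskImage nodelet declared above carries no implementation in the header; functionally the invert() callback is a bitwise NOT over a mono8 mask (cv_bridge handles the sensor_msgs/Image conversion in the real nodelet). A rough Python equivalent, with a placeholder mask:

```python
import cv2
import numpy as np

mask = np.zeros((480, 640), dtype=np.uint8)   # placeholder mono8 mask
mask[100:200, 150:300] = 255                   # some foreground region

inverted = cv2.bitwise_not(mask)               # 255 -> 0, 0 -> 255
assert inverted[0, 0] == 255 and inverted[150, 200] == 0
```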
+ *********************************************************************/ +#ifndef JSK_PERCEPTION_POINT_POSE_EXTRACTOR_H_ +#define JSK_PERCEPTION_POINT_POSE_EXTRACTOR_H_ + +#include +#include +#include +#include +#if ( CV_MAJOR_VERSION >= 4) +#include +#include +#include +#else +#include +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace enc = sensor_msgs::image_encodings; + +void features2keypoint (posedetection_msgs::Feature0D features, + std::vector& keypoints, + cv::Mat& descriptors){ + keypoints.resize(features.scales.size()); + descriptors.create(features.scales.size(),features.descriptor_dim,CV_32FC1); + std::vector::iterator keypoint_it = keypoints.begin(); + for ( int i=0; keypoint_it != keypoints.end(); ++keypoint_it, ++i ) { + *keypoint_it = cv::KeyPoint(cv::Point2f(features.positions[i*2+0], + features.positions[i*2+1]), + features.descriptor_dim, // size + features.orientations[i], //angle + 0, // resonse + features.scales[i] // octave + ); + for (int j = 0; j < features.descriptor_dim; j++){ + descriptors.at(i,j) = + features.descriptors[i*features.descriptor_dim+j]; + } + } +} + + +class Matching_Template +{ +public: + std::string _matching_frame; + cv::Mat _template_img; + std::vector _template_keypoints; + cv::Mat _template_descriptors; + int _original_width_size; + int _original_height_size; + double _template_width; // width of template [m] + double _template_height; // height of template [m] + tf::Transform _relativepose; + cv::Mat _affine_matrix; + std::string _window_name; + // The maximum allowed reprojection error to treat a point pair as an inlier + double _reprojection_threshold; + // threshold on squared ratio of distances between NN and 2nd NN + double _distanceratio_threshold; + std::vector _correspondances; + cv::Mat _previous_stack_img; + + Matching_Template(){ + } + Matching_Template(cv::Mat img, + std::string matching_frame, + int original_width_size, + int original_height_size, + double template_width, + double template_height, + tf::Transform relativepose, + cv::Mat affine_matrix, + double reprojection_threshold, + double distanceratio_threshold, + std::string window_name, + bool autosize){ + + _template_img = img.clone(); + _matching_frame = matching_frame; + _original_width_size = original_width_size; + _original_height_size = original_height_size; + _template_width = template_width; + _template_height = template_height; + _relativepose = relativepose; + _affine_matrix = affine_matrix; + _window_name = window_name; + _reprojection_threshold = reprojection_threshold; + _distanceratio_threshold = distanceratio_threshold; + _template_keypoints.clear(); + _correspondances.clear(); + // cv::namedWindow(_window_name, autosize ? 
CV_WINDOW_AUTOSIZE : 0); + } + + + virtual ~Matching_Template(){ + std::cerr << "delete " << _window_name << std::endl; + } + + + std::string get_window_name(){ + return _window_name; + } + + std::vector* correspondances(){ + return &_correspondances; + } + + bool check_template (){ + if (_template_img.empty() && _template_keypoints.size() == 0 && + _template_descriptors.empty()){ + return false; + } + else { + return true; + } + } + + + int set_template(ros::ServiceClient client){ + posedetection_msgs::Feature0DDetect srv; + + if ( _template_img.empty()) { + ROS_ERROR ("template picture is empty."); + return -1; + } + ROS_INFO_STREAM("read template image and call " << client.getService() << " service"); + cv_bridge::CvImage cv_img; + cv_img.image = cv::Mat(_template_img); + cv_img.encoding = std::string("bgr8"); + srv.request.image = *(cv_img.toImageMsg()); + if (client.call(srv)) { + ROS_INFO_STREAM("get features with " << srv.response.features.scales.size() << " descriptoins"); + features2keypoint (srv.response.features, _template_keypoints, _template_descriptors); + + // inverse affine translation + cv::Mat M_inv; + cv::Mat dst; + cv::invert(_affine_matrix, M_inv); + // cv::drawKeypoints (tmp, _template_keypoints, dst, cv::Scalar::all(1), cv::DrawMatchesFlags::DRAW_RICH_KEYPOINTS); + // cv::drawKeypoints (tmp, _template_keypoints, dst); + // cv::warpPerspective(_template_img, dst, M_inv, + // cv::Size(_original_width_size, _original_height_size), + // CV_INTER_LINEAR, IPL_BORDER_CONSTANT, 0); + for (int i = 0; i < (int)_template_keypoints.size(); i++){ + cv::Point2f pt; + cv::Mat pt_mat(cv::Size(1,1), CV_32FC2, &pt); + cv::Mat pt_src_mat(cv::Size(1,1), CV_32FC2, &_template_keypoints.at(i).pt); + cv::perspectiveTransform (pt_src_mat, pt_mat, M_inv); + _template_keypoints.at(i).pt = pt_mat.at(0,0); + } + // cvSetMouseCallback (_window_name.c_str(), &PointPoseExtractor::cvmousecb, this); + // _template_img = dst; + return 0; + } else { + ROS_ERROR("Failed to call service Feature0DDetect"); + return 1; + } + } + + + double log_fac( int n ) + { + static std::vector slog_table; + int osize = slog_table.size(); + if(osize <= n){ + slog_table.resize(n+1); + if(osize == 0){ + slog_table[0] = -1; + slog_table[1] = log(1.0); + osize = 2; + } + for(int i = osize; i <= n; i++ ){ + slog_table[i] = slog_table[i-1] + log(i); + } + } + return slog_table[n]; + } + + int min_inlier( int n, int m, double p_badsupp, double p_badxform ) + { + double pi, sum; + int i, j; + double lp_badsupp = log( p_badsupp ); + double lp_badsupp1 = log( 1.0 - p_badsupp ); + for( j = m+1; j <= n; j++ ){ + sum = 0; + for( i = j; i <= n; i++ ){ + pi = (i-m) * lp_badsupp + (n-i+m) * lp_badsupp1 + + log_fac( n - m ) - log_fac( i - m ) - log_fac( n - i ); + sum += exp( pi ); + } + if( sum < p_badxform ) + break; + } + return j; + } + + + bool estimate_od (ros::ServiceClient client, cv::Mat src_img, + std::vector sourceimg_keypoints, + image_geometry::PinholeCameraModel pcam, + double err_thr, cv::Mat &stack_img, + cv::flann::Index* ft, posedetection_msgs::Object6DPose* o6p){ + + if ( _template_keypoints.size()== 0 && + _template_descriptors.empty() ){ + set_template(client); + } + if ( _template_keypoints.size()== 0 && + _template_descriptors.empty() ) { + ROS_ERROR ("Template image was not set."); + return false; + } + // stacked image + cv::Size stack_size = cv::Size(MAX(src_img.cols,_template_img.cols), + src_img.rows+_template_img.rows); + stack_img = cv::Mat(stack_size,CV_8UC3); + stack_img = cv::Scalar(0); + cv::Mat 
stack_img_tmp(stack_img,cv::Rect(0,0,_template_img.cols,_template_img.rows)); + cv::add(stack_img_tmp, _template_img, stack_img_tmp); + cv::Mat stack_img_src(stack_img,cv::Rect(0,_template_img.rows,src_img.cols,src_img.rows)); + cv::add(stack_img_src, src_img, stack_img_src); + _previous_stack_img = stack_img.clone(); + + // matching + cv::Mat m_indices(_template_descriptors.rows, 2, CV_32S); + cv::Mat m_dists(_template_descriptors.rows, 2, CV_32F); + ft->knnSearch(_template_descriptors, m_indices, m_dists, 2, cv::flann::SearchParams(-1) ); + + // matched points + std::vector pt1, pt2; + std::vector queryIdxs,trainIdxs; + for ( unsigned int j = 0; j < _template_keypoints.size(); j++ ) { + if ( m_dists.at(j,0) < m_dists.at(j,1) * _distanceratio_threshold) { + queryIdxs.push_back(j); + trainIdxs.push_back(m_indices.at(j,0)); + } + } + if ( queryIdxs.size() == 0 ) { + ROS_WARN_STREAM("could not find matched points with distanceratio(" <<_distanceratio_threshold << ")"); + } else { + cv::KeyPoint::convert(_template_keypoints,pt1,queryIdxs); + cv::KeyPoint::convert(sourceimg_keypoints,pt2,trainIdxs); + } + + ROS_INFO ("Found %d total matches among %d template keypoints", (int)pt2.size(), (int)_template_keypoints.size()); + + cv::Mat H; + std::vector mask((int)pt2.size()); + + if ( pt1.size() > 4 ) { + // ToDO for curve face + H = cv::findHomography(cv::Mat(pt1), cv::Mat(pt2), mask, CV_RANSAC, _reprojection_threshold); + } + + // draw line + for (int j = 0; j < (int)pt1.size(); j++){ + cv::Point2f pt, pt_orig; + cv::Mat pt_mat(cv::Size(1,1), CV_32FC2, &pt); + cv::Mat pt_src_mat(cv::Size(1,1), CV_32FC2, &pt1.at(j)); + cv::perspectiveTransform (pt_src_mat, pt_mat, _affine_matrix); + pt_orig = pt_mat.at(0,0); + if ( mask.at(j)){ + cv::line(stack_img, pt_orig, pt2.at(j)+cv::Point2f(0,_template_img.rows), + CV_RGB(0,255,0), 1,8,0); + } + else { + cv::line(stack_img, pt_orig, pt2.at(j)+cv::Point2f(0,_template_img.rows), + CV_RGB(255,0,255), 1,8,0); + } + } + int inlier_sum = 0; + for (int k=0; k < (int)mask.size(); k++){ + inlier_sum += (int)mask.at(k); + } + + double text_scale = 1.5; + { + int fontFace = 0, thickness = 0, baseLine; + int x, y; + cv::Size text_size; + std::string text; + + text = "inlier: " + boost::lexical_cast((int)inlier_sum) + " / " + boost::lexical_cast((int)pt2.size()); + text_size = cv::getTextSize(text, fontFace, text_scale, thickness, &baseLine); + x = stack_img.size().width - text_size.width; + y = text_size.height + thickness + 10; // 10pt pading + cv::putText (stack_img, text, cv::Point(x, y), + fontFace, text_scale, CV_RGB(0, 255, 0), + thickness, 8, false); + + text = "template: " + boost::lexical_cast((int)_template_keypoints.size()); + text_size = cv::getTextSize(text, fontFace, text_scale, thickness, &baseLine); + x = stack_img.size().width - text_size.width; + y += text_size.height + thickness + 10; // 10pt pading + cv::putText (stack_img, text, cv::Point(x, y), + fontFace, text_scale, CV_RGB(0, 255, 0), + thickness, 8, false); + } + + // draw correspondances + ROS_INFO(" _correspondances.size: %d", (int)_correspondances.size()); + for (int j = 0; j < (int)_correspondances.size(); j++){ + cv::circle(stack_img, cv::Point2f(_correspondances.at(j).x, _correspondances.at(j).y + _template_img.size().height), + 8, CV_RGB(255,0,0), -1); + } + + ROS_INFO(" inlier_sum:%d min_lier:%d", inlier_sum, min_inlier((int)pt2.size(), 4, 0.10, 0.01)); + if ((cv::countNonZero( H ) == 0) || (inlier_sum < min_inlier((int)pt2.size(), 4, 0.10, 0.01))){ + ROS_INFO(" inlier_sum < min_lier 
return-from estimate-od"); + if( _window_name != "" ) + cv::imshow(_window_name, stack_img); + return false; + } + + std::string _type; + char chr[20]; + + // number of match points + sprintf(chr, "%d", (int)pt2.size()); + _type = _matching_frame + "_" + std::string(chr); + + sprintf(chr, "%d", inlier_sum); + _type = _type + "_" + std::string(chr); + + cv::Point2f corners2d[4] = {cv::Point2f(0,0), + cv::Point2f(_original_width_size,0), + cv::Point2f(_original_width_size,_original_height_size), + cv::Point2f(0,_original_height_size)}; + cv::Mat corners2d_mat (cv::Size(4, 1), CV_32FC2, corners2d); + cv::Point3f corners3d[4] = {cv::Point3f(0,0,0), + cv::Point3f(0,_template_width,0), + cv::Point3f(_template_height,_template_width,0), + cv::Point3f(_template_height,0,0)}; + cv::Mat corners3d_mat (cv::Size(4, 1), CV_32FC3, corners3d); + + cv::Mat corners2d_mat_trans; + + cv::perspectiveTransform (corners2d_mat, corners2d_mat_trans, H); + + double fR3[3], fT3[3]; + cv::Mat rvec(3, 1, CV_64FC1, fR3); + cv::Mat tvec(3, 1, CV_64FC1, fT3); + cv::Mat zero_distortion_mat = cv::Mat::zeros(4, 1, CV_64FC1); + + cv::solvePnP (corners3d_mat, corners2d_mat_trans, + pcam.intrinsicMatrix(), + zero_distortion_mat,//if unrectified: pcam.distortionCoeffs() + rvec, tvec); + + tf::Transform checktf, resulttf; + + checktf.setOrigin( tf::Vector3(fT3[0], fT3[1], fT3[2] ) ); + + double rx = fR3[0], ry = fR3[1], rz = fR3[2]; + tf::Quaternion quat; + double angle = cv::norm(rvec); + quat.setRotation(tf::Vector3(rx/angle, ry/angle, rz/angle), angle); + checktf.setRotation( quat ); + + resulttf = checktf * _relativepose; + + ROS_INFO( " tx: (%0.2lf,%0.2lf,%0.2lf) rx: (%0.2lf,%0.2lf,%0.2lf)", + resulttf.getOrigin().getX(), + resulttf.getOrigin().getY(), + resulttf.getOrigin().getZ(), + resulttf.getRotation().getAxis().x() * resulttf.getRotation().getAngle(), + resulttf.getRotation().getAxis().y() * resulttf.getRotation().getAngle(), + resulttf.getRotation().getAxis().z() * resulttf.getRotation().getAngle()); + + o6p->pose.position.x = resulttf.getOrigin().getX(); + o6p->pose.position.y = resulttf.getOrigin().getY(); + o6p->pose.position.z = resulttf.getOrigin().getZ(); + o6p->pose.orientation.w = resulttf.getRotation().w(); + o6p->pose.orientation.x = resulttf.getRotation().x(); + o6p->pose.orientation.y = resulttf.getRotation().y(); + o6p->pose.orientation.z = resulttf.getRotation().z(); + o6p->type = _matching_frame; // _type + + // draw 3d cube model + std::vector projected_top; + { + tf::Vector3 coords[8] = {tf::Vector3(0,0,0), + tf::Vector3(0, _template_width, 0), + tf::Vector3(_template_height, _template_width,0), + tf::Vector3(_template_height, 0, 0), + tf::Vector3(0, 0, -0.03), + tf::Vector3(0, _template_width, -0.03), + tf::Vector3(_template_height, _template_width, -0.03), + tf::Vector3(_template_height, 0, -0.03)}; + + projected_top = std::vector(8); + + for(int i=0; i<8; i++) { + coords[i] = checktf * coords[i]; + cv::Point3f pt(coords[i].getX(), coords[i].getY(), coords[i].getZ()); + projected_top[i] = pcam.project3dToPixel(pt); + } + } + + { // check if the matched region does not too big or too small + float max_x, max_y, min_x, min_y; + max_x = max_y = -1e9; + min_x = min_y = 1e9; + for (int j = 0; j < 4; j++){ + cv::Point2f pt = corners2d_mat_trans.at(0,j); + max_x = std::max(max_x, pt.x), max_y = std::max(max_y, pt.y); + min_x = std::min(min_x, pt.x), min_y = std::min(min_y, pt.y); + } + if((max_x - min_x) < 30 || (max_y - min_y) < 30 || + src_img.rows < (max_x - min_x)/2 || src_img.cols < (max_y - 
min_y)/2){ + ROS_INFO(" matched region is too big or small (2< && <30) width:%f height:%f return-from estimate-od", max_x - min_x, max_y - min_y); + return false; + } + } + + double err_sum = 0; + bool err_success = true; + for (int j = 0; j < 4; j++){ + double err = sqrt(pow((corners2d_mat_trans.at(0,j).x - projected_top.at(j).x), 2) + + pow((corners2d_mat_trans.at(0,j).y - projected_top.at(j).y), 2)); + err_sum += err; + } + if (err_sum > err_thr){ + ROS_INFO(" err_sum:%f > err_thr:%f return-from estimate-od", err_sum, err_thr); + err_success = false; + } else { + o6p->reliability = 1.0 - (err_sum / err_thr); + } + // draw lines around the detected object + for (int j = 0; j < corners2d_mat_trans.cols; j++){ + cv::Point2f p1(corners2d_mat_trans.at(0,j).x, + corners2d_mat_trans.at(0,j).y+_template_img.rows); + cv::Point2f p2(corners2d_mat_trans.at(0,(j+1)%corners2d_mat_trans.cols).x, + corners2d_mat_trans.at(0,(j+1)%corners2d_mat_trans.cols).y+_template_img.rows); + cv::line (stack_img, p1, p2, CV_RGB(255, 0, 0), + (err_success?4:1), // width + CV_AA, 0); + } + + // draw 3d cube model + if(projected_top.size() == 8) { // verify, the size is 8 + int cnt = 8; + std::vector ps(cnt); + for(int i=0; i ps(4); + + for(int i=0; i<4; i++) { + coords[i] = resulttf * coords[i]; + cv::Point3f pt(coords[i].getX(), coords[i].getY(), coords[i].getZ()); + ps[i] = pcam.project3dToPixel(pt); + ps[i].y += _template_img.rows; // draw on camera image + } + + cv::line (stack_img, ps[0], ps[1], CV_RGB(255, 0, 0), 3, CV_AA, 0); + cv::line (stack_img, ps[0], ps[2], CV_RGB(0, 255, 0), 3, CV_AA, 0); + cv::line (stack_img, ps[0], ps[3], CV_RGB(0, 0, 255), 3, CV_AA, 0); + } + + // write text on image + { + std::string text; + int x, y; + text = "error: " + boost::lexical_cast(err_sum); + x = stack_img.size().width - 16*17*text_scale; // 16pt * 17 + y = _template_img.size().height - (16 + 2)*text_scale*6; + cv::putText (stack_img, text, cv::Point(x, y), + 0, text_scale, CV_RGB(0, 255, 0), + 2, 8, false); + ROS_INFO(" %s < %f (threshold)", text.c_str(), err_thr ); + } + // for debug window + if( _window_name != "" ) + cv::imshow(_window_name, stack_img); + + return err_success; + } +}; // the end of difinition of class Matching_Template + + +namespace jsk_perception +{ + + class PointPoseExtractor: public jsk_topic_tools::DiagnosticNodelet + { + public: + std::vector _templates; + typedef jsk_perception::point_pose_extractorConfig Config; + PointPoseExtractor() : DiagnosticNodelet("PointPoseExtractor") {} + virtual ~PointPoseExtractor(); + protected: + ros::NodeHandle _n; + image_transport::ImageTransport *it; + ros::Subscriber _sub; + ros::ServiceServer _server; + ros::ServiceClient _client; + ros::Publisher _pub, _pub_agg, _pub_pose; + tf::TransformBroadcaster _br; + image_transport::Publisher _debug_pub; + bool _first_sample_change; + double _reprojection_threshold; + double _distanceratio_threshold; + double _th_step; + double _phi_step; + bool _autosize; + double _err_thr; + image_geometry::PinholeCameraModel pcam; + bool pnod; + bool _initialized; + bool _viewer; + bool _publish_tf; + std::string _child_frame_id; + + virtual void initialize (); + virtual cv::Mat make_homography(cv::Mat src, cv::Mat rvec, cv::Mat tvec, + double template_width, double template_height, cv::Size &size); + virtual int make_warped_images (cv::Mat src, std::vector &imgs, + std::vector &Mvec, + double template_width, double template_height, + double th_step, double phi_step); + virtual bool add_new_template(cv::Mat img, std::string 
typestr, tf::Transform relative_pose, + double template_width, double template_height, + double theta_step, double phi_step); + virtual bool settemplate_cb (jsk_perception::SetTemplate::Request &req, + jsk_perception::SetTemplate::Response &res); + virtual void imagefeature_cb (const posedetection_msgs::ImageFeature0DConstPtr& msg); + virtual void onInit(); + virtual void subscribe(); + virtual void unsubscribe(); + virtual void dyn_conf_callback(Config &config, uint32_t level); + + static void cvmousecb (int event, int x, int y, int flags, void* param){ + PointPoseExtractor* ppe = static_cast(param); + Matching_Template *mt = ppe->_templates.back(); + // std::cerr << "mousecb_ -> " << mt << std::endl; + switch (event){ + case CV_EVENT_LBUTTONUP: { + cv::Point2d pt(x,y - (int)mt->_template_img.size().height); + ROS_INFO("add correspondence (%d, %d)", (int)pt.x, (int)pt.y); + mt->_correspondances.push_back(pt); + if ((int)mt->_correspondances.size() >= 4){ + make_template_from_mousecb(ppe); + mt->_correspondances.clear(); + ROS_INFO("reset"); + } + break; + } + case CV_EVENT_RBUTTONUP: { + mt->_correspondances.clear(); + ROS_INFO("reset"); + break; + } + } + } + static void make_template_from_mousecb(PointPoseExtractor* ppe){ + Matching_Template *mt = ppe->_templates.back(); + cv::Mat H; + cv::Mat tmp_template, tmp_warp_template; + std::vectorpt1, pt2; + double width, height; + std::string filename; + std::cout << "input template's [width]" << std::endl; + std::cin >> width; + std::cout << "input template's [height]" << std::endl; + std::cin >> height; + std::cout << "input template's [filename]" << std::endl; + std::cin >> filename; + + for (int i = 0; i < 4; i++){ + pt1.push_back(cv::Point2d((int)mt->_correspondances.at(i).x, + (int)mt->_correspondances.at(i).y + mt->_template_img.size().height)); + } + cv::Rect rect = cv::boundingRect(cv::Mat(pt1)); + double scale = std::max(width, height) / 500.0; + int iwidth = width / scale, iheight = height / scale; + pt2.push_back(cv::Point2d(0,0)); + pt2.push_back(cv::Point2d(iwidth,0)); + pt2.push_back(cv::Point2d(iwidth,iheight)); + pt2.push_back(cv::Point2d(0, iheight)); + H = cv::findHomography(cv::Mat(pt1), cv::Mat(pt2)); + + cv::getRectSubPix(mt->_previous_stack_img, rect.size(), + cv::Point2f((rect.tl().x + rect.br().x)/2.0,(rect.tl().y + rect.br().y)/2.0), + tmp_template); + cv::warpPerspective(mt->_previous_stack_img, tmp_warp_template, H, cv::Size(iwidth, iheight)); + + try { + cv::imwrite(filename,tmp_template); + boost::filesystem::path fname(filename); + std::stringstream ss; + ss << (fname.parent_path() / fname.stem()).c_str() << "_wrap" << fname.extension().c_str(); + cv::imwrite(ss.str(),tmp_warp_template); + }catch (cv::Exception e) { + std::cerr << e.what() << std::endl; + } + + for (int i = 0; i < (int)pt1.size(); i++){ + pt2.push_back(cv::Point2d((int)pt1.at(i).x - rect.x, + (int)pt1.at(i).y - rect.y - mt->_template_img.size().height)); + } + // cv::Mat mask_img = cv::Mat::zeros(tmp_template.size(), CV_8UC3); + // cv::fillConvexPoly(mask_img, pt2.begin(), (int)pt2.size(), CV_RGB(255, 255, 255)); + + // cv::namedWindow("hoge", 1); + // cv::imshow("hoge", mask_img); + + cv::Mat M = (cv::Mat_(3,3) << 1,0,0, 0,1,0, 0,0,1); + std::string window_name = "sample" + boost::lexical_cast((int)ppe->_templates.size()); + + Matching_Template* tmplt = + new Matching_Template (tmp_warp_template, "sample", + tmp_warp_template.size().width, tmp_warp_template.size().height, + width, height, + tf::Transform(tf::Quaternion(0, 0, 0, 1), 
tf::Vector3(0, 0, 0)), + M, + mt->_reprojection_threshold, + mt->_distanceratio_threshold, + ppe->_first_sample_change ? window_name : mt->_window_name, + cv::getWindowProperty(mt->_window_name, CV_WND_PROP_AUTOSIZE)); + + mt->_correspondances.clear(); + ppe->_templates.push_back(tmplt); + cv::namedWindow(ppe->_first_sample_change ? window_name : mt->_window_name, + cv::getWindowProperty(mt->_window_name, CV_WND_PROP_AUTOSIZE)); + cvSetMouseCallback (ppe->_first_sample_change ? window_name.c_str() : mt->_window_name.c_str(), + &cvmousecb, ppe); + ppe->_first_sample_change = true; + } + private: + }; +} + + +#endif diff --git a/jsk_perception/include/jsk_perception/split_image.h b/jsk_perception/include/jsk_perception/split_image.h new file mode 100644 index 0000000000..858e6cfde9 --- /dev/null +++ b/jsk_perception/include/jsk_perception/split_image.h @@ -0,0 +1,69 @@ +// -*- mode: c++ -*- +/********************************************************************* + * Software License Agreement (BSD License) + * + * Copyright (c) 2023, JSK Lab + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of the JSK Lab nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
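The pose recovery inside estimate_od() above reduces to two OpenCV calls: warp the template's four pixel corners with the homography obtained from the feature matches, then pass those image points together with the template's metric corners to solvePnP. A standalone sketch of that step follows; the camera matrix, homography and template sizes are placeholders (the 0.145 x 0.128 m values simply mirror the pai_no_mi sample used later in this patch).

```python
import cv2
import numpy as np

w_px, h_px = 320, 240          # template image size [pixel] (placeholder)
w_m, h_m = 0.145, 0.128        # template size [m] (placeholder)

H = np.eye(3)                  # homography template -> camera image (placeholder)
K = np.array([[525., 0., 319.5],
              [0., 525., 239.5],
              [0., 0., 1.]])   # camera intrinsics (placeholder)

# same corner ordering as the C++ code above
corners2d = np.float32([[0, 0], [w_px, 0], [w_px, h_px], [0, h_px]]).reshape(-1, 1, 2)
corners3d = np.float32([[0, 0, 0], [0, w_m, 0], [h_m, w_m, 0], [h_m, 0, 0]])

projected = cv2.perspectiveTransform(corners2d, H)
ok, rvec, tvec = cv2.solvePnP(corners3d, projected, K, np.zeros(4))  # image assumed rectified
print(ok, rvec.ravel(), tvec.ravel())
```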
+ *********************************************************************/ +/* + * split_image.h + * Author: Yoshiki Obinata + */ + + +#ifndef SPLIT_IMAGE_H_ +#define SPLIT_IMAGE_H_ + +#include +#include +#include + +namespace jsk_perception{ + + class SplitImage : public jsk_topic_tools::DiagnosticNodelet{ + + public: + SplitImage() : DiagnosticNodelet("SplitImage"){} + virtual ~SplitImage(); + protected: + virtual void onInit(); + virtual void subscribe(); + virtual void unsubscribe(); + virtual void splitImage(const sensor_msgs::Image::ConstPtr& image_msg); + + ros::Subscriber sub_; + std::vector pubs_; + int vertical_parts_; + int horizontal_parts_; + private: + }; +}; + +#endif // SPLIT_IMAGE_H_ diff --git a/jsk_perception/include/jsk_perception/video_to_scene.h b/jsk_perception/include/jsk_perception/video_to_scene.h new file mode 100644 index 0000000000..15a9473c23 --- /dev/null +++ b/jsk_perception/include/jsk_perception/video_to_scene.h @@ -0,0 +1,81 @@ +// -*- mode: C++ -*- +/********************************************************************* + * Software License Agreement (BSD License) + * + * Copyright (c) 2022, JSK Lab + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of the JSK Lab nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
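SplitImage above only declares the nodelet; the splitting itself is a plain grid slice. A short sketch, under the assumption that vertical_parts counts rows and horizontal_parts counts columns, matching the 2x2 sample launch later in this patch:

```python
import numpy as np

def split_image(img, vertical_parts=2, horizontal_parts=2):
    """Return the tiles in row-major order (one per publisher in pubs_)."""
    h, w = img.shape[:2]
    th, tw = h // vertical_parts, w // horizontal_parts
    return [img[r * th:(r + 1) * th, c * tw:(c + 1) * tw]
            for r in range(vertical_parts)
            for c in range(horizontal_parts)]

tiles = split_image(np.zeros((480, 640, 3), dtype=np.uint8))
assert len(tiles) == 4 and tiles[0].shape == (240, 320, 3)
```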
+ *********************************************************************/ +/* + * video_to_scene.h + * Author: Kei Okada + */ + +#ifndef VIDEO_TO_SCENE_H_ +#define VIDEO_TO_SCENE_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace jsk_perception{ + class VideoToScene: public jsk_topic_tools::DiagnosticNodelet{ + public: + typedef VideoToSceneConfig Config; + VideoToScene() : DiagnosticNodelet("VideoToScene") {} + protected: + virtual void onInit(); + virtual void subscribe(); + virtual void unsubscribe(); + virtual void work(const sensor_msgs::Image::ConstPtr& image_msg); + virtual void configCallback(Config &config, uint32_t level); + + boost::shared_ptr > srv_; + image_transport::Subscriber sub_; + boost::shared_ptr it_; + image_transport::Publisher pub_; + boost::mutex mutex_; + private: + cv::Ptr bgsubtractor; + int min_percent_; + int max_percent_; + bool captured_; + + }; +} + +#endif // VIDEO_TO_SCENE_H_ diff --git a/jsk_perception/launch/classification.launch b/jsk_perception/launch/classification.launch new file mode 100644 index 0000000000..fbf14fae0e --- /dev/null +++ b/jsk_perception/launch/classification.launch @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + host: $(arg host) + port: $(arg port) + image_transport: $(arg image_transport) + + + + + + diff --git a/jsk_perception/launch/ofa_gui.launch b/jsk_perception/launch/ofa_gui.launch new file mode 100644 index 0000000000..c0cba568db --- /dev/null +++ b/jsk_perception/launch/ofa_gui.launch @@ -0,0 +1,6 @@ + + + + + diff --git a/jsk_perception/launch/video_to_scene.launch b/jsk_perception/launch/video_to_scene.launch new file mode 100644 index 0000000000..c262c3ab2c --- /dev/null +++ b/jsk_perception/launch/video_to_scene.launch @@ -0,0 +1,25 @@ + + + + + + + + + + + + + + + + + + + + + diff --git a/jsk_perception/launch/vqa.launch b/jsk_perception/launch/vqa.launch new file mode 100644 index 0000000000..32b3988b4b --- /dev/null +++ b/jsk_perception/launch/vqa.launch @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + host: $(arg host) + port: $(arg port) + image_transport: $(arg image_transport) + + + + + + diff --git a/jsk_perception/node_scripts/alexnet_object_recognition.py b/jsk_perception/node_scripts/alexnet_object_recognition.py index 7722715d30..73fbdd88b7 100755 --- a/jsk_perception/node_scripts/alexnet_object_recognition.py +++ b/jsk_perception/node_scripts/alexnet_object_recognition.py @@ -30,7 +30,7 @@ from jsk_recognition_utils.chainermodels import AlexNetBatchNormalization import rospy from sensor_msgs.msg import Image -from vgg16_object_recognition import VGG16ObjectRecognition +from jsk_perception.vgg16_object_recognition import VGG16ObjectRecognition class AlexNetObjectRecognition(VGG16ObjectRecognition): diff --git a/jsk_perception/node_scripts/binpack_rect_array.py b/jsk_perception/node_scripts/binpack_rect_array.py index d7854b9efb..21ee0fb80b 100755 --- a/jsk_perception/node_scripts/binpack_rect_array.py +++ b/jsk_perception/node_scripts/binpack_rect_array.py @@ -24,6 +24,8 @@ def width_cmp(self, another): return self.rect.width - another.rect.width def __cmp__(self, another): # python2 return self.max_cmp(another) or self.min_cmp(another) or self.height_cmp(another) or self.width_cmp(another) or 0 + def __lt__(self, another): # python3 + return self.max_cmp(another) or self.min_cmp(another) or self.height_cmp(another) or self.width_cmp(another) or 0 class Packer(): def __init__(self): diff --git 
a/jsk_perception/node_scripts/bof_histogram_extractor.py b/jsk_perception/node_scripts/bof_histogram_extractor.py index 4b06167593..e8ccb05186 100755 --- a/jsk_perception/node_scripts/bof_histogram_extractor.py +++ b/jsk_perception/node_scripts/bof_histogram_extractor.py @@ -5,8 +5,12 @@ """ import gzip -import cPickle as pickle -from distutils.version import StrictVersion +import sys +if sys.version_info.major <= 2: + import cPickle as pickle +else: # for python3 + import _pickle as pickle +from distutils.version import LooseVersion from pkg_resources import get_distribution import numpy as np @@ -34,14 +38,20 @@ def __init__(self): if bof_data is None: quit() with gzip.open(bof_data, 'rb') as f: - self.bof = pickle.load(f) - if (StrictVersion(get_distribution('scikit-learn').version) >= - StrictVersion('0.17.0')): + if sys.version_info.major <= 2: + self.bof = pickle.load(f) + else: + self.bof = pickle.load(f, encoding='latin1') + if (LooseVersion(get_distribution('scikit-learn').version) >= + LooseVersion('0.17')): if 'n_jobs' not in self.bof.nn.__dict__ or not isinstance(self.bof.nn.n_jobs, int): # In scikit-learn>=0.17.0, # sklearn.neighbors.NearestNeighbors needs 'n_jobs' attribute. # https://github.com/jsk-ros-pkg/jsk_recognition/issues/1669 self.bof.nn.n_jobs = 1 + # noetic uses newer scikit-learn which uses n_samples_fit_ + if 'n_ssamples_fit_' not in self.bof.nn.__dict__: + self.bof.nn.n_samples_fit_ = self.bof.nn._fit_X.shape[0] self._pub = self.advertise('~output', VectorArray, queue_size=1) rospy.loginfo('Initialized BoF histogram extractor') @@ -79,7 +89,7 @@ def _apply(self, feature_msg, label_msg): decomposed = decompose_descriptors_with_label( descriptors=desc, positions=pos, label_img=label, skip_zero_label=True) - X = np.array(decomposed.values()) + X = np.array(list(decomposed.values())) if X.size == 0: return X = self.bof.transform(X) diff --git a/jsk_perception/node_scripts/classification_node.py b/jsk_perception/node_scripts/classification_node.py new file mode 100755 index 0000000000..5c9debf692 --- /dev/null +++ b/jsk_perception/node_scripts/classification_node.py @@ -0,0 +1,13 @@ +#!/usr/bin/env python + +import rospy +from jsk_perception.vil_inference_client import ClipClientNode + + +def main(): + rospy.init_node("classification") + node = ClipClientNode() + rospy.spin() + +if __name__ == "__main__": + main() diff --git a/jsk_perception/node_scripts/hand_pose_estimation_2d.py b/jsk_perception/node_scripts/hand_pose_estimation_2d.py index 4617760375..800e706259 100755 --- a/jsk_perception/node_scripts/hand_pose_estimation_2d.py +++ b/jsk_perception/node_scripts/hand_pose_estimation_2d.py @@ -253,7 +253,7 @@ def _cb_with_depth_info(self, img_msg, depth_msg, camera_info_msg): elif depth_msg.encoding != '32FC1': rospy.logerr('Unsupported depth encoding: %s' % depth_msg.encoding) - hands_points, hands_point_scores, hands_score = self.hand_pose_estimate(img) + hands_points, hands_point_scores, hands_score = self.hand_pose_estimate(img, img_msg.header) hand_pose_2d_msg = self._create_2d_hand_pose_array_msgs( hands_points, @@ -318,7 +318,7 @@ def _cb(self, img_msg): img = self.bridge.imgmsg_to_cv2( img_msg, desired_encoding='bgr8') hands_points, hands_point_scores, hands_score = \ - self.hand_pose_estimate(img) + self.hand_pose_estimate(img, img_msg.header) hand_pose_msg = self._create_2d_hand_pose_array_msgs( hands_points, @@ -358,18 +358,19 @@ def _create_2d_hand_pose_array_msgs( hand_pose_msg.poses.append(pose_msg) return hand_pose_msg - def hand_pose_estimate(self, 
bgr): + def hand_pose_estimate(self, bgr, header): if self.backend == 'torch': - return self._hand_pose_estimate_torch_backend(bgr) + return self._hand_pose_estimate_torch_backend(bgr, header) raise ValueError('Unsupported backend: {0}'.format(self.backend)) - def _hand_pose_estimate_torch_backend(self, frame): + def _hand_pose_estimate_torch_backend(self, frame, header): hands_points, hands_rect, hands_point_scores, hands_score = \ self.pyramid_inference(frame) if self.visualize: vis_img = self._draw_joints(frame, hands_points, hands_rect) vis_msg = self.bridge.cv2_to_imgmsg(vis_img, encoding='bgr8') + vis_msg.header = header self.image_pub.publish(vis_msg) return hands_points, hands_point_scores, hands_score diff --git a/jsk_perception/node_scripts/label_image_classifier.py b/jsk_perception/node_scripts/label_image_classifier.py index 7804510b22..8494773d97 100755 --- a/jsk_perception/node_scripts/label_image_classifier.py +++ b/jsk_perception/node_scripts/label_image_classifier.py @@ -7,45 +7,8 @@ import rospy from sensor_msgs.msg import Image +from jsk_perception.label_image_classifier import LabelImageClassifier -class LabelImageClassifier(ConnectionBasedTransport): - - classifier_name = 'label_image_classifier' - - def __init__(self): - super(LabelImageClassifier, self).__init__() - self.ignore_labels = rospy.get_param('~ignore_labels', []) - self.target_names = rospy.get_param('~target_names', []) - self.pub = self.advertise( - '~output', ClassificationResult, queue_size=1) - - def subscribe(self): - self.sub = rospy.Subscriber('~input', Image, self._cb) - - def unsubscribe(self): - self.sub.unregister() - - def _cb(self, imgmsg): - bridge = cv_bridge.CvBridge() - img = bridge.imgmsg_to_cv2(imgmsg) - label, proba = self._classify(img) - msg = ClassificationResult() - msg.header = imgmsg.header - msg.labels = [label] - msg.label_names = [self.target_names[label]] - msg.label_proba = [proba[label]] - msg.probabilities = proba - msg.classifier = self.classifier_name - msg.target_names = self.target_names - self.pub.publish(msg) - - def _classify(self, label_img): - counts = np.bincount(label_img.flatten(), - minlength=len(self.target_names)) - counts[self.ignore_labels] = 0 - label = np.argmax(counts) - proba = counts.astype(np.float32) / counts.sum() - return label, proba if __name__ == '__main__': diff --git a/jsk_perception/node_scripts/matchtemplate.py b/jsk_perception/node_scripts/matchtemplate.py index 0a46300e0b..432aa5dfb0 100755 --- a/jsk_perception/node_scripts/matchtemplate.py +++ b/jsk_perception/node_scripts/matchtemplate.py @@ -3,7 +3,10 @@ import roslib; roslib.load_manifest('jsk_perception') import rospy import numpy as np -import thread +try: + import thread +except: + import _thread as thread from sensor_msgs.msg import Image from geometry_msgs.msg import * from jsk_recognition_msgs.msg import Rect @@ -37,10 +40,10 @@ def _MinMaxLock2nd(arr,ex_size,is_min): if is_min: idx = 0 else: idx = 1 status = cv2.minMaxLoc(arr) - pt1 = (max(status[2+idx][0]-ex_size[0]/2,0), - max(status[2+idx][1]-ex_size[1]/2,0)) - pt2 = (min(status[2+idx][0]+ex_size[0]/2,arr.shape[1]), - min(status[2+idx][1]+ex_size[1]/2,arr.shape[0])) + pt1 = (max(status[2+idx][0]-int(ex_size[0]/2),0), + max(status[2+idx][1]-int(ex_size[1]/2),0)) + pt2 = (min(status[2+idx][0]+int(ex_size[0]/2),arr.shape[1]), + min(status[2+idx][1]+int(ex_size[1]/2),arr.shape[0])) mask = np.ones((arr.shape[0], arr.shape[1]), dtype=np.uint8) * 255 mask[pt1[0]:pt2[0], pt1[1]:pt2[1]] = 0 status2 = cv2.minMaxLoc(arr, mask) @@ 
-127,8 +130,8 @@ def set_template (self, ref_id='', ref_image=None, ref_rect=None, template_image = cv2.cvtColor( ref_image_rect, cv2.COLOR_BGR2HSV) - self.templates[ref_id]['ref_point'] = (ref_rect[0]+ref_rect[2]/2, - ref_rect[1]+ref_rect[3]/2) + self.templates[ref_id]['ref_point'] = (ref_rect[0]+int(ref_rect[2]/2), + ref_rect[1]+int(ref_rect[3]/2)) self.templates[ref_id]['ref_image'] = template_image rospy.loginfo("set ref_image id=%s, rect=%s", ref_id, ref_rect); @@ -160,13 +163,13 @@ def set_reference (self, rect): def set_reference_point_callback (self, msg): # PointStamped self.lockobj.acquire() pt = (int(msg.point.x),int(msg.point.y)) - rect = (pt[0]-self.default_template_size[0]/2, - pt[1]-self.default_template_size[1]/2, + rect = (pt[0]-int(self.default_template_size[0]/2), + pt[1]-int(self.default_template_size[1]/2), self.default_template_size[0], self.default_template_size[1]) self.set_reference(rect) print(rect) - search_rect = (pt[0]-self.default_search_size[0]/2, - pt[1]-self.default_search_size[1]/2, + search_rect = (pt[0]-int(self.default_search_size[0]/2), + pt[1]-int(self.default_search_size[1]/2), self.default_search_size[0],self.default_search_size[1]) self.set_template('',ser_frame=None, ser_rect=search_rect) self.lockobj.release() @@ -204,7 +207,7 @@ def reconfigure(self,config,level): (template['ref_image'] is not None): ref_pt = template['ref_point'] ref_size = template['ref_image'].shape - ref_rect = (ref_pt[0]-ref_size[0]/2,ref_pt[1]-ref_size[1]/2,ref_size[0],ref_size[1]) + ref_rect = (ref_pt[0]-int(ref_size[0]/2),ref_pt[1]-int(ref_size[1]/2),ref_size[0],ref_size[1]) self.set_template(ref_id=config['current_template_id'], color=config['template_color_space'], ref_rect=ref_rect, @@ -270,8 +273,8 @@ def ser_image_callback (self, msg): status = MaxLock2nd(results,reference_size) # maximum for others status = (1 - status[0], 1 - status[1], status[2], status[3]) - result_pt = (status[2][0]+search_rect[0]+reference_size[0]/2, - status[2][1]+search_rect[1]+reference_size[1]/2) + result_pt = (status[2][0]+search_rect[0]+int(reference_size[0]/2), + status[2][1]+search_rect[1]+int(reference_size[1]/2)) # publish center position as result result.child_frame_id = template_id @@ -291,8 +294,8 @@ def ser_image_callback (self, msg): # self feedback if self.auto_search_area: - val_x = result_pt[0]-search_rect[2]/2 - val_y = result_pt[1]-search_rect[3]/2 + val_x = result_pt[0]-int(search_rect[2]/2) + val_y = result_pt[1]-int(search_rect[3]/2) ser_scale = max(2,5+np.log(status[0])) # ??? 
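Most of the matchtemplate.py changes in these hunks follow one pattern: pixel arithmetic is wrapped in int() because / became true division under Python 3 and OpenCV rejects float coordinates. In short:

```python
# Behaviour the int() wrappers above compensate for (w is a pixel size):
w = 25
print(w / 2)       # Python 2: 12        Python 3: 12.5
print(int(w / 2))  # 12 on both, valid as a cv2 coordinate
print(w // 2)      # 12 on both; floor division would be an equivalent fix
```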
new_ser_rect = ( min(max(val_x,0),image_size[0]-search_rect[2]), @@ -303,10 +306,10 @@ def ser_image_callback (self, msg): # draw on debug image if self.show_debug_image: cv2.rectangle(debug_image, - (result_pt[0] - reference_size[0]/2, - result_pt[1] - reference_size[1]/2), - (result_pt[0] + reference_size[0]/2, - result_pt[1] + reference_size[1]/2), + (result_pt[0] - int(reference_size[0]/2), + result_pt[1] - int(reference_size[1]/2)), + (result_pt[0] + int(reference_size[0]/2), + result_pt[1] + int(reference_size[1]/2)), color=(0, 0, 255)) # red cv2.rectangle(debug_image, (search_rect[0], search_rect[1]), diff --git a/jsk_perception/node_scripts/people_pose_estimation_2d.py b/jsk_perception/node_scripts/people_pose_estimation_2d.py index 08cd7bcc19..62b2fbe8ba 100755 --- a/jsk_perception/node_scripts/people_pose_estimation_2d.py +++ b/jsk_perception/node_scripts/people_pose_estimation_2d.py @@ -706,7 +706,7 @@ def _draw_joints(self, img, people_joint_positions, all_peaks): cv2.fillConvexPoly(roi2, polygon - np.array([left, top]), color) cv2.addWeighted(roi, 0.4, roi2, 0.6, 0.0, dst=roi) # - offset += len(self.index2handname) / 2 + offset += int(len(self.index2handname) / 2) return img @@ -812,8 +812,8 @@ def _hand_estimate_chainer_backend_each(self, hand_bgr, cx, cy, left_hand): for i, heatmap in enumerate(hmaps): conf = heatmap.max() cds = np.array(np.where(heatmap==conf)).flatten().tolist() - py = cy + cds[0] - hand_bgr.shape[0] / 2 - px = cx + cds[1] - hand_bgr.shape[1] / 2 + py = cy + cds[0] - int(hand_bgr.shape[0] / 2) + px = cx + cds[1] - int(hand_bgr.shape[1] / 2) keypoints.append({'x': px, 'y': py, 'score': conf, 'limb': self.index2handname[idx_offset+i]}) return keypoints diff --git a/jsk_perception/node_scripts/probability_image_classifier.py b/jsk_perception/node_scripts/probability_image_classifier.py index 768ea81d72..65282c5199 100755 --- a/jsk_perception/node_scripts/probability_image_classifier.py +++ b/jsk_perception/node_scripts/probability_image_classifier.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -from label_image_classifier import LabelImageClassifier +from jsk_perception.label_image_classifier import LabelImageClassifier import numpy as np import rospy diff --git a/jsk_perception/node_scripts/rect_array_in_panorama_to_bounding_box_array.py b/jsk_perception/node_scripts/rect_array_in_panorama_to_bounding_box_array.py index 3147d7cfe6..1bf0d8007a 100755 --- a/jsk_perception/node_scripts/rect_array_in_panorama_to_bounding_box_array.py +++ b/jsk_perception/node_scripts/rect_array_in_panorama_to_bounding_box_array.py @@ -50,22 +50,22 @@ def __init__(self): self._dimensions_labels = rospy.get_param( '~dimensions_labels', {} ) self._duration_timeout = rospy.get_param( '~duration_timeout', 0.05 ) - try: - msg_panorama_image = rospy.wait_for_message( - '~panorama_image', Image, 10) - msg_panorama_info = rospy.wait_for_message( - '~panorama_info', PanoramaInfo, 10) - except (rospy.ROSException, rospy.ROSInterruptException) as e: - rospy.logerr('{}'.format(e)) - sys.exit(1) + msg_panorama_info = None + while (not rospy.is_shutdown() and msg_panorama_info is None): + try: + msg_panorama_info = rospy.wait_for_message( + '~panorama_info', PanoramaInfo, 10) + except (rospy.ROSException, rospy.ROSInterruptException) as e: + rospy.logerr('~panorama_info is not subscribed...') + rospy.logerr('waiting ~panorama_info for more 10 seconds') self._frame_panorama = msg_panorama_info.header.frame_id self._theta_min = msg_panorama_info.theta_min self._theta_max = msg_panorama_info.theta_max 
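The Python 2/3 pickle handling introduced in bof_histogram_extractor.py above reappears in sklearn_classifier.py below. For clarity, the pattern in isolation, with a placeholder file name:

```python
import gzip
import sys

if sys.version_info.major <= 2:
    import cPickle as pickle
else:  # Python 3
    import _pickle as pickle

with gzip.open("bof_data.pkl.gz", "rb") as f:
    if sys.version_info.major <= 2:
        bof = pickle.load(f)
    else:
        # the data was pickled under Python 2, so decode its byte strings as latin1
        bof = pickle.load(f, encoding="latin1")
```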
self._phi_min = msg_panorama_info.phi_min self._phi_max = msg_panorama_info.phi_max - self._image_height = msg_panorama_image.height - self._image_width = msg_panorama_image.width + self._image_height = msg_panorama_info.image_height + self._image_width = msg_panorama_info.image_width self._tf_buffer = tf2_ros.Buffer() self._tf_listener = tf2_ros.TransformListener(self._tf_buffer) diff --git a/jsk_perception/node_scripts/regional_feature_based_object_recognition.py b/jsk_perception/node_scripts/regional_feature_based_object_recognition.py index 0bebd48372..a7217494bb 100755 --- a/jsk_perception/node_scripts/regional_feature_based_object_recognition.py +++ b/jsk_perception/node_scripts/regional_feature_based_object_recognition.py @@ -73,6 +73,7 @@ def __init__(self): rospy.loginfo('Fitting KNN from db') db = np.load(db_file) X, y, self.target_names = db['X'], db['y'], db['target_names'] + self.target_names = np.array(list(map(lambda name: name.decode('utf-8') if hasattr(name, 'decode') else name, self.target_names))) self.knn = KNeighborsClassifier(n_neighbors=10) self.knn.fit(X, y) rospy.loginfo('Finished fitting KNN from db') diff --git a/jsk_perception/node_scripts/sklearn_classifier.py b/jsk_perception/node_scripts/sklearn_classifier.py index e42dbef968..be3729f5c4 100755 --- a/jsk_perception/node_scripts/sklearn_classifier.py +++ b/jsk_perception/node_scripts/sklearn_classifier.py @@ -3,7 +3,11 @@ # from __future__ import division import gzip -import cPickle as pickle +import sys +if sys.version_info.major <= 2: + import cPickle as pickle +else: # for python3 + import _pickle as pickle import numpy as np from sklearn.preprocessing import normalize @@ -23,7 +27,10 @@ def __init__(self): def _init_classifier(self): clf_path = rospy.get_param('~clf_path') with gzip.open(clf_path) as f: - self.clf = pickle.load(f) + if sys.version_info.major <= 2: + self.clf = pickle.load(f) + else: + self.clf = pickle.load(f, encoding='latin1') def subscribe(self): self.sub_hist = rospy.Subscriber('~input', VectorArray, self._predict) diff --git a/jsk_perception/node_scripts/solidity_rag_merge.py b/jsk_perception/node_scripts/solidity_rag_merge.py index 7f343d51b1..ba5e2a2b38 100755 --- a/jsk_perception/node_scripts/solidity_rag_merge.py +++ b/jsk_perception/node_scripts/solidity_rag_merge.py @@ -39,6 +39,8 @@ def rag_solidity(labels, connectivity=2): + if LooseVersion(skimage.__version__) >= '0.16.0': + RAG.node = RAG.nodes graph = RAG() # The footprint is constructed in such a way that the first diff --git a/jsk_perception/node_scripts/split_image.py b/jsk_perception/node_scripts/split_image.py index a9f4b72595..66170ec3b0 100755 --- a/jsk_perception/node_scripts/split_image.py +++ b/jsk_perception/node_scripts/split_image.py @@ -47,5 +47,6 @@ def _split_cb(self, msg): if __name__ == '__main__': rospy.init_node('split_image') + rospy.logwarn("split_image.py would be deprecated. 
Please use NODELET version of split_image") SplitImage() rospy.spin() diff --git a/jsk_perception/node_scripts/ssd_object_detector.py b/jsk_perception/node_scripts/ssd_object_detector.py index e95d189f67..359568f9ae 100755 --- a/jsk_perception/node_scripts/ssd_object_detector.py +++ b/jsk_perception/node_scripts/ssd_object_detector.py @@ -186,9 +186,9 @@ def image_cb(self, msg): rect_msg = RectArray(header=msg.header) for bbox in bboxes: - rect = Rect(x=bbox[1], y=bbox[0], - width= bbox[3] - bbox[1], - height=bbox[2] - bbox[0]) + rect = Rect(x=int(bbox[1]), y=int(bbox[0]), + width= int(bbox[3] - bbox[1]), + height=int(bbox[2] - bbox[0])) rect_msg.rects.append(rect) if self.profiling: diff --git a/jsk_perception/node_scripts/vgg16_object_recognition.py b/jsk_perception/node_scripts/vgg16_object_recognition.py index ce29e13175..4169ecac57 100755 --- a/jsk_perception/node_scripts/vgg16_object_recognition.py +++ b/jsk_perception/node_scripts/vgg16_object_recognition.py @@ -43,109 +43,8 @@ import rospy from sensor_msgs.msg import Image +from jsk_perception.vgg16_object_recognition import VGG16ObjectRecognition -class VGG16ObjectRecognition(ConnectionBasedTransport): - - mean_bgr = np.array([104.00698793, 116.66876762, 122.67891434]) - - def __init__(self): - super(self.__class__, self).__init__() - self.insize = 224 - self.gpu = rospy.get_param('~gpu', -1) - self.target_names = rospy.get_param('~target_names') - self.model_name = rospy.get_param('~model_name') - if self.model_name == 'vgg16': - self.model = VGG16(n_class=len(self.target_names)) - elif self.model_name == 'vgg16_batch_normalization': - self.model = VGG16BatchNormalization( - n_class=len(self.target_names)) - else: - rospy.logerr('Unsupported ~model_name: {0}' - .format(self.model_name)) - model_file = rospy.get_param('~model_file') - S.load_hdf5(model_file, self.model) - if self.gpu != -1: - self.model.to_gpu(self.gpu) - self.pub = self.advertise('~output', ClassificationResult, - queue_size=1) - self.pub_input = self.advertise( - '~debug/net_input', Image, queue_size=1) - - def subscribe(self): - if rospy.get_param('~use_mask', False): - # larger buff_size is necessary for taking time callback - # http://stackoverflow.com/questions/26415699/ros-subscriber-not-up-to-date/29160379#29160379 # NOQA - sub = message_filters.Subscriber( - '~input', Image, queue_size=1, buff_size=2**24) - sub_mask = message_filters.Subscriber( - '~input/mask', Image, queue_size=1, buff_size=2**24) - self.subs = [sub, sub_mask] - queue_size = rospy.get_param('~queue_size', 10) - if rospy.get_param('~approximate_sync', False): - slop = rospy.get_param('~slop', 0.1) - sync = message_filters.ApproximateTimeSynchronizer( - self.subs, queue_size=queue_size, slop=slop) - else: - sync = message_filters.TimeSynchronizer( - self.subs, queue_size=queue_size) - sync.registerCallback(self._recognize) - else: - sub = rospy.Subscriber( - '~input', Image, self._recognize, callback_args=None, - queue_size=1, buff_size=2**24) - self.subs = [sub] - - def unsubscribe(self): - for sub in self.subs: - sub.unregister() - - def _recognize(self, imgmsg, mask_msg=None): - bridge = cv_bridge.CvBridge() - bgr = bridge.imgmsg_to_cv2(imgmsg, desired_encoding='bgr8') - if mask_msg is not None: - mask = bridge.imgmsg_to_cv2(mask_msg) - if mask.shape != bgr.shape[:2]: - logerr_throttle(10, - 'Size of input image and mask is different') - return - elif mask.size == 0: - logerr_throttle(10, 'Size of input mask is 0') - return - bgr[mask == 0] = self.mean_bgr - bgr = 
skimage.transform.resize( - bgr, (self.insize, self.insize), preserve_range=True) - input_msg = bridge.cv2_to_imgmsg(bgr.astype(np.uint8), encoding='bgr8') - input_msg.header = imgmsg.header - self.pub_input.publish(input_msg) - - blob = (bgr - self.mean_bgr).transpose((2, 0, 1)) - x_data = np.array([blob], dtype=np.float32) - if self.gpu != -1: - x_data = cuda.to_gpu(x_data, device=self.gpu) - if LooseVersion(chainer.__version__) < LooseVersion('2.0.0'): - x = Variable(x_data, volatile=True) - self.model.train = False - self.model(x) - else: - with chainer.using_config('train', False), \ - chainer.no_backprop_mode(): - x = Variable(x_data) - self.model(x) - - proba = cuda.to_cpu(self.model.pred.data)[0] - label = np.argmax(proba) - label_name = self.target_names[label] - label_proba = proba[label] - cls_msg = ClassificationResult( - header=imgmsg.header, - labels=[label], - label_names=[label_name], - label_proba=[label_proba], - probabilities=proba, - classifier=self.model_name, - target_names=self.target_names, - ) - self.pub.publish(cls_msg) if __name__ == '__main__': diff --git a/jsk_perception/node_scripts/vqa_node.py b/jsk_perception/node_scripts/vqa_node.py new file mode 100755 index 0000000000..6f3cea1eaf --- /dev/null +++ b/jsk_perception/node_scripts/vqa_node.py @@ -0,0 +1,13 @@ +#!/usr/bin/env python + +import rospy +from jsk_perception.vil_inference_client import OFAClientNode + + +def main(): + rospy.init_node("vqa") + node = OFAClientNode() + rospy.spin() + +if __name__ == "__main__": + main() diff --git a/jsk_perception/package.xml b/jsk_perception/package.xml index 9b61648d03..b7e25d7f9c 100644 --- a/jsk_perception/package.xml +++ b/jsk_perception/package.xml @@ -94,6 +94,8 @@ python-fcn-pip python-pytesseract-pip + python-requests + python3-requests python-sklearn python3-sklearn python-yaml @@ -119,9 +121,12 @@ + compressed_depth_image_transport + compressed_image_transport jsk_tools roslaunch roslint + rosservice rostest diff --git a/jsk_perception/plugins/nodelet/libjsk_perception.xml b/jsk_perception/plugins/nodelet/libjsk_perception.xml index 2d6d012b52..35828883fc 100644 --- a/jsk_perception/plugins/nodelet/libjsk_perception.xml +++ b/jsk_perception/plugins/nodelet/libjsk_perception.xml @@ -143,6 +143,9 @@ + + @@ -290,8 +293,20 @@ type="jsk_perception::VirtualCameraMono" base_class_type="nodelet::Nodelet"> + + + + + + diff --git a/jsk_perception/sample/config/sample_ofa_config.rviz b/jsk_perception/sample/config/sample_ofa_config.rviz new file mode 100644 index 0000000000..458138e652 --- /dev/null +++ b/jsk_perception/sample/config/sample_ofa_config.rviz @@ -0,0 +1,181 @@ +Panels: + - Class: rviz/Displays + Help Height: 138 + Name: Displays + Property Tree Widget: + Expanded: ~ + Splitter Ratio: 0.4870370328426361 + Tree Height: 509 + - Class: rviz/Selection + Name: Selection + - Class: rviz/Tool Properties + Expanded: + - /2D Pose Estimate1 + - /2D Nav Goal1 + - /Publish Point1 + Name: Tool Properties + Splitter Ratio: 0.5886790156364441 + - Class: rviz/Views + Expanded: + - /Current View1 + Name: Views + Splitter Ratio: 0.5 + - Class: rviz/Time + Name: Time + SyncMode: 0 + SyncSource: "" +Preferences: + PromptSaveOnExit: true +Toolbars: + toolButtonStyle: 2 +Visualization Manager: + Class: "" + Displays: + - Alpha: 0.5 + Cell Size: 1 + Class: rviz/Grid + Color: 160; 160; 164 + Enabled: false + Line Style: + Line Width: 0.029999999329447746 + Value: Lines + Name: Grid + Normal Cell Count: 0 + Offset: + X: 0 + Y: 0 + Z: 0 + Plane: XY + Plane Cell Count: 10 + 
Reference Frame: + Value: false + - Class: jsk_rviz_plugin/OverlayImage + Enabled: true + Name: VQA/VQA/Image + Topic: /vqa/result/image + Value: true + alpha: 0.800000011920929 + height: 128 + keep aspect ratio: true + left: 10 + overwrite alpha value: false + top: 10 + transport hint: raw + width: 320 + - Align Bottom: false + Background Alpha: 0.800000011920929 + Background Color: 0; 0; 0 + Class: jsk_rviz_plugin/String + Enabled: true + Foreground Alpha: 0.800000011920929 + Foreground Color: 255; 255; 255 + Name: VQA/VQA/Visualize + Overtake Color Properties: true + Overtake Position Properties: true + Topic: /vqa/visualize + Value: true + font: DejaVu Sans Mono + height: 500 + left: 10 + line width: 2 + text size: 12 + top: 320 + width: 512 + - Class: jsk_rviz_plugin/OverlayImage + Enabled: true + Name: Classification/Classification/Image + Topic: /classification/result/image + Value: true + alpha: 0.800000011920929 + height: 128 + keep aspect ratio: true + left: 530 + overwrite alpha value: false + top: 10 + transport hint: raw + width: 320 + - Align Bottom: false + Background Alpha: 0.800000011920929 + Background Color: 0; 0; 0 + Class: jsk_rviz_plugin/String + Enabled: true + Foreground Alpha: 0.800000011920929 + Foreground Color: 255; 255; 255 + Name: ClassificationClassification/Visualize + Overtake Color Properties: true + Overtake Position Properties: true + Topic: /classification/visualize + Value: true + font: DejaVu Sans Mono + height: 500 + left: 530 + line width: 2 + text size: 12 + top: 320 + width: 512 + Enabled: true + Global Options: + Background Color: 48; 48; 48 + Default Light: true + Fixed Frame: map + Frame Rate: 30 + Name: root + Tools: + - Class: rviz/Interact + Hide Inactive Objects: true + - Class: rviz/MoveCamera + - Class: rviz/Select + - Class: rviz/FocusCamera + - Class: rviz/Measure + - Class: rviz/SetInitialPose + Theta std deviation: 0.2617993950843811 + Topic: /initialpose + X std deviation: 0.5 + Y std deviation: 0.5 + - Class: rviz/SetGoal + Topic: /move_base_simple/goal + - Class: rviz/PublishPoint + Single click: true + Topic: /clicked_point + Value: true + Views: + Current: + Class: rviz/Orbit + Distance: 10 + Enable Stereo Rendering: + Stereo Eye Separation: 0.05999999865889549 + Stereo Focal Distance: 1 + Swap Stereo Eyes: false + Value: false + Field of View: 0.7853981852531433 + Focal Point: + X: 0 + Y: 0 + Z: 0 + Focal Shape Fixed Size: true + Focal Shape Size: 0.05000000074505806 + Invert Z Axis: false + Name: Current View + Near Clip Distance: 0.009999999776482582 + Pitch: 0.785398006439209 + Target Frame: + Yaw: 0.785398006439209 + Saved: ~ +Window Geometry: + Displays: + collapsed: false + Height: 1025 + Hide Left Dock: false + Hide Right Dock: true + QMainWindow State: 
000000ff00000000fd00000004000000000000021e000002f7fc0200000008fb0000001200530065006c0065006300740069006f006e00000001e10000009b000000b100fffffffb0000001e0054006f006f006c002000500072006f007000650072007400690065007302000001ed000001df00000185000000b1fb000000120056006900650077007300200054006f006f02000001df000002110000018500000122fb000000200054006f006f006c002000500072006f0070006500720074006900650073003203000002880000011d000002210000017afb000000100044006900730070006c006100790073010000006f000002f70000018400fffffffb0000002000730065006c0065006300740069006f006e00200062007500660066006500720200000138000000aa0000023a00000294fb00000014005700690064006500530074006500720065006f02000000e6000000d2000003ee0000030bfb0000000c004b0069006e0065006300740200000186000001060000030c00000261000000010000010f0000037efc0200000003fb0000001e0054006f006f006c002000500072006f00700065007200740069006500730100000041000000780000000000000000fb0000000a00560069006500770073000000003d0000037e0000013500fffffffb0000001200530065006c0065006300740069006f006e010000025a000000b200000000000000000000000200000490000000a9fc0100000001fb0000000a00560069006500770073030000004e00000080000002e100000197000000030000078000000060fc0100000002fb0000000800540069006d0065010000000000000780000005cd00fffffffb0000000800540069006d0065010000000000000450000000000000000000000556000002f700000004000000040000000800000008fc0000000100000002000000010000000a0054006f006f006c00730100000000ffffffff0000000000000000 + Selection: + collapsed: false + Time: + collapsed: false + Tool Properties: + collapsed: false + Views: + collapsed: true + Width: 1920 + X: 1440 + Y: 1096 diff --git a/jsk_perception/sample/image/items_in_shelf.jpg b/jsk_perception/sample/image/items_in_shelf.jpg new file mode 100644 index 0000000000..699917fa3f Binary files /dev/null and b/jsk_perception/sample/image/items_in_shelf.jpg differ diff --git a/jsk_perception/sample/image/pai_no_mi.jpg b/jsk_perception/sample/image/pai_no_mi.jpg new file mode 100644 index 0000000000..14aff8a629 Binary files /dev/null and b/jsk_perception/sample/image/pai_no_mi.jpg differ diff --git a/jsk_perception/sample/include/play_rosbag_pr2_self_see.xml b/jsk_perception/sample/include/play_rosbag_pr2_self_see.xml index b12b21acbb..498dafe82f 100644 --- a/jsk_perception/sample/include/play_rosbag_pr2_self_see.xml +++ b/jsk_perception/sample/include/play_rosbag_pr2_self_see.xml @@ -2,7 +2,7 @@ + command="$(find xacro)/xacro '$(find pr2_description)/robots/pr2.urdf.xacro'"/> diff --git a/jsk_perception/sample/sample_classification.launch b/jsk_perception/sample/sample_classification.launch new file mode 100644 index 0000000000..71984cea22 --- /dev/null +++ b/jsk_perception/sample/sample_classification.launch @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/jsk_perception/sample/sample_color_histogram.launch b/jsk_perception/sample/sample_color_histogram.launch index b5c1fb791b..d75d2fc11c 100644 --- a/jsk_perception/sample/sample_color_histogram.launch +++ b/jsk_perception/sample/sample_color_histogram.launch @@ -24,8 +24,9 @@ use_window: $(arg gui) + + pkg="jsk_perception" type="publish_mouse_event.py"> image_width: 256 diff --git a/jsk_perception/sample/sample_dual_fisheye_to_panorama.launch b/jsk_perception/sample/sample_dual_fisheye_to_panorama.launch index 12d867d767..457a49071d 100644 --- a/jsk_perception/sample/sample_dual_fisheye_to_panorama.launch +++ b/jsk_perception/sample/sample_dual_fisheye_to_panorama.launch @@ -6,21 +6,33 @@ + + + + + + + + args="load jsk_perception/DualFisheyeToPanorama $(arg MANAGER)"> + + 
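The sample launch files added above bring up the new CLIP-based classification client node and its action server. As a rough illustration of how that interface can be exercised, here is a minimal client sketch; the `/classification/inference_server` action name and the `/usb_cam/image_raw` image topic are assumptions mirroring `scripts/send_classification_query_action.py`, which is added further below in this diff, not a definitive API of the launch file.

```python
#!/usr/bin/env python
# Minimal sketch of querying the classification action server started by
# sample_classification.launch. Action and topic names are assumptions taken
# from scripts/send_classification_query_action.py in this same changeset.
import actionlib
import rospy
from jsk_recognition_msgs.msg import ClassificationTaskAction, ClassificationTaskGoal
from sensor_msgs.msg import Image


def callback(image):
    client = actionlib.SimpleActionClient(
        "/classification/inference_server", ClassificationTaskAction)
    client.wait_for_server()
    goal = ClassificationTaskGoal()
    goal.image = image                          # classify the latest camera frame
    goal.queries = ["human", "apple", "book"]   # candidate labels for CLIP
    client.send_goal(goal)
    client.wait_for_result()
    rospy.loginfo(client.get_result())


if __name__ == "__main__":
    rospy.init_node("classification_client_example")
    rospy.Subscriber("/usb_cam/image_raw", Image, callback)
    rospy.spin()
```

The VQA side exposes the analogous `VQATaskAction` interface on `/vqa/inference_server`, as shown by `scripts/send_vqa_query_action.py` later in this diff.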
diff --git a/jsk_perception/sample/sample_insta360_air.launch b/jsk_perception/sample/sample_insta360_air.launch index 06bbba3e5e..b79b010c55 100644 --- a/jsk_perception/sample/sample_insta360_air.launch +++ b/jsk_perception/sample/sample_insta360_air.launch @@ -27,7 +27,16 @@ + + + + + + @@ -57,7 +66,10 @@ - + vertical_parts: 1 @@ -68,7 +80,7 @@ @@ -76,7 +88,7 @@ @@ -84,7 +96,7 @@ @@ -101,6 +113,10 @@ + + + + diff --git a/jsk_perception/sample/sample_invert_mask_image.launch b/jsk_perception/sample/sample_invert_mask_image.launch new file mode 100644 index 0000000000..d7c338f962 --- /dev/null +++ b/jsk_perception/sample/sample_invert_mask_image.launch @@ -0,0 +1,81 @@ + + + + + + + + + + + file_name: $(find jsk_perception)/sample/kiva_pod_image_color.jpg + encoding: bgr8 + publish_info: false + + + + + + + file_name: $(find jsk_perception)/sample/kiva_pod_mask.jpg + encoding: mono8 + publish_info: false + + + + + + + + + + + + + + approximate_sync: true + + + + + + + + approximate_sync: true + + + + + + + + + + + + + + + + + + + diff --git a/jsk_perception/sample/sample_point_pose_extractor.launch b/jsk_perception/sample/sample_point_pose_extractor.launch new file mode 100644 index 0000000000..07112f8081 --- /dev/null +++ b/jsk_perception/sample/sample_point_pose_extractor.launch @@ -0,0 +1,40 @@ + + + + + + + + file_name: $(find jsk_perception)/sample/image/items_in_shelf.jpg + encoding: bgr8 + publish_info: true + fovx: 84.1 + fovy: 53.8 + + + + + + + + + + + + + template_filename: $(find jsk_perception)/sample/image/pai_no_mi.jpg + template_id: 0 + object_width: 0.145 + object_height: 0.128 + viewer_window: $(arg gui) + + + diff --git a/jsk_perception/sample/sample_rect_array_in_panorama_to_bounding_box_array.launch b/jsk_perception/sample/sample_rect_array_in_panorama_to_bounding_box_array.launch index e3982cec2b..9d0a9a5a0e 100644 --- a/jsk_perception/sample/sample_rect_array_in_panorama_to_bounding_box_array.launch +++ b/jsk_perception/sample/sample_rect_array_in_panorama_to_bounding_box_array.launch @@ -47,7 +47,6 @@ type="rect_array_in_panorama_to_bounding_box_array.py" name="rect_array_in_panorama_to_bounding_box_array" > - diff --git a/jsk_perception/sample/sample_split_image.launch b/jsk_perception/sample/sample_split_image.launch index 400e9ba0a6..b683a995ea 100644 --- a/jsk_perception/sample/sample_split_image.launch +++ b/jsk_perception/sample/sample_split_image.launch @@ -12,14 +12,16 @@ publish_info: true fovx: 84.1 fovy: 53.8 + rate: 60.0 - + vertical_parts: 2 horizontal_parts: 2 + always_subscribe: true diff --git a/jsk_perception/sample/sample_video_to_scene.launch b/jsk_perception/sample/sample_video_to_scene.launch new file mode 100644 index 0000000000..94a4c1d0af --- /dev/null +++ b/jsk_perception/sample/sample_video_to_scene.launch @@ -0,0 +1,35 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/jsk_perception/sample/sample_vqa.launch b/jsk_perception/sample/sample_vqa.launch new file mode 100644 index 0000000000..daa9865e50 --- /dev/null +++ b/jsk_perception/sample/sample_vqa.launch @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/jsk_perception/scripts/send_classification_query_action.py b/jsk_perception/scripts/send_classification_query_action.py new file mode 100755 index 0000000000..db15f17a02 --- /dev/null +++ b/jsk_perception/scripts/send_classification_query_action.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python + +import actionlib +import jsk_recognition_msgs.msg +import rospy +from sensor_msgs.msg import Image + +def 
cb(image): + ac = actionlib.SimpleActionClient("/classification/inference_server", + jsk_recognition_msgs.msg.ClassificationTaskAction) + ac.wait_for_server() + goal = jsk_recognition_msgs.msg.ClassificationTaskGoal() + goal.image = image + goal.queries = ["human", "apple", "book"] + ac.send_goal(goal) + ac.wait_for_result() + print(ac.get_result()) + +rospy.init_node("test_classification_action") +sub = rospy.Subscriber("/usb_cam/image_raw", Image, cb) +rospy.spin() diff --git a/jsk_perception/scripts/send_vqa_query_action.py b/jsk_perception/scripts/send_vqa_query_action.py new file mode 100755 index 0000000000..ad1325290a --- /dev/null +++ b/jsk_perception/scripts/send_vqa_query_action.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python + +import actionlib +import jsk_recognition_msgs.msg +import rospy +from sensor_msgs.msg import Image + +def cb(image): + ac = actionlib.SimpleActionClient("/vqa/inference_server" , jsk_recognition_msgs.msg.VQATaskAction) + ac.wait_for_server() + goal = jsk_recognition_msgs.msg.VQATaskGoal() + goal.image = image + goal.questions = ["what does this image decribe?"] + ac.send_goal(goal) + ac.wait_for_result() + print(ac.get_result()) + +rospy.init_node("test_vqa_action") +sub = rospy.Subscriber("/usb_cam/image_raw", Image, cb) +rospy.spin() diff --git a/jsk_perception/scripts/vqa_request.py b/jsk_perception/scripts/vqa_request.py new file mode 100755 index 0000000000..ee7c1e8df3 --- /dev/null +++ b/jsk_perception/scripts/vqa_request.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python + +import argparse +import actionlib +import cv2 +from cv_bridge import CvBridge +from jsk_recognition_msgs.msg import VQATaskAction, VQATaskActionGoal +import json +import rospy # for logging +import os + +class VQAClient(object): + def __init__(self): + self.ac_caption_client = actionlib.SimpleActionClient("/vqa/vqa_server", VQATaskAction) + self._bridge = CvBridge() + + def request(self, questions_path, image_path, output_dir=None): + self.ac_caption_client.wait_for_server() + caption_goal = VQATaskActionGoal() + cv_image = cv2.imread(image_path) + image = self._bridge.cv2_to_imgmsg(cv_image, "bgr8") + caption_goal.goal.image = image + with open(questions_path) as f: + for q in f: + caption_goal.goal.questions.append(q) + self.ac_caption_client.send_goal(caption_goal.goal) + self.ac_caption_client.wait_for_result() + caption_result = self.ac_caption_client.get_result() + result = {} + for r in caption_result.result.result: + result[r.question] = r.answer + image_name = os.path.splitext(os.path.basename(image_path))[0] + json_name = image_name + ".json" + if output_dir: + save_path = os.path.join(output_dir, json_name) + else: + save_path = os.path.join(os.path.dirname(image_path), json_name) + with open(save_path, "w") as f: + json.dump(result, f, indent=4, separators=(',', ': ')) + +parser = argparse.ArgumentParser(description="CLI interface for VQA action client") +parser.add_argument("questions_path", help="Question text file path of VQA input", type=str) +parser.add_argument("image_path", help="Image file path of VQA input", type=str) +parser.add_argument("-o", "--output", default=None) + +args = parser.parse_args() + +if __name__ == "__main__": + rospy.init_node("vqa_request_client", anonymous=True) + client = VQAClient() + questions_path = args.questions_path + image_path = args.image_path + output = args.output + client.request(questions_path, image_path, output) diff --git a/jsk_perception/setup.py b/jsk_perception/setup.py index 66f43a495a..b4eee0e9e8 100644 --- 
a/jsk_perception/setup.py +++ b/jsk_perception/setup.py @@ -3,6 +3,9 @@ from setuptools import setup from catkin_pkg.python_setup import generate_distutils_setup -d = generate_distutils_setup() +d = generate_distutils_setup( + packages=['jsk_perception'], + package_dir={'': 'src'}, +) setup(**d) diff --git a/jsk_perception/src/dual_fisheye_to_panorama.cpp b/jsk_perception/src/dual_fisheye_to_panorama.cpp index d749cec51b..a10726d6ee 100644 --- a/jsk_perception/src/dual_fisheye_to_panorama.cpp +++ b/jsk_perception/src/dual_fisheye_to_panorama.cpp @@ -92,6 +92,8 @@ namespace jsk_perception msg_panorama_info_.theta_max = M_PI; msg_panorama_info_.phi_min = -M_PI; msg_panorama_info_.phi_max = M_PI; + msg_panorama_info_.image_height = output_image_height_; + msg_panorama_info_.image_width = output_image_width_; onInitPostProcess(); } diff --git a/jsk_perception/src/invert_mask_image.cpp b/jsk_perception/src/invert_mask_image.cpp new file mode 100644 index 0000000000..3fca1bdd69 --- /dev/null +++ b/jsk_perception/src/invert_mask_image.cpp @@ -0,0 +1,88 @@ +// -*- mode: c++ -*- +/********************************************************************* + * Software License Agreement (BSD License) + * + * Copyright (c) 2022, JSK Lab + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of the JSK Lab nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ *********************************************************************/ + +#include "jsk_perception/invert_mask_image.h" +#include +#include +#include +#include + + +namespace jsk_perception +{ + void InvertMaskImage::onInit() + { + DiagnosticNodelet::onInit(); + pub_mask_ = advertise(*pnh_, "output", 1); + onInitPostProcess(); + } + + InvertMaskImage::~InvertMaskImage() { + } + + void InvertMaskImage::subscribe() + { + int queue_size; + pnh_->param("queue_size", queue_size, 1); + sub_mask_ = pnh_->subscribe("input", queue_size, &InvertMaskImage::invert, this); + ros::V_string names = boost::assign::list_of("~input"); + jsk_topic_tools::warnNoRemap(names); + } + + void InvertMaskImage::unsubscribe() + { + sub_mask_.shutdown(); + } + + void InvertMaskImage::invert(const sensor_msgs::Image::ConstPtr& mask_msg) + { + vital_checker_->poke(); + cv::Mat mask; + try { + mask = cv_bridge::toCvShare(mask_msg, "mono8")->image; + } catch(std::exception &e) { + ROS_ERROR("cv_bridge exception: %s", e.what()); + return; + } + cv::bitwise_not(mask, mask); + pub_mask_.publish(cv_bridge::CvImage( + mask_msg->header, + "mono8", + mask).toImageMsg()); + } +} + +#include +PLUGINLIB_EXPORT_CLASS (jsk_perception::InvertMaskImage, nodelet::Nodelet); diff --git a/jsk_perception/src/jsk_perception/__init__.py b/jsk_perception/src/jsk_perception/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/jsk_perception/src/jsk_perception/label_image_classifier.py b/jsk_perception/src/jsk_perception/label_image_classifier.py new file mode 100644 index 0000000000..08567fb847 --- /dev/null +++ b/jsk_perception/src/jsk_perception/label_image_classifier.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python + +import cv_bridge +from jsk_recognition_msgs.msg import ClassificationResult +from jsk_topic_tools import ConnectionBasedTransport +import numpy as np +import rospy +from sensor_msgs.msg import Image + +class LabelImageClassifier(ConnectionBasedTransport): + + classifier_name = 'label_image_classifier' + + def __init__(self): + super(LabelImageClassifier, self).__init__() + self.ignore_labels = rospy.get_param('~ignore_labels', []) + self.target_names = rospy.get_param('~target_names', []) + self.pub = self.advertise( + '~output', ClassificationResult, queue_size=1) + + def subscribe(self): + self.sub = rospy.Subscriber('~input', Image, self._cb) + + def unsubscribe(self): + self.sub.unregister() + + def _cb(self, imgmsg): + bridge = cv_bridge.CvBridge() + img = bridge.imgmsg_to_cv2(imgmsg) + label, proba = self._classify(img) + msg = ClassificationResult() + msg.header = imgmsg.header + msg.labels = [label] + msg.label_names = [self.target_names[label]] + msg.label_proba = [proba[label]] + msg.probabilities = proba + msg.classifier = self.classifier_name + msg.target_names = self.target_names + self.pub.publish(msg) + + def _classify(self, label_img): + counts = np.bincount(label_img.flatten(), + minlength=len(self.target_names)) + counts[self.ignore_labels] = 0 + label = np.argmax(counts) + proba = counts.astype(np.float32) / counts.sum() + return label, proba diff --git a/jsk_perception/src/jsk_perception/vgg16_object_recognition.py b/jsk_perception/src/jsk_perception/vgg16_object_recognition.py new file mode 100644 index 0000000000..f582daa74b --- /dev/null +++ b/jsk_perception/src/jsk_perception/vgg16_object_recognition.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + + +import itertools, 
pkg_resources, sys +from distutils.version import LooseVersion +if LooseVersion(pkg_resources.get_distribution("chainer").version) >= LooseVersion('7.0.0') and \ + sys.version_info.major == 2: + print('''Please install chainer < 7.0.0: + + sudo pip install chainer==6.7.0 + +c.f https://github.com/jsk-ros-pkg/jsk_recognition/pull/2485 +''', file=sys.stderr) + sys.exit(1) +if [p for p in list(itertools.chain(*[pkg_resources.find_distributions(_) for _ in sys.path])) if "cupy-" in p.project_name or "cupy" == p.project_name ] == []: + print('''Please install CuPy + + sudo pip install cupy-cuda[your cuda version] +i.e. + sudo pip install cupy-cuda91 + +''', file=sys.stderr) + # sys.exit(1) +import chainer +from chainer import cuda +import chainer.serializers as S +from chainer import Variable +from distutils.version import LooseVersion +import numpy as np +import skimage.transform + +import cv_bridge +from jsk_recognition_msgs.msg import ClassificationResult +from jsk_recognition_utils.chainermodels import VGG16 +from jsk_recognition_utils.chainermodels import VGG16BatchNormalization +from jsk_topic_tools import ConnectionBasedTransport +from jsk_topic_tools.log_utils import logerr_throttle +import message_filters +import rospy +from sensor_msgs.msg import Image + + +class VGG16ObjectRecognition(ConnectionBasedTransport): + + mean_bgr = np.array([104.00698793, 116.66876762, 122.67891434]) + + def __init__(self): + super(self.__class__, self).__init__() + self.insize = 224 + self.gpu = rospy.get_param('~gpu', -1) + self.target_names = rospy.get_param('~target_names') + self.model_name = rospy.get_param('~model_name') + if self.model_name == 'vgg16': + self.model = VGG16(n_class=len(self.target_names)) + elif self.model_name == 'vgg16_batch_normalization': + self.model = VGG16BatchNormalization( + n_class=len(self.target_names)) + else: + rospy.logerr('Unsupported ~model_name: {0}' + .format(self.model_name)) + model_file = rospy.get_param('~model_file') + S.load_hdf5(model_file, self.model) + if self.gpu != -1: + self.model.to_gpu(self.gpu) + self.pub = self.advertise('~output', ClassificationResult, + queue_size=1) + self.pub_input = self.advertise( + '~debug/net_input', Image, queue_size=1) + + def subscribe(self): + if rospy.get_param('~use_mask', False): + # larger buff_size is necessary for taking time callback + # http://stackoverflow.com/questions/26415699/ros-subscriber-not-up-to-date/29160379#29160379 # NOQA + sub = message_filters.Subscriber( + '~input', Image, queue_size=1, buff_size=2**24) + sub_mask = message_filters.Subscriber( + '~input/mask', Image, queue_size=1, buff_size=2**24) + self.subs = [sub, sub_mask] + queue_size = rospy.get_param('~queue_size', 10) + if rospy.get_param('~approximate_sync', False): + slop = rospy.get_param('~slop', 0.1) + sync = message_filters.ApproximateTimeSynchronizer( + self.subs, queue_size=queue_size, slop=slop) + else: + sync = message_filters.TimeSynchronizer( + self.subs, queue_size=queue_size) + sync.registerCallback(self._recognize) + else: + sub = rospy.Subscriber( + '~input', Image, self._recognize, callback_args=None, + queue_size=1, buff_size=2**24) + self.subs = [sub] + + def unsubscribe(self): + for sub in self.subs: + sub.unregister() + + def _recognize(self, imgmsg, mask_msg=None): + bridge = cv_bridge.CvBridge() + bgr = bridge.imgmsg_to_cv2(imgmsg, desired_encoding='bgr8') + if mask_msg is not None: + mask = bridge.imgmsg_to_cv2(mask_msg) + if mask.shape != bgr.shape[:2]: + logerr_throttle(10, + 'Size of input image and mask is 
different') + return + elif mask.size == 0: + logerr_throttle(10, 'Size of input mask is 0') + return + bgr[mask == 0] = self.mean_bgr + bgr = skimage.transform.resize( + bgr, (self.insize, self.insize), preserve_range=True) + input_msg = bridge.cv2_to_imgmsg(bgr.astype(np.uint8), encoding='bgr8') + input_msg.header = imgmsg.header + self.pub_input.publish(input_msg) + + blob = (bgr - self.mean_bgr).transpose((2, 0, 1)) + x_data = np.array([blob], dtype=np.float32) + if self.gpu != -1: + x_data = cuda.to_gpu(x_data, device=self.gpu) + if LooseVersion(chainer.__version__) < LooseVersion('2.0.0'): + x = Variable(x_data, volatile=True) + self.model.train = False + self.model(x) + else: + with chainer.using_config('train', False), \ + chainer.no_backprop_mode(): + x = Variable(x_data) + self.model(x) + + proba = cuda.to_cpu(self.model.pred.data)[0] + label = np.argmax(proba) + label_name = self.target_names[label] + label_proba = proba[label] + cls_msg = ClassificationResult( + header=imgmsg.header, + labels=[label], + label_names=[label_name], + label_proba=[label_proba], + probabilities=proba, + classifier=self.model_name, + target_names=self.target_names, + ) + self.pub.publish(cls_msg) diff --git a/jsk_perception/src/jsk_perception/vil_inference_client.py b/jsk_perception/src/jsk_perception/vil_inference_client.py new file mode 100644 index 0000000000..02026af4ff --- /dev/null +++ b/jsk_perception/src/jsk_perception/vil_inference_client.py @@ -0,0 +1,249 @@ +#!/usr/bin/env python +import abc +import base64 +import json + +import actionlib +import requests +import rospy +from cv_bridge import CvBridge +from dynamic_reconfigure.server import Server +from jsk_perception.cfg import ClassificationConfig, VQAConfig +from jsk_recognition_msgs.msg import (ClassificationResult, + ClassificationTaskAction, + ClassificationTaskFeedback, + ClassificationTaskResult, + QuestionAndAnswerText, VQAResult, + VQATaskAction, VQATaskFeedback, + VQATaskResult) +from requests.exceptions import ConnectionError +from sensor_msgs.msg import CompressedImage, Image +from std_msgs.msg import String + +import cv2 + + +class DockerInferenceClientBase(object): + def __init__(self, action, + server_config, + result_topic, + action_feedback, + action_result, + app_name): + # inference server configuration + self.host = rospy.get_param("~host", default="localhost") + self.port = rospy.get_param("~port", default=8888) + self.app_name = app_name + # cv bridge + self._bridge = CvBridge() + # default inference image + self.default_img = None + # ROS + self.transport_hint = rospy.get_param('~image_transport', 'raw') + if self.transport_hint == 'compressed': + self.image_sub = rospy.Subscriber( + "{}/compressed".format(rospy.resolve_name('~image')), + CompressedImage, + callback=self.topic_cb, + queue_size=1, + buff_size=2**26 + ) + + else: + self.image_sub = rospy.Subscriber("~image", Image, + callback=self.topic_cb, + queue_size=1, + buff_size=2**26) + self.result_topic_type = result_topic + self.result_pub = rospy.Publisher("~result", result_topic, queue_size=1) + if self.transport_hint == 'compressed': + self.image_pub = rospy.Publisher("~result/image/compressed", CompressedImage, queue_size=1) + else: + self.image_pub = rospy.Publisher("~result/image", Image, queue_size=1) + self.vis_pub = rospy.Publisher("~visualize", String, queue_size=1) + self.action_server = actionlib.SimpleActionServer("~inference_server", + action, + execute_cb=self.action_cb, + auto_start=False) + self.action_feedback = action_feedback + 
self.action_result = action_result + self.reconfigure_server = Server(server_config, self.config_cb) + self.action_server.start() + + def ros_img_to_base(self, ros_img): + if type(ros_img) is CompressedImage: + cv_img = self._bridge.compressed_imgmsg_to_cv2(ros_img, desired_encoding="bgr8") + elif type(ros_img) is Image: + cv_img = self._bridge.imgmsg_to_cv2(ros_img, desired_encoding="bgr8") + else: + raise RuntimeError("Unknown type {}".format(type(ros_img))) + # convert to base64 + encimg = cv2.imencode(".png", cv_img)[1] + img_str = encimg.tostring() + img_byte = base64.b64encode(img_str).decode("utf-8") + return img_byte + + def config_cb(self, config, level): + self.config = config + return config + + @abc.abstractmethod + def topic_cb(self, msg): + pass + + def action_cb(self, goal): + success = True + result = self.action_result() + feedback = self.action_feedback() + if goal.image.data and (not goal.compressed_image.data): + image = goal.image + # result.result.image = image + elif (not goal.image.data) and goal.compressed_image.data: + image = goal.compressed_image + # result.result.compressed_image = image + elif goal.image.data and goal.image.compressed_image.data: + rospy.logerr("Both image and compressed image can not be added simultaneously") + return + else: + rospy.loginfo("No images in goal message, so using subscribed image topic instead") + image = self.default_img + queries = self.create_queries(goal) + try: + result.result = self.inference(image, queries) + except Exception as e: + rospy.logerr(str(e)) + feedback.status = str(e) + success = False + finally: + self.action_server.publish_feedback(feedback) + result.done = success + self.action_server.set_succeeded(result) + + @abc.abstractmethod + def create_queries(self, goal): + pass + + @abc.abstractmethod + def inference(self, img_msg, queries): + pass + + def send_request(self, content): + url = "http://{}:{}/{}".format(self.host, str(self.port), self.app_name) + try: + response = requests.post(url, data=content) + except ConnectionError as e: + rospy.logwarn_once("Cannot establish the connection with API server. 
Is it running?") + raise e + else: + if response.status_code == 200: + return response + else: + err_msg = "Invalid http status code: {}".format(str(response.status_code)) + rospy.logerr(err_msg) + raise RuntimeError(err_msg) + + +class ClipClientNode(DockerInferenceClientBase): + def __init__(self): + DockerInferenceClientBase.__init__(self, + ClassificationTaskAction, + ClassificationConfig, + ClassificationResult, + ClassificationTaskFeedback, + ClassificationTaskResult, + "inference") + + def topic_cb(self, data): + if not self.config: rospy.logwarn("No queries"); return + if not self.config.queries: rospy.logwarn("No queries"); return + queries = self.config.queries.split(";") + try: + msg = self.inference(data, queries) + except Exception: return + # publish debug image + self.image_pub.publish(data) + # publish classification result + msg.header = data.header + self.result_pub.publish(msg) + # publish probabilities result as string + vis_msg = "" + for i, label in enumerate(msg.label_names): + vis_msg += "{}: {:.2f}% ".format(label, msg.probabilities[i]*100) + vis_msg += "\n\nCosine Similarity\n" + for i, label in enumerate(msg.label_names): + vis_msg += "{}: {:.4f} ".format(label, msg.label_proba[i]) + self.vis_pub.publish(vis_msg) + + def create_queries(self, goal): + return goal.queries + + def inference(self, img_msg, queries): + img_byte = self.ros_img_to_base(img_msg) + req = json.dumps({"image": img_byte, + "queries": queries}).encode("utf-8") + response = self.send_request(req) + result_dic = json.loads(response.text)["results"] + labels = [] + probabilities = [] + similarities = [] + for r in result_dic: + labels.append(r["question"]) + probabilities.append(float(r["probability"])) + similarities.append(float(r["similarity"])) + labels = [label for _,label in sorted(zip(probabilities, labels), reverse=True)] + probabilities.sort(reverse=True) + similarities.sort(reverse=True) + # build ClassificationResult message + msg = self.result_topic_type() + msg.labels = list(range(len(labels))) + msg.label_names = labels + msg.label_proba = similarities # cosine similarities + msg.probabilities = probabilities # sum(probabilities) is 1 + msg.classifier = 'clip' + msg.target_names = queries + return msg + + +class OFAClientNode(DockerInferenceClientBase): + def __init__(self): + self.vqa_type = rospy.get_param("~vqa_type", default="caption") # caption, vqa_gen. 
caption is better than vqa_gen in OFA + if self.vqa_type not in ["caption", "vqa_gen"]: + raise RuntimeError("VQA type must be caption or vqa_gen") + DockerInferenceClientBase.__init__(self, + VQATaskAction, + VQAConfig, + VQAResult, + VQATaskFeedback, + VQATaskResult, + self.vqa_type) + + def create_queries(self, goal): + return goal.questions + + def inference(self, img_msg, queries): + img_byte = self.ros_img_to_base(img_msg) + req = json.dumps({"image": img_byte, + "queries": queries}).encode("utf-8") + response = self.send_request(req) + json_result = json.loads(response.text) + msg = self.result_topic_type() + for result in json_result["results"]: + result_msg = QuestionAndAnswerText() + result_msg.question = result["question"] + result_msg.answer = result["answer"] + msg.result.append(result_msg) + return msg + + def topic_cb(self, data): + if not self.config.questions: rospy.logwarn("No questions"); return + queries = self.config.questions.split(";") + try: + msg = self.inference(data, queries) + except Exception: return + self.image_pub.publish(data) + self.result_pub.publish(msg) + vis = "" + for qa in msg.result: + vis += "Q:{}\n A:{}\n".format(qa.question, + qa.answer) + self.vis_pub.publish(vis) diff --git a/jsk_perception/src/point_pose_extractor.cpp b/jsk_perception/src/point_pose_extractor.cpp index e94a5b7a32..3a49185548 100644 --- a/jsk_perception/src/point_pose_extractor.cpp +++ b/jsk_perception/src/point_pose_extractor.cpp @@ -32,677 +32,29 @@ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. *********************************************************************/ - -#include -#include -#include -#if ( CV_MAJOR_VERSION >= 4) -#include -#include -#include -#else -#include -#include -#endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "jsk_perception/point_pose_extractor.h" #include -namespace enc = sensor_msgs::image_encodings; - -bool _first_sample_change; - -void features2keypoint (posedetection_msgs::Feature0D features, - std::vector& keypoints, - cv::Mat& descriptors){ - keypoints.resize(features.scales.size()); - descriptors.create(features.scales.size(),features.descriptor_dim,CV_32FC1); - std::vector::iterator keypoint_it = keypoints.begin(); - for ( int i=0; keypoint_it != keypoints.end(); ++keypoint_it, ++i ) { - *keypoint_it = cv::KeyPoint(cv::Point2f(features.positions[i*2+0], - features.positions[i*2+1]), - features.descriptor_dim, // size - features.orientations[i], //angle - 0, // resonse - features.scales[i] // octave - ); - for (int j = 0; j < features.descriptor_dim; j++){ - descriptors.at(i,j) = - features.descriptors[i*features.descriptor_dim+j]; - } - } -} - - -class Matching_Template -{ -public: - std::string _matching_frame; - cv::Mat _template_img; - std::vector _template_keypoints; - cv::Mat _template_descriptors; - int _original_width_size; - int _original_height_size; - double _template_width; // width of template [m] - double _template_height; // height of template [m] - tf::Transform _relativepose; - cv::Mat _affine_matrix; - std::string _window_name; - // The maximum allowed reprojection error to treat a point pair as an inlier - double _reprojection_threshold; - // threshold on squared ratio of distances between NN and 2nd NN - double _distanceratio_threshold; - std::vector _correspondances; - cv::Mat _previous_stack_img; - - 
Matching_Template(){ - } - Matching_Template(cv::Mat img, - std::string matching_frame, - int original_width_size, - int original_height_size, - double template_width, - double template_height, - tf::Transform relativepose, - cv::Mat affine_matrix, - double reprojection_threshold, - double distanceratio_threshold, - std::string window_name, - bool autosize){ - - _template_img = img.clone(); - _matching_frame = matching_frame; - _original_width_size = original_width_size; - _original_height_size = original_height_size; - _template_width = template_width; - _template_height = template_height; - _relativepose = relativepose; - _affine_matrix = affine_matrix; - _window_name = window_name; - _reprojection_threshold = reprojection_threshold; - _distanceratio_threshold = distanceratio_threshold; - _template_keypoints.clear(); - _correspondances.clear(); - // cv::namedWindow(_window_name, autosize ? CV_WINDOW_AUTOSIZE : 0); - } - - - virtual ~Matching_Template(){ - std::cerr << "delete " << _window_name << std::endl; - } - - - std::string get_window_name(){ - return _window_name; - } - - std::vector* correspondances(){ - return &_correspondances; - } - - bool check_template (){ - if (_template_img.empty() && _template_keypoints.size() == 0 && - _template_descriptors.empty()){ - return false; - } - else { - return true; - } - } - - - int set_template(ros::ServiceClient client){ - posedetection_msgs::Feature0DDetect srv; - - if ( _template_img.empty()) { - ROS_ERROR ("template picture is empty."); - return -1; - } - ROS_INFO_STREAM("read template image and call " << client.getService() << " service"); - cv_bridge::CvImage cv_img; - cv_img.image = cv::Mat(_template_img); - cv_img.encoding = std::string("bgr8"); - srv.request.image = *(cv_img.toImageMsg()); - if (client.call(srv)) { - ROS_INFO_STREAM("get features with " << srv.response.features.scales.size() << " descriptoins"); - features2keypoint (srv.response.features, _template_keypoints, _template_descriptors); - - // inverse affine translation - cv::Mat M_inv; - cv::Mat dst; - cv::invert(_affine_matrix, M_inv); - // cv::drawKeypoints (tmp, _template_keypoints, dst, cv::Scalar::all(1), cv::DrawMatchesFlags::DRAW_RICH_KEYPOINTS); - // cv::drawKeypoints (tmp, _template_keypoints, dst); - // cv::warpPerspective(_template_img, dst, M_inv, - // cv::Size(_original_width_size, _original_height_size), - // CV_INTER_LINEAR, IPL_BORDER_CONSTANT, 0); - for (int i = 0; i < (int)_template_keypoints.size(); i++){ - cv::Point2f pt; - cv::Mat pt_mat(cv::Size(1,1), CV_32FC2, &pt); - cv::Mat pt_src_mat(cv::Size(1,1), CV_32FC2, &_template_keypoints.at(i).pt); - cv::perspectiveTransform (pt_src_mat, pt_mat, M_inv); - _template_keypoints.at(i).pt = pt_mat.at(0,0); - } - // cvSetMouseCallback (_window_name.c_str(), &PointPoseExtractor::cvmousecb, this); - // _template_img = dst; - return 0; - } else { - ROS_ERROR("Failed to call service Feature0DDetect"); - return 1; - } - } - - - double log_fac( int n ) - { - static std::vector slog_table; - int osize = slog_table.size(); - if(osize <= n){ - slog_table.resize(n+1); - if(osize == 0){ - slog_table[0] = -1; - slog_table[1] = log(1.0); - osize = 2; - } - for(int i = osize; i <= n; i++ ){ - slog_table[i] = slog_table[i-1] + log(i); - } - } - return slog_table[n]; - } - - int min_inlier( int n, int m, double p_badsupp, double p_badxform ) - { - double pi, sum; - int i, j; - double lp_badsupp = log( p_badsupp ); - double lp_badsupp1 = log( 1.0 - p_badsupp ); - for( j = m+1; j <= n; j++ ){ - sum = 0; - for( i = j; i <= n; 
i++ ){ - pi = (i-m) * lp_badsupp + (n-i+m) * lp_badsupp1 + - log_fac( n - m ) - log_fac( i - m ) - log_fac( n - i ); - sum += exp( pi ); - } - if( sum < p_badxform ) - break; - } - return j; - } - - - bool estimate_od (ros::ServiceClient client, cv::Mat src_img, - std::vector sourceimg_keypoints, - image_geometry::PinholeCameraModel pcam, - double err_thr, cv::Mat &stack_img, - cv::flann::Index* ft, posedetection_msgs::Object6DPose* o6p){ - - if ( _template_keypoints.size()== 0 && - _template_descriptors.empty() ){ - set_template(client); - } - if ( _template_keypoints.size()== 0 && - _template_descriptors.empty() ) { - ROS_ERROR ("Template image was not set."); - return false; - } - // stacked image - cv::Size stack_size = cv::Size(MAX(src_img.cols,_template_img.cols), - src_img.rows+_template_img.rows); - stack_img = cv::Mat(stack_size,CV_8UC3); - stack_img = cv::Scalar(0); - cv::Mat stack_img_tmp(stack_img,cv::Rect(0,0,_template_img.cols,_template_img.rows)); - cv::add(stack_img_tmp, _template_img, stack_img_tmp); - cv::Mat stack_img_src(stack_img,cv::Rect(0,_template_img.rows,src_img.cols,src_img.rows)); - cv::add(stack_img_src, src_img, stack_img_src); - _previous_stack_img = stack_img.clone(); - - // matching - cv::Mat m_indices(_template_descriptors.rows, 2, CV_32S); - cv::Mat m_dists(_template_descriptors.rows, 2, CV_32F); - ft->knnSearch(_template_descriptors, m_indices, m_dists, 2, cv::flann::SearchParams(-1) ); - - // matched points - std::vector pt1, pt2; - std::vector queryIdxs,trainIdxs; - for ( unsigned int j = 0; j < _template_keypoints.size(); j++ ) { - if ( m_dists.at(j,0) < m_dists.at(j,1) * _distanceratio_threshold) { - queryIdxs.push_back(j); - trainIdxs.push_back(m_indices.at(j,0)); - } - } - if ( queryIdxs.size() == 0 ) { - ROS_WARN_STREAM("could not find matched points with distanceratio(" <<_distanceratio_threshold << ")"); - } else { - cv::KeyPoint::convert(_template_keypoints,pt1,queryIdxs); - cv::KeyPoint::convert(sourceimg_keypoints,pt2,trainIdxs); - } - - ROS_INFO ("Found %d total matches among %d template keypoints", (int)pt2.size(), (int)_template_keypoints.size()); - - cv::Mat H; - std::vector mask((int)pt2.size()); - - if ( pt1.size() > 4 ) { - // ToDO for curve face - H = cv::findHomography(cv::Mat(pt1), cv::Mat(pt2), mask, CV_RANSAC, _reprojection_threshold); - } - - // draw line - for (int j = 0; j < (int)pt1.size(); j++){ - cv::Point2f pt, pt_orig; - cv::Mat pt_mat(cv::Size(1,1), CV_32FC2, &pt); - cv::Mat pt_src_mat(cv::Size(1,1), CV_32FC2, &pt1.at(j)); - cv::perspectiveTransform (pt_src_mat, pt_mat, _affine_matrix); - pt_orig = pt_mat.at(0,0); - if ( mask.at(j)){ - cv::line(stack_img, pt_orig, pt2.at(j)+cv::Point2f(0,_template_img.rows), - CV_RGB(0,255,0), 1,8,0); - } - else { - cv::line(stack_img, pt_orig, pt2.at(j)+cv::Point2f(0,_template_img.rows), - CV_RGB(255,0,255), 1,8,0); - } - } - int inlier_sum = 0; - for (int k=0; k < (int)mask.size(); k++){ - inlier_sum += (int)mask.at(k); - } - - double text_scale = 1.5; - { - int fontFace = 0, thickness = 0, baseLine; - int x, y; - cv::Size text_size; - std::string text; - - text = "inlier: " + boost::lexical_cast((int)inlier_sum) + " / " + boost::lexical_cast((int)pt2.size()); - text_size = cv::getTextSize(text, fontFace, text_scale, thickness, &baseLine); - x = stack_img.size().width - text_size.width; - y = text_size.height + thickness + 10; // 10pt pading - cv::putText (stack_img, text, cv::Point(x, y), - fontFace, text_scale, CV_RGB(0, 255, 0), - thickness, 8, false); - - text = "template: " + 
boost::lexical_cast((int)_template_keypoints.size()); - text_size = cv::getTextSize(text, fontFace, text_scale, thickness, &baseLine); - x = stack_img.size().width - text_size.width; - y += text_size.height + thickness + 10; // 10pt pading - cv::putText (stack_img, text, cv::Point(x, y), - fontFace, text_scale, CV_RGB(0, 255, 0), - thickness, 8, false); - } - - // draw correspondances - ROS_INFO(" _correspondances.size: %d", (int)_correspondances.size()); - for (int j = 0; j < (int)_correspondances.size(); j++){ - cv::circle(stack_img, cv::Point2f(_correspondances.at(j).x, _correspondances.at(j).y + _template_img.size().height), - 8, CV_RGB(255,0,0), -1); - } - - ROS_INFO(" inlier_sum:%d min_lier:%d", inlier_sum, min_inlier((int)pt2.size(), 4, 0.10, 0.01)); - if ((cv::countNonZero( H ) == 0) || (inlier_sum < min_inlier((int)pt2.size(), 4, 0.10, 0.01))){ - ROS_INFO(" inlier_sum < min_lier return-from estimate-od"); - if( _window_name != "" ) - cv::imshow(_window_name, stack_img); - return false; - } - - std::string _type; - char chr[20]; - - // number of match points - sprintf(chr, "%d", (int)pt2.size()); - _type = _matching_frame + "_" + std::string(chr); - - sprintf(chr, "%d", inlier_sum); - _type = _type + "_" + std::string(chr); - - cv::Point2f corners2d[4] = {cv::Point2f(0,0), - cv::Point2f(_original_width_size,0), - cv::Point2f(_original_width_size,_original_height_size), - cv::Point2f(0,_original_height_size)}; - cv::Mat corners2d_mat (cv::Size(4, 1), CV_32FC2, corners2d); - cv::Point3f corners3d[4] = {cv::Point3f(0,0,0), - cv::Point3f(0,_template_width,0), - cv::Point3f(_template_height,_template_width,0), - cv::Point3f(_template_height,0,0)}; - cv::Mat corners3d_mat (cv::Size(4, 1), CV_32FC3, corners3d); - - cv::Mat corners2d_mat_trans; - - cv::perspectiveTransform (corners2d_mat, corners2d_mat_trans, H); - - double fR3[3], fT3[3]; - cv::Mat rvec(3, 1, CV_64FC1, fR3); - cv::Mat tvec(3, 1, CV_64FC1, fT3); - cv::Mat zero_distortion_mat = cv::Mat::zeros(4, 1, CV_64FC1); - - cv::solvePnP (corners3d_mat, corners2d_mat_trans, - pcam.intrinsicMatrix(), - zero_distortion_mat,//if unrectified: pcam.distortionCoeffs() - rvec, tvec); - - tf::Transform checktf, resulttf; - - checktf.setOrigin( tf::Vector3(fT3[0], fT3[1], fT3[2] ) ); - double rx = fR3[0], ry = fR3[1], rz = fR3[2]; - tf::Quaternion quat; - double angle = cv::norm(rvec); - quat.setRotation(tf::Vector3(rx/angle, ry/angle, rz/angle), angle); - checktf.setRotation( quat ); - - resulttf = checktf * _relativepose; - - ROS_INFO( " tx: (%0.2lf,%0.2lf,%0.2lf) rx: (%0.2lf,%0.2lf,%0.2lf)", - resulttf.getOrigin().getX(), - resulttf.getOrigin().getY(), - resulttf.getOrigin().getZ(), - resulttf.getRotation().getAxis().x() * resulttf.getRotation().getAngle(), - resulttf.getRotation().getAxis().y() * resulttf.getRotation().getAngle(), - resulttf.getRotation().getAxis().z() * resulttf.getRotation().getAngle()); - - o6p->pose.position.x = resulttf.getOrigin().getX(); - o6p->pose.position.y = resulttf.getOrigin().getY(); - o6p->pose.position.z = resulttf.getOrigin().getZ(); - o6p->pose.orientation.w = resulttf.getRotation().w(); - o6p->pose.orientation.x = resulttf.getRotation().x(); - o6p->pose.orientation.y = resulttf.getRotation().y(); - o6p->pose.orientation.z = resulttf.getRotation().z(); - o6p->type = _matching_frame; // _type - - // draw 3d cube model - std::vector projected_top; - { - tf::Vector3 coords[8] = {tf::Vector3(0,0,0), - tf::Vector3(0, _template_width, 0), - tf::Vector3(_template_height, _template_width,0), - 
tf::Vector3(_template_height, 0, 0), - tf::Vector3(0, 0, -0.03), - tf::Vector3(0, _template_width, -0.03), - tf::Vector3(_template_height, _template_width, -0.03), - tf::Vector3(_template_height, 0, -0.03)}; - - projected_top = std::vector(8); - - for(int i=0; i<8; i++) { - coords[i] = checktf * coords[i]; - cv::Point3f pt(coords[i].getX(), coords[i].getY(), coords[i].getZ()); - projected_top[i] = pcam.project3dToPixel(pt); - } - } - - { // check if the matched region does not too big or too small - float max_x, max_y, min_x, min_y; - max_x = max_y = -1e9; - min_x = min_y = 1e9; - for (int j = 0; j < 4; j++){ - cv::Point2f pt = corners2d_mat_trans.at(0,j); - max_x = std::max(max_x, pt.x), max_y = std::max(max_y, pt.y); - min_x = std::min(min_x, pt.x), min_y = std::min(min_y, pt.y); - } - if((max_x - min_x) < 30 || (max_y - min_y) < 30 || - src_img.rows < (max_x - min_x)/2 || src_img.cols < (max_y - min_y)/2){ - ROS_INFO(" matched region is too big or small (2< && <30) width:%f height:%f return-from estimate-od", max_x - min_x, max_y - min_y); - return false; - } - } - - double err_sum = 0; - bool err_success = true; - for (int j = 0; j < 4; j++){ - double err = sqrt(pow((corners2d_mat_trans.at(0,j).x - projected_top.at(j).x), 2) + - pow((corners2d_mat_trans.at(0,j).y - projected_top.at(j).y), 2)); - err_sum += err; - } - if (err_sum > err_thr){ - ROS_INFO(" err_sum:%f > err_thr:%f return-from estimate-od", err_sum, err_thr); - err_success = false; - } else { - o6p->reliability = 1.0 - (err_sum / err_thr); - } - // draw lines around the detected object - for (int j = 0; j < corners2d_mat_trans.cols; j++){ - cv::Point2f p1(corners2d_mat_trans.at(0,j).x, - corners2d_mat_trans.at(0,j).y+_template_img.rows); - cv::Point2f p2(corners2d_mat_trans.at(0,(j+1)%corners2d_mat_trans.cols).x, - corners2d_mat_trans.at(0,(j+1)%corners2d_mat_trans.cols).y+_template_img.rows); - cv::line (stack_img, p1, p2, CV_RGB(255, 0, 0), - (err_success?4:1), // width - CV_AA, 0); - } - - // draw 3d cube model - if(projected_top.size() == 8) { // verify, the size is 8 - int cnt = 8; - std::vector ps(cnt); - for(int i=0; i ps(4); - - for(int i=0; i<4; i++) { - coords[i] = resulttf * coords[i]; - cv::Point3f pt(coords[i].getX(), coords[i].getY(), coords[i].getZ()); - ps[i] = pcam.project3dToPixel(pt); - ps[i].y += _template_img.rows; // draw on camera image - } - - cv::line (stack_img, ps[0], ps[1], CV_RGB(255, 0, 0), 3, CV_AA, 0); - cv::line (stack_img, ps[0], ps[2], CV_RGB(0, 255, 0), 3, CV_AA, 0); - cv::line (stack_img, ps[0], ps[3], CV_RGB(0, 0, 255), 3, CV_AA, 0); - } - - // write text on image - { - std::string text; - int x, y; - text = "error: " + boost::lexical_cast(err_sum); - x = stack_img.size().width - 16*17*text_scale; // 16pt * 17 - y = _template_img.size().height - (16 + 2)*text_scale*6; - cv::putText (stack_img, text, cv::Point(x, y), - 0, text_scale, CV_RGB(0, 255, 0), - 2, 8, false); - ROS_INFO(" %s < %f (threshold)", text.c_str(), err_thr ); - } - // for debug window - if( _window_name != "" ) - cv::imshow(_window_name, stack_img); - - return err_success; - } -}; // the end of difinition of class Matching_Template - - -class PointPoseExtractor +namespace jsk_perception { - ros::NodeHandle _n; - image_transport::ImageTransport it; - ros::Subscriber _sub; - ros::ServiceServer _server; - ros::ServiceClient _client; - ros::Publisher _pub, _pub_agg, _pub_pose; - tf::TransformBroadcaster _br; - image_transport::Publisher _debug_pub; - double _reprojection_threshold; - double _distanceratio_threshold; - 
double _th_step; - double _phi_step; - bool _autosize; - double _err_thr; - static std::vector _templates; - image_geometry::PinholeCameraModel pcam; - bool pnod; - bool _initialized; - bool _viewer; - bool _publish_tf; - std::string _child_frame_id; - -public: - PointPoseExtractor() : it(ros::NodeHandle("~")) { - // _sub = _n.subscribe("ImageFeature0D", 1, - // &PointPoseExtractor::imagefeature_cb, this); - _client = _n.serviceClient("Feature0DDetect"); - _pub = _n.advertise("ObjectDetection", 10); - _pub_agg = _n.advertise("ObjectDetection_agg", 10); - _pub_pose = _n.advertise("object_pose", 10); - _debug_pub = it.advertise("debug_image", 1); - _server = _n.advertiseService("SetTemplate", &PointPoseExtractor::settemplate_cb, this); - _initialized = false; - } - - virtual ~PointPoseExtractor(){ + PointPoseExtractor::~PointPoseExtractor(){ _sub.shutdown(); _client.shutdown(); _pub.shutdown(); _pub_agg.shutdown(); } - static void make_template_from_mousecb(Matching_Template *mt){ - cv::Mat H; - cv::Mat tmp_template, tmp_warp_template; - std::vectorpt1, pt2; - double width, height; - std::string filename; - std::cout << "input template's [width]" << std::endl; - std::cin >> width; - std::cout << "input template's [height]" << std::endl; - std::cin >> height; - std::cout << "input template's [filename]" << std::endl; - std::cin >> filename; - - for (int i = 0; i < 4; i++){ - pt1.push_back(cv::Point2d((int)mt->_correspondances.at(i).x, - (int)mt->_correspondances.at(i).y + mt->_template_img.size().height)); - } - cv::Rect rect = cv::boundingRect(cv::Mat(pt1)); - double scale = std::max(width, height) / 500.0; - int iwidth = width / scale, iheight = height / scale; - pt2.push_back(cv::Point2d(0,0)); - pt2.push_back(cv::Point2d(iwidth,0)); - pt2.push_back(cv::Point2d(iwidth,iheight)); - pt2.push_back(cv::Point2d(0, iheight)); - H = cv::findHomography(cv::Mat(pt1), cv::Mat(pt2)); - - cv::getRectSubPix(mt->_previous_stack_img, rect.size(), - cv::Point2f((rect.tl().x + rect.br().x)/2.0,(rect.tl().y + rect.br().y)/2.0), - tmp_template); - cv::warpPerspective(mt->_previous_stack_img, tmp_warp_template, H, cv::Size(iwidth, iheight)); - - try { - cv::imwrite(filename,tmp_template); - boost::filesystem::path fname(filename); - std::stringstream ss; - ss << fname.stem() << "_wrap" << fname.extension(); - cv::imwrite(ss.str(),tmp_warp_template); - }catch (cv::Exception e) { - std::cerr << e.what() << std::endl; - } - - for (int i = 0; i < (int)pt1.size(); i++){ - pt2.push_back(cv::Point2d((int)pt1.at(i).x - rect.x, - (int)pt1.at(i).y - rect.y - mt->_template_img.size().height)); - } - // cv::Mat mask_img = cv::Mat::zeros(tmp_template.size(), CV_8UC3); - // cv::fillConvexPoly(mask_img, pt2.begin(), (int)pt2.size(), CV_RGB(255, 255, 255)); - - // cv::namedWindow("hoge", 1); - // cv::imshow("hoge", mask_img); - - cv::Mat M = (cv::Mat_(3,3) << 1,0,0, 0,1,0, 0,0,1); - std::string window_name = "sample" + boost::lexical_cast((int)_templates.size()); - - Matching_Template* tmplt = - new Matching_Template (tmp_warp_template, "sample", - tmp_warp_template.size().width, tmp_warp_template.size().height, - width, height, - tf::Transform(tf::Quaternion(0, 0, 0, 1), tf::Vector3(0, 0, 0)), - M, - mt->_reprojection_threshold, - mt->_distanceratio_threshold, - _first_sample_change ? window_name : mt->_window_name, - cv::getWindowProperty(mt->_window_name, CV_WND_PROP_AUTOSIZE)); - - mt->_correspondances.clear(); - _templates.push_back(tmplt); - cv::namedWindow(_first_sample_change ? 
window_name : mt->_window_name, - cv::getWindowProperty(mt->_window_name, CV_WND_PROP_AUTOSIZE)); - cvSetMouseCallback (_first_sample_change ? window_name.c_str() : mt->_window_name.c_str(), - &cvmousecb, static_cast(_templates.back())); - _first_sample_change = true; - } - - static void cvmousecb (int event, int x, int y, int flags, void* param){ - Matching_Template *mt = (Matching_Template*)param; - // std::cerr << "mousecb_ -> " << mt << std::endl; - switch (event){ - case CV_EVENT_LBUTTONUP: { - cv::Point2d pt(x,y - (int)mt->_template_img.size().height); - ROS_INFO("add correspondence (%d, %d)", (int)pt.x, (int)pt.y); - mt->_correspondances.push_back(pt); - if ((int)mt->_correspondances.size() >= 4){ - make_template_from_mousecb(mt); - mt->_correspondances.clear(); - ROS_INFO("reset"); - } - break; - } - case CV_EVENT_RBUTTONUP: { - mt->_correspondances.clear(); - ROS_INFO("reset"); - break; - } - } - } - - - void initialize () { + void PointPoseExtractor::initialize () { std::string _pose_str; double template_width; double template_height; std::string template_filename; std::string window_name; - ros::NodeHandle local_nh("~"); - local_nh.param("child_frame_id", _child_frame_id, std::string("matching")); - local_nh.param("object_width", template_width, 0.06); - local_nh.param("object_height", template_height, 0.0739); - local_nh.param("relative_pose", _pose_str, std::string("0 0 0 0 0 0 1")); + pnh_->param("child_frame_id", _child_frame_id, std::string("matching")); + pnh_->param("object_width", template_width, 0.06); + pnh_->param("object_height", template_height, 0.0739); + pnh_->param("relative_pose", _pose_str, std::string("0 0 0 0 0 0 1")); std::string default_template_file_name; try { #ifdef ROSPACK_EXPORT @@ -719,19 +71,17 @@ class PointPoseExtractor #endif } catch (std::runtime_error &e) { } - local_nh.param("template_filename", template_filename, default_template_file_name); - local_nh.param("reprojection_threshold", _reprojection_threshold, 3.0); - local_nh.param("distanceratio_threshold", _distanceratio_threshold, 0.49); - local_nh.param("error_threshold", _err_thr, 50.0); - local_nh.param("theta_step", _th_step, 5.0); - local_nh.param("phi_step", _phi_step, 5.0); - local_nh.param("viewer_window", _viewer, true); - local_nh.param("window_name", window_name, std::string("sample1")); - local_nh.param("autosize", _autosize, false); - local_nh.param("publish_null_object_detection", pnod, false); - local_nh.param("publish_tf", _publish_tf, false); - - _first_sample_change = false; + pnh_->param("template_filename", template_filename, default_template_file_name); + pnh_->param("reprojection_threshold", _reprojection_threshold, 3.0); + pnh_->param("distanceratio_threshold", _distanceratio_threshold, 0.49); + pnh_->param("error_threshold", _err_thr, 50.0); + pnh_->param("theta_step", _th_step, 5.0); + pnh_->param("phi_step", _phi_step, 5.0); + pnh_->param("viewer_window", _viewer, true); + pnh_->param("window_name", window_name, std::string("sample1")); + pnh_->param("autosize", _autosize, false); + pnh_->param("publish_null_object_detection", pnod, false); + pnh_->param("publish_tf", _publish_tf, false); // make one template cv::Mat template_img; @@ -741,6 +91,8 @@ class PointPoseExtractor return; } + _first_sample_change = false; + // relative pose std::vector rv(7); std::istringstream iss(_pose_str); @@ -750,11 +102,11 @@ class PointPoseExtractor if (iss.eof()) { // use rpy expression transform = tf::Transform(tf::createQuaternionFromRPY(rv[3], rv[4], rv[5]), - tf::Vector3(rv[0], 
rv[1], rv[2])); + tf::Vector3(rv[0], rv[1], rv[2])); } else { // use quaternion expression iss >> rv[6]; transform = tf::Transform(tf::Quaternion(rv[3], rv[4], rv[5], rv[6]), - tf::Vector3(rv[0], rv[1], rv[2])); + tf::Vector3(rv[0], rv[1], rv[2])); } // add the image to template list @@ -764,8 +116,8 @@ class PointPoseExtractor } // initialize - cv::Mat make_homography(cv::Mat src, cv::Mat rvec, cv::Mat tvec, - double template_width, double template_height, cv::Size &size){ + cv::Mat PointPoseExtractor::make_homography(cv::Mat src, cv::Mat rvec, cv::Mat tvec, + double template_width, double template_height, cv::Size &size){ cv::Point3f coner[4] = {cv::Point3f(-(template_width/2.0), -(template_height/2.0),0), cv::Point3f(template_width/2.0,-(template_height/2.0),0), @@ -806,11 +158,11 @@ class PointPoseExtractor } - int make_warped_images (cv::Mat src, std::vector &imgs, - std::vector &Mvec, - double template_width, double template_height, - double th_step, double phi_step){ - + int PointPoseExtractor::make_warped_images (cv::Mat src, std::vector &imgs, + std::vector &Mvec, + double template_width, double template_height, + double th_step, double phi_step){ + std::vector sizevec; for (int i = (int)((-3.14/4.0)/th_step); i*th_step < 3.14/4.0; i++){ @@ -831,11 +183,13 @@ class PointPoseExtractor cv::Mat M; cv::Size size; M = make_homography(src, rvec, tvec, template_width, template_height, size); - Mvec.push_back(M); - sizevec.push_back(size); + //Whenever an H matrix cannot be estimated, cv::findHomography returns an empty one. + if(!M.empty()){ + Mvec.push_back(M); + sizevec.push_back(size); + } } } - for (int i = 0; i < (int)Mvec.size(); i++){ cv::Mat dst; cv::warpPerspective(src, dst, Mvec.at(i), sizevec.at(i), @@ -845,9 +199,9 @@ class PointPoseExtractor return 0; } - bool add_new_template(cv::Mat img, std::string typestr, tf::Transform relative_pose, - double template_width, double template_height, - double theta_step=5.0, double phi_step=5.0) + bool PointPoseExtractor::add_new_template(cv::Mat img, std::string typestr, tf::Transform relative_pose, + double template_width, double template_height, + double theta_step=5.0, double phi_step=5.0) { std::vector imgs; std::vector Mvec; @@ -872,36 +226,35 @@ class PointPoseExtractor (_viewer ? type : ""), _autosize); _templates.push_back(tmplt); if( _viewer ) - { - cv::namedWindow(type, _autosize ? CV_WINDOW_AUTOSIZE : 0); - cvSetMouseCallback (type.c_str(), &cvmousecb, static_cast(_templates.back())); - } + { + cv::namedWindow(type, _autosize ? 
CV_WINDOW_AUTOSIZE : 0); + cvSetMouseCallback (type.c_str(), &cvmousecb, this); + } } return true; } - bool settemplate_cb (jsk_perception::SetTemplate::Request &req, - jsk_perception::SetTemplate::Response &res){ - cv_bridge::CvImagePtr cv_ptr; - cv_ptr = cv_bridge::toCvCopy(req.image, enc::BGR8); - cv::Mat img(cv_ptr->image); + bool PointPoseExtractor::settemplate_cb (jsk_perception::SetTemplate::Request &req, + jsk_perception::SetTemplate::Response &res){ + if ( !_initialized ) { + ROS_WARN("SetTemplate service is available only after receiving input ImageFeature0D"); + return false; + } + cv_bridge::CvImagePtr cv_ptr; + cv_ptr = cv_bridge::toCvCopy(req.image, enc::BGR8); + cv::Mat img(cv_ptr->image); - tf::Transform transform(tf::Quaternion(req.relativepose.orientation.x, - req.relativepose.orientation.y, - req.relativepose.orientation.z, - req.relativepose.orientation.w), - tf::Vector3(req.relativepose.position.x, - req.relativepose.position.y, - req.relativepose.position.z)); + tf::Transform transform; + tf::poseMsgToTF(req.relativepose, transform); - // add the image to template list - add_new_template(img, req.type, transform, - req.dimx, req.dimy, 1.0, 1.0); - return true; + // add the image to template list + add_new_template(img, req.type, transform, + req.dimx, req.dimy, 1.0, 1.0); + return true; } - void imagefeature_cb (const posedetection_msgs::ImageFeature0DConstPtr& msg){ + void PointPoseExtractor::imagefeature_cb (const posedetection_msgs::ImageFeature0DConstPtr& msg){ std::vector sourceimg_keypoints; cv::Mat sourceimg_descriptors; std::vector vo6p; @@ -957,31 +310,22 @@ class PointPoseExtractor // Publish result as geometry_msgs/PoseStamped. But it can only contain one object geometry_msgs::PoseStamped pose_msg; pose_msg.header = od.header; - pose_msg.pose = od.objects[0].pose; - _pub_pose.publish(pose_msg); + if ((int)vo6p.size() != 0) { + pose_msg.pose = od.objects[0].pose; + _pub_pose.publish(pose_msg); + } // broadcast tf if ( this->_publish_tf ) { - tf::Transform transform( - tf::Quaternion( - pose_msg.pose.orientation.x, - pose_msg.pose.orientation.y, - pose_msg.pose.orientation.z, - pose_msg.pose.orientation.w - ), - tf::Vector3( - pose_msg.pose.position.x, - pose_msg.pose.position.y, - pose_msg.pose.position.z - ) - ); + tf::Transform transform; + tf::poseMsgToTF(pose_msg.pose, transform); _br.sendTransform( - tf::StampedTransform( - transform, - msg->image.header.stamp, - msg->image.header.frame_id, - _child_frame_id - ) - ); + tf::StampedTransform( + transform, + msg->image.header.stamp, + msg->image.header.frame_id, + _child_frame_id + ) + ); } } } @@ -991,28 +335,43 @@ class PointPoseExtractor cv::waitKey( 10 ); } - /* if there are subscribers of the output topic -> do work - else if -> unregister all topics this node subscribing - */ - void check_subscribers() + void PointPoseExtractor::onInit() { - if(_pub.getNumSubscribers() == 0 && _initialized) { - if(_sub) - _sub.shutdown(); - static int i = 0; - if ( i++ % 100 == 0 ) { - ROS_INFO("wait for subscriberes ... %s", _pub.getTopic().c_str()); - } - } else { - if(!_sub) - _sub = _n.subscribe("ImageFeature0D", 1, - &PointPoseExtractor::imagefeature_cb, this); - } + DiagnosticNodelet::onInit(); + dynamic_reconfigure::Server server; + dynamic_reconfigure::Server::CallbackType f; + f = boost::bind(&PointPoseExtractor::dyn_conf_callback, this, _1, _2); + server.setCallback(f); + + it = new image_transport::ImageTransport(*pnh_); + // Use nh_ instead of pnh_ for backward compatibility. 
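+    // With nh_ the service client and publishers keep resolving in the node's parent
+    // namespace (e.g. Feature0DDetect, ObjectDetection), as the old standalone
+    // executable did; pnh_ would move them under the nodelet's private name.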
+ // See https://github.com/jsk-ros-pkg/jsk_recognition/pull/2779 and + // https://github.com/jsk-ros-pkg/jsk_recognition/pull/2778 + _client = nh_->serviceClient("Feature0DDetect"); + _pub = advertise(*nh_, "ObjectDetection", 10); + _pub_agg = advertise(*nh_, "ObjectDetection_agg", 10); + _pub_pose = advertise(*nh_, "object_pose", 10); + _debug_pub = it->advertise("debug_image", 1); + _server = nh_->advertiseService("SetTemplate", &PointPoseExtractor::settemplate_cb, this); + _initialized = false; + + onInitPostProcess(); + } + + void PointPoseExtractor::subscribe() + { + _sub = nh_->subscribe("ImageFeature0D", 1, + &PointPoseExtractor::imagefeature_cb, this); + } + + void PointPoseExtractor::unsubscribe() + { + _sub.shutdown(); } /* callback for dynamic reconfigure */ - void dyn_conf_callback(jsk_perception::point_pose_extractorConfig &config, - uint32_t level) { + void PointPoseExtractor::dyn_conf_callback(Config &config, + uint32_t level) { std::cout << "id = " << config.template_id << std::endl; std::cout << "lvl = " << level << std::endl; if((int)_templates.size() <= config.template_id) { @@ -1039,25 +398,5 @@ class PointPoseExtractor }; // the end of definition of class PointPoseExtractor - -std::vector PointPoseExtractor::_templates; - -int main (int argc, char **argv){ - ros::init (argc, argv, "PointPoseExtractor"); - - PointPoseExtractor matcher; - - dynamic_reconfigure::Server server; - dynamic_reconfigure::Server::CallbackType f; - f = boost::bind(&PointPoseExtractor::dyn_conf_callback, &matcher, _1, _2); - server.setCallback(f); - - ros::Rate r(10); // 10 hz - while(ros::ok()) { - ros::spinOnce(); - matcher.check_subscribers(); - r.sleep(); - } - - return 0; -} +#include +PLUGINLIB_EXPORT_CLASS (jsk_perception::PointPoseExtractor, nodelet::Nodelet); diff --git a/jsk_perception/src/split_image.cpp b/jsk_perception/src/split_image.cpp new file mode 100644 index 0000000000..e84e7280bd --- /dev/null +++ b/jsk_perception/src/split_image.cpp @@ -0,0 +1,101 @@ +// -*- mode: c++ -*- +/********************************************************************* + * Software License Agreement (BSD License) + * + * Copyright (c) 2023, JSK Lab + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of the JSK Lab nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + *********************************************************************/ +/* + * split_image.cpp + * Author: Yoshiki Obinata + */ + + +#include "jsk_perception/split_image.h" + +namespace jsk_perception{ + + void SplitImage::onInit(){ + DiagnosticNodelet::always_subscribe_ = true; + DiagnosticNodelet::onInit(); + pnh_->param("vertical_parts", vertical_parts_, 1); + pnh_->param("horizontal_parts", horizontal_parts_, 1); + for(int i=0; i(*pnh_, pub_name, 1); + pubs_.push_back(pub_); + } + } + onInitPostProcess(); + } + + SplitImage::~SplitImage(){} + + void SplitImage::subscribe(){ + sub_ = pnh_->subscribe("input", 1, &SplitImage::splitImage, this); + } + + void SplitImage::unsubscribe(){ + sub_.shutdown(); + } + + void SplitImage::splitImage(const sensor_msgs::Image::ConstPtr& msg){ + cv_bridge::CvImagePtr cv_ptr; + try{ + cv_ptr = cv_bridge::toCvCopy(msg, sensor_msgs::image_encodings::BGR8); + }catch(cv_bridge::Exception& e){ + NODELET_ERROR("cv_bridge exception: %s", e.what()); + return; + } + cv::Mat image = cv_ptr->image; + int height = image.rows; + int width = image.cols; + int vertical_size = height / vertical_parts_; + int horizontal_size = width / horizontal_parts_; + for(int i=0; iheader = msg->header; + part_cv_ptr->encoding = sensor_msgs::image_encodings::BGR8; + part_cv_ptr->image = part_image; + sensor_msgs::ImagePtr part_msg = part_cv_ptr->toImageMsg(); + pubs_.at(i*horizontal_parts_+j).publish(part_msg); + } + } + } +} + +#include +PLUGINLIB_EXPORT_CLASS(jsk_perception::SplitImage, nodelet::Nodelet); diff --git a/jsk_perception/src/video_to_scene.cpp b/jsk_perception/src/video_to_scene.cpp new file mode 100644 index 0000000000..6d323d9cb0 --- /dev/null +++ b/jsk_perception/src/video_to_scene.cpp @@ -0,0 +1,112 @@ +// -*- mode: C++ -*- +/********************************************************************* + * Software License Agreement (BSD License) + * + * Copyright (c) 2022, JSK Lab + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of the JSK Lab nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + *********************************************************************/ +/* + * video_to_scene.cpp + * Author: Kei Okada + */ + +#include "jsk_perception/video_to_scene.h" +#include + +namespace jsk_perception{ + void VideoToScene::onInit(){ + DiagnosticNodelet::onInit(); + + bgsubtractor = cv::bgsegm::createBackgroundSubtractorGMG(); + pnh_->param("min_percent", min_percent_, 5); + pnh_->param("max_percent", max_percent_, 20); + captured_ = false; + + srv_ = boost::make_shared > (*pnh_); + dynamic_reconfigure::Server::CallbackType f = + boost::bind(&VideoToScene::configCallback, this, _1, _2); + srv_->setCallback (f); + + //pub_ = advertise(*pnh_, "output", 1); + pub_ = advertiseImage(*pnh_, "output", 1); + + onInitPostProcess(); + } + + void VideoToScene::subscribe(){ + std::string transport; + pnh_->param("image_transport", transport, std::string("raw")); + NODELET_INFO_STREAM("Using transport \"" << transport << "\""); + image_transport::TransportHints hints(transport, ros::TransportHints(), *pnh_); + // + it_.reset(new image_transport::ImageTransport(*pnh_)); + sub_ = it_->subscribe(pnh_->resolveName("input"), 1, &VideoToScene::work, this, hints); + ros::V_string names = boost::assign::list_of("input"); + jsk_topic_tools::warnNoRemap(names); + } + + void VideoToScene::unsubscribe(){ + sub_.shutdown(); + } + + void VideoToScene::work(const sensor_msgs::Image::ConstPtr& image_msg){ + cv::Mat image; + + vital_checker_ -> poke(); + boost::mutex::scoped_lock lock(mutex_); + + image = cv_bridge::toCvShare(image_msg, sensor_msgs::image_encodings::RGB8) -> image; + cv::resize(image, image, cv::Size(), 300.0/image.cols, 300.0/image.cols); + + cv::Mat bgmask; + bgsubtractor->apply(image, bgmask); + cv::erode(bgmask, bgmask, cv::Mat(), cv::Point(-1, -1), 2); + cv::dilate(bgmask, bgmask, cv::Mat(), cv::Point(-1, -1), 2); + + int p = cv::countNonZero(bgmask) / float(bgmask.cols * bgmask.rows) * 100; + NODELET_DEBUG_STREAM("p = " << p << ", min_percent = " << min_percent_ << ", max_percent = " << max_percent_ << ", captured = " << captured_); + + if ( p < min_percent_ && !captured_ ) { + captured_ = true; + pub_.publish(image_msg); + } else if ( captured_ && p >= max_percent_ ) { + captured_ = false; + } + } + + void VideoToScene::configCallback(Config &config, uint32_t level){ + boost::mutex::scoped_lock lock(mutex_); + min_percent_ = config.min_percent; + max_percent_ = config.max_percent; + } +} + +#include +PLUGINLIB_EXPORT_CLASS (jsk_perception::VideoToScene, nodelet::Nodelet); diff --git a/jsk_perception/test/apply_context_to_label_probability.test b/jsk_perception/test/apply_context_to_label_probability.test index 301f2c8348..8629c18214 100644 --- a/jsk_perception/test/apply_context_to_label_probability.test +++ b/jsk_perception/test/apply_context_to_label_probability.test @@ -8,12 +8,12 @@ + retry="3" time-limit="120"> topic_0: /apply_context_to_label_probability/output - timeout_0: 60 + timeout_0: 120 topic_1: 
/apply_context_to_label_probability/output/label - timeout_1: 60 + timeout_1: 120 diff --git a/jsk_perception/test/deep_sort_tracker.test b/jsk_perception/test/deep_sort_tracker.test index a421e64707..a12d814fd8 100644 --- a/jsk_perception/test/deep_sort_tracker.test +++ b/jsk_perception/test/deep_sort_tracker.test @@ -8,12 +8,12 @@ + retry="3" time-limit="120"> topic_0: /deep_sort_tracker/output/viz - timeout_0: 60 + timeout_0: 120 topic_1: /deep_sort_tracker/output/labels - timeout_1: 60 + timeout_1: 120 diff --git a/jsk_perception/test/face_pose_estimation.test b/jsk_perception/test/face_pose_estimation.test index 35d8e6d5b9..ebb24ce560 100644 --- a/jsk_perception/test/face_pose_estimation.test +++ b/jsk_perception/test/face_pose_estimation.test @@ -7,14 +7,14 @@ + retry="3" time-limit="120"> topic_0: /face_pose_estimation/output/rects - timeout_0: 60 + timeout_0: 120 topic_1: /face_pose_estimation/output/pose - timeout_1: 60 + timeout_1: 120 topic_2: /face_pose_estimation/output/gender - timeout_2: 60 + timeout_2: 120 diff --git a/jsk_perception/test/fcn_object_segmentation.test b/jsk_perception/test/fcn_object_segmentation.test index dd65968f1e..c7571d8e1c 100644 --- a/jsk_perception/test/fcn_object_segmentation.test +++ b/jsk_perception/test/fcn_object_segmentation.test @@ -11,10 +11,10 @@ + time-limit="120"> topic_0: /fcn_object_segmentation/output - timeout_0: 60 + timeout_0: 120 diff --git a/jsk_perception/test/human_mesh_recovery.test b/jsk_perception/test/human_mesh_recovery.test index e59a9acf20..1992a43d0b 100644 --- a/jsk_perception/test/human_mesh_recovery.test +++ b/jsk_perception/test/human_mesh_recovery.test @@ -7,10 +7,10 @@ + retry="3" time-limit="120"> topic_0: /human_mesh_recovery/output/pose - timeout_0: 60 + timeout_0: 100 diff --git a/jsk_perception/test/invert_mask_image.test b/jsk_perception/test/invert_mask_image.test new file mode 100644 index 0000000000..73dff168cc --- /dev/null +++ b/jsk_perception/test/invert_mask_image.test @@ -0,0 +1,19 @@ + + + + + + + + + topic_0: /invert_mask_image/output + timeout_0: 30 + check_after_kill_node: true + node_names: [invert_mask_image,] + + + + diff --git a/jsk_perception/test/label_image_classifier.test b/jsk_perception/test/label_image_classifier.test index ba37c915b7..c4e476bf14 100644 --- a/jsk_perception/test/label_image_classifier.test +++ b/jsk_perception/test/label_image_classifier.test @@ -9,10 +9,10 @@ + retry="3" time-limit="120"> topic_0: /label_image_classifier/output - timeout_0: 60 + timeout_0: 120 diff --git a/jsk_perception/test/people_pose_estimation_2d.test b/jsk_perception/test/people_pose_estimation_2d.test index 5fcd015d13..af4fc84494 100644 --- a/jsk_perception/test/people_pose_estimation_2d.test +++ b/jsk_perception/test/people_pose_estimation_2d.test @@ -7,16 +7,16 @@ + retry="3" time-limit="120"> topic_0: /people_pose_estimation_2d/output - timeout_0: 60 + timeout_0: 120 topic_1: /people_pose_estimation_2d/pose - timeout_1: 60 + timeout_1: 120 topic_2: /people_pose_estimation_2d/pose_2d - timeout_2: 60 + timeout_2: 120 topic_3: /people_pose_estimation_2d/skeleton - timeout_3: 60 + timeout_3: 120 diff --git a/jsk_perception/test/point_pose_extractor.test b/jsk_perception/test/point_pose_extractor.test new file mode 100644 index 0000000000..102747e55b --- /dev/null +++ b/jsk_perception/test/point_pose_extractor.test @@ -0,0 +1,22 @@ + + + + + + + + + topic_0: /ObjectDetection + timeout_0: 30 + topic_1: /ObjectDetection_agg + timeout_1: 30 + topic_2: /object_pose + timeout_2: 30 + topic_3: 
/point_pose_extractor/debug_image + timeout_3: 30 + + + + diff --git a/jsk_perception/test/point_pose_extractor_group_ns.test b/jsk_perception/test/point_pose_extractor_group_ns.test new file mode 100644 index 0000000000..0ed56ad043 --- /dev/null +++ b/jsk_perception/test/point_pose_extractor_group_ns.test @@ -0,0 +1,50 @@ + + + + + + + + file_name: $(find jsk_perception)/sample/image/items_in_shelf.jpg + encoding: bgr8 + publish_info: true + fovx: 84.1 + fovy: 53.8 + + + + + + + + + + + template_filename: $(find jsk_perception)/sample/image/pai_no_mi.jpg + template_id: 0 + object_width: 0.145 + object_height: 0.128 + viewer_window: $(arg gui) + + + + + + + topic_0: /test_template_matching/ObjectDetection + timeout_0: 30 + topic_1: /test_template_matching/ObjectDetection_agg + timeout_1: 30 + topic_2: /test_template_matching/object_pose + timeout_2: 30 + topic_3: /test_template_matching/point_pose_extractor/debug_image + timeout_3: 30 + + + + diff --git a/jsk_perception/test/probability_image_classifier.test b/jsk_perception/test/probability_image_classifier.test index 71cb98c7c5..8f25ae32db 100644 --- a/jsk_perception/test/probability_image_classifier.test +++ b/jsk_perception/test/probability_image_classifier.test @@ -9,10 +9,10 @@ + retry="3" time-limit="120"> topic_0: /probability_image_classifier/output - timeout_0: 60 + timeout_0: 120 diff --git a/jsk_perception/test/publish_mouse_event.py b/jsk_perception/test/publish_mouse_event.py new file mode 100755 index 0000000000..899d505fd3 --- /dev/null +++ b/jsk_perception/test/publish_mouse_event.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# copied from image_view2 +# workaround until https://github.com/jsk-ros-pkg/jsk_common/pull/1774 is merged and released +# + +import cv2 +import numpy as np + +import rospy +from sensor_msgs.msg import Image +import cv_bridge +from image_view2.msg import MouseEvent + + +def main(): + pub_plus = rospy.Publisher('~plus_rect_event', MouseEvent, queue_size=1) + pub_minus = rospy.Publisher('~minus_rect_event', MouseEvent, queue_size=1) + + width = int(rospy.get_param('~image_width')) + height = int(rospy.get_param('~image_height')) + plus_events = [ + MouseEvent(type=3, x=int(width/4), y=int(height/4), width=width, height=height), + MouseEvent(type=4, x=int(width/2), y=int(height/2), width=width, height=height), + MouseEvent(type=2, x=int(3*width/4), y=int(3*height/4), width=width, height=height), + ] + minus_events = [ + MouseEvent(type=3, x=int(3*width/4), y=int(3*height/4), width=width, height=height), + MouseEvent(type=4, x=int(width/2), y=int(height/2), width=width, height=height), + MouseEvent(type=2, x=int(width/4), y=int(height/4), width=width, height=height), + ] + rate = rospy.Rate(10) + while not rospy.is_shutdown(): + for e in plus_events: + e.header.stamp = rospy.get_rostime() + pub_plus.publish(e) + rate.sleep() + for e in minus_events: + e.header.stamp = rospy.get_rostime() + pub_minus.publish(e) + rate.sleep() + + +if __name__ == '__main__': + rospy.init_node('publish_mouse_event') + main() diff --git a/jsk_perception/test/ssd_object_detector.test b/jsk_perception/test/ssd_object_detector.test index 24992e14ac..a23ce97b05 100644 --- a/jsk_perception/test/ssd_object_detector.test +++ b/jsk_perception/test/ssd_object_detector.test @@ -5,7 +5,7 @@ + retry="3" time-limit="120" > topic_0: /ssd_object_detector/output/image timeout_0: 120 diff --git a/jsk_perception/test/tile_image.test b/jsk_perception/test/tile_image.test index 73a211d0d1..6996c73781 100644 --- 
a/jsk_perception/test/tile_image.test +++ b/jsk_perception/test/tile_image.test @@ -1,7 +1,7 @@ - + + pkg="jsk_tools" type="test_topic_published.py" + retry="3" time-limit="120" > topic_0: /vgg16_object_recognition/output - timeout_0: 30 + timeout_0: 120 topic_1: /vgg16_object_recognition/debug/net_input - timeout_1: 30 + timeout_1: 120 diff --git a/jsk_perception/test/video_to_scene.test b/jsk_perception/test/video_to_scene.test new file mode 100644 index 0000000000..b9b9fb9ce1 --- /dev/null +++ b/jsk_perception/test/video_to_scene.test @@ -0,0 +1,21 @@ + + + + + + + + + topic_0: /kinect_head/rgb/image_rect_color/output + timeout_0: 20 + topic_1: /kinect_head/rgb/image_rect_color/output/compressed + timeout_1: 20 + check_after_kill_node: true + node_names: [video_to_scene,] + + + + diff --git a/jsk_recognition_msgs/CMakeLists.txt b/jsk_recognition_msgs/CMakeLists.txt index 116018c0b2..86e6164943 100644 --- a/jsk_recognition_msgs/CMakeLists.txt +++ b/jsk_recognition_msgs/CMakeLists.txt @@ -15,6 +15,7 @@ add_message_files( Circle2DArray.msg Circle2D.msg ClassificationResult.msg + ClipResult.msg ClusterPointIndices.msg ColorHistogramArray.msg ColorHistogram.msg @@ -46,6 +47,8 @@ add_message_files( PointsArray.msg PolygonArray.msg PosedCameraInfo.msg + QueryAndProbability.msg + QuestionAndAnswerText.msg RectArray.msg Rect.msg RotatedRect.msg @@ -68,6 +71,7 @@ add_message_files( TrackingStatus.msg BoolStamped.msg VectorArray.msg + VQAResult.msg WeightedPoseArray.msg ContactSensor.msg ContactSensorArray.msg @@ -110,9 +114,14 @@ add_service_files(FILES WhiteBalance.srv ) +add_action_files(FILES + VQATask.action + ClassificationTask.action + ) + generate_messages( DEPENDENCIES - std_msgs sensor_msgs geometry_msgs pcl_msgs jsk_footstep_msgs + std_msgs sensor_msgs geometry_msgs pcl_msgs jsk_footstep_msgs actionlib_msgs ) catkin_package( CATKIN_DEPENDS std_msgs sensor_msgs geometry_msgs pcl_msgs jsk_footstep_msgs @@ -133,7 +142,7 @@ install(DIRECTORY sample USE_SOURCE_PERMISSIONS ) -if(NOT $ENV{ROS_DISTRO} STREQUAL "indigo") # on noetic it needs catkin_install_python to support Python3 and it does not work on indigo for some reason... +if($ENV{ROS_DISTRO} STREQUAL "indigo") # on noetic it needs catkin_install_python to support Python3 and it does not work on indigo for some reason... 
install(DIRECTORY scripts DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} USE_SOURCE_PERMISSIONS diff --git a/jsk_recognition_msgs/action/ClassificationTask.action b/jsk_recognition_msgs/action/ClassificationTask.action new file mode 100644 index 0000000000..417235cc2c --- /dev/null +++ b/jsk_recognition_msgs/action/ClassificationTask.action @@ -0,0 +1,8 @@ +sensor_msgs/Image image +sensor_msgs/CompressedImage compressed_image +string[] queries +--- +jsk_recognition_msgs/ClassificationResult result +bool done +--- +string status diff --git a/jsk_recognition_msgs/action/VQATask.action b/jsk_recognition_msgs/action/VQATask.action new file mode 100644 index 0000000000..efd3a0e066 --- /dev/null +++ b/jsk_recognition_msgs/action/VQATask.action @@ -0,0 +1,8 @@ +sensor_msgs/Image image +sensor_msgs/CompressedImage compressed_image +string[] questions +--- +jsk_recognition_msgs/VQAResult result +bool done +--- +string status diff --git a/jsk_recognition_msgs/msg/ClipResult.msg b/jsk_recognition_msgs/msg/ClipResult.msg new file mode 100644 index 0000000000..f20e76c27d --- /dev/null +++ b/jsk_recognition_msgs/msg/ClipResult.msg @@ -0,0 +1,2 @@ +sensor_msgs/Image image +jsk_recognition_msgs/QueryAndProbability[] result diff --git a/jsk_recognition_msgs/msg/PanoramaInfo.msg b/jsk_recognition_msgs/msg/PanoramaInfo.msg index 6dd1be12b1..602d7791bd 100644 --- a/jsk_recognition_msgs/msg/PanoramaInfo.msg +++ b/jsk_recognition_msgs/msg/PanoramaInfo.msg @@ -11,7 +11,13 @@ std_msgs/Header header string projection_model ####################################################################### -# Field of View Parameters # +# Image shape info # +####################################################################### +uint32 image_height +uint32 image_width + +####################################################################### +# Field of View Parameters # ####################################################################### float64 theta_min float64 theta_max diff --git a/jsk_recognition_msgs/msg/QueryAndProbability.msg b/jsk_recognition_msgs/msg/QueryAndProbability.msg new file mode 100644 index 0000000000..3495b19f3d --- /dev/null +++ b/jsk_recognition_msgs/msg/QueryAndProbability.msg @@ -0,0 +1,2 @@ +string query +float32 probability diff --git a/jsk_recognition_msgs/msg/QuestionAndAnswerText.msg b/jsk_recognition_msgs/msg/QuestionAndAnswerText.msg new file mode 100644 index 0000000000..1070abd928 --- /dev/null +++ b/jsk_recognition_msgs/msg/QuestionAndAnswerText.msg @@ -0,0 +1,2 @@ +string question +string answer diff --git a/jsk_recognition_msgs/msg/VQAResult.msg b/jsk_recognition_msgs/msg/VQAResult.msg new file mode 100644 index 0000000000..f2cc09c093 --- /dev/null +++ b/jsk_recognition_msgs/msg/VQAResult.msg @@ -0,0 +1 @@ +jsk_recognition_msgs/QuestionAndAnswerText[] result diff --git a/jsk_recognition_utils/package.xml b/jsk_recognition_utils/package.xml index 237e47bf78..cf7d9e8ce7 100644 --- a/jsk_recognition_utils/package.xml +++ b/jsk_recognition_utils/package.xml @@ -15,7 +15,8 @@ python3-setuptools catkin - cython + cython + cython3 eigen_conversions dynamic_reconfigure geometry_msgs @@ -46,7 +47,8 @@ python-skimage python3-skimage python-fcn-pip - python-shapely + python-shapely + python3-shapely sensor_msgs std_msgs tf2_ros diff --git a/sound_classification/launch/audio_to_spectrogram.launch b/sound_classification/launch/audio_to_spectrogram.launch index 7e7511b1d0..afbd67093e 100644 --- a/sound_classification/launch/audio_to_spectrogram.launch +++ 
b/sound_classification/launch/audio_to_spectrogram.launch @@ -59,7 +59,7 @@ - + @@ -69,12 +69,12 @@ - + - + do_dynamic_scaling: true diff --git a/sound_classification/launch/classify_sound.launch b/sound_classification/launch/classify_sound.launch index 06325ed57d..276ea93c73 100644 --- a/sound_classification/launch/classify_sound.launch +++ b/sound_classification/launch/classify_sound.launch @@ -36,7 +36,7 @@ - + gpu: $(arg gpu) model_name: nin @@ -47,11 +47,11 @@ - - + + - + diff --git a/sound_classification/launch/save_noise.launch b/sound_classification/launch/save_noise.launch index d94aaf9c19..2db861520c 100644 --- a/sound_classification/launch/save_noise.launch +++ b/sound_classification/launch/save_noise.launch @@ -34,7 +34,7 @@ - + save_data_rate: $(arg save_data_rate) diff --git a/sound_classification/launch/save_sound.launch b/sound_classification/launch/save_sound.launch index 56799dd28c..e32cff10e4 100644 --- a/sound_classification/launch/save_sound.launch +++ b/sound_classification/launch/save_sound.launch @@ -39,9 +39,9 @@ - - - + + + save_data_rate: $(arg save_data_rate) @@ -54,7 +54,7 @@ - + power_per_pixel_threshold: $(arg threshold) lazy: false
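The point_pose_extractor rewrite above exposes template registration through its SetTemplate service, and the new guard means calls succeed only after the nodelet has received its first ImageFeature0D message. Below is a minimal client sketch, assuming the standard catkin-generated `jsk_perception.srv` bindings; the service path and template file are illustrative, not part of this diff.

```python
#!/usr/bin/env python
# Sketch only: register a template with point_pose_extractor via SetTemplate.
# Field names follow the usage visible in settemplate_cb(); dimx/dimy are the
# physical template size in meters (values taken from the group_ns test).
import cv2
import cv_bridge
import rospy
from geometry_msgs.msg import Pose
from jsk_perception.srv import SetTemplate, SetTemplateRequest

rospy.init_node('set_template_client')
srv_name = 'SetTemplate'  # resolved relative to this node; adjust to your namespace
rospy.wait_for_service(srv_name)
set_template = rospy.ServiceProxy(srv_name, SetTemplate)

bridge = cv_bridge.CvBridge()
template = cv2.imread('template.jpg')  # hypothetical template image

req = SetTemplateRequest()
req.type = 'my_template'                            # label used for the viewer window / frame
req.image = bridge.cv2_to_imgmsg(template, 'bgr8')  # template appearance
req.dimx = 0.145                                    # template width [m]
req.dimy = 0.128                                    # template height [m]
req.relativepose = Pose()                           # object pose relative to the template
req.relativepose.orientation.w = 1.0
set_template(req)
```

Note that the service now returns false with a warning until the input ImageFeature0D topic has delivered at least one message.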
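The new SplitImage nodelet cuts each incoming frame into `vertical_parts` x `horizontal_parts` tiles and publishes them on per-part topics created in onInit (the exact topic names are not reproduced here). A short standalone sketch of the same cropping arithmetic, for reference only:

```python
# Sketch only: the row-major tiling used by SplitImage::splitImage().
import cv2

vertical_parts, horizontal_parts = 2, 3   # ~vertical_parts / ~horizontal_parts params
image = cv2.imread('input.jpg')           # hypothetical input frame
h, w = image.shape[:2]
vs, hs = h // vertical_parts, w // horizontal_parts  # integer division, as in the C++ code

parts = []
for i in range(vertical_parts):
    for j in range(horizontal_parts):
        # same ROI as the sub-image taken in the nodelet
        parts.append(image[i * vs:(i + 1) * vs, j * hs:(j + 1) * hs])
# tile (i, j) is published by publisher index i * horizontal_parts + j
```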
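The new VideoToScene nodelet keeps a GMG background model and republishes an input frame only once the fraction of moving pixels drops below `min_percent`, re-arming when it rises above `max_percent`. A standalone sketch of that heuristic, assuming opencv-contrib-python (which provides `cv2.bgsegm`):

```python
# Sketch only: the scene-capture latch implemented in VideoToScene::work().
import cv2

min_percent, max_percent = 5, 20
subtractor = cv2.bgsegm.createBackgroundSubtractorGMG()
captured = False

cap = cv2.VideoCapture('video.mp4')  # hypothetical input stream
while True:
    ok, frame = cap.read()
    if not ok:
        break
    scale = 300.0 / frame.shape[1]                  # resize to ~300 px wide, as in the nodelet
    frame = cv2.resize(frame, None, fx=scale, fy=scale)
    mask = subtractor.apply(frame)
    mask = cv2.erode(mask, None, iterations=2)      # default 3x3 kernel, as in the C++ code
    mask = cv2.dilate(mask, None, iterations=2)
    p = 100.0 * cv2.countNonZero(mask) / mask.size  # percentage of foreground pixels
    if p < min_percent and not captured:
        captured = True
        print('scene captured')                     # the nodelet republishes the image here
    elif captured and p >= max_percent:
        captured = False
```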
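The new VQATask action carries an image (raw or compressed) plus a list of questions and returns a VQAResult of question/answer pairs with a `done` flag; feedback is a status string. A minimal actionlib client sketch follows; the server name is hypothetical, since this diff only adds the message and action definitions. ClassificationTask follows the same pattern with `queries` and a ClassificationResult.

```python
# Sketch only: sending a goal to a (hypothetical) server of the new VQATask action.
import actionlib
import cv2
import cv_bridge
import rospy
from jsk_recognition_msgs.msg import VQATaskAction, VQATaskGoal

rospy.init_node('vqa_task_client')
client = actionlib.SimpleActionClient('vqa_server', VQATaskAction)  # illustrative name
client.wait_for_server()

goal = VQATaskGoal()
goal.image = cv_bridge.CvBridge().cv2_to_imgmsg(cv2.imread('scene.jpg'), 'bgr8')
goal.questions = ['What is in the image?']
client.send_goal(goal)
client.wait_for_result()

res = client.get_result()
if res.done:
    for qa in res.result.result:  # VQAResult.result is QuestionAndAnswerText[]
        print('%s -> %s' % (qa.question, qa.answer))
```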
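The new ClipResult message pairs the scored image with QueryAndProbability entries. A small sketch of how a publisher might fill it; the queries and probabilities are illustrative:

```python
# Sketch only: populating the new ClipResult message.
from jsk_recognition_msgs.msg import ClipResult, QueryAndProbability
from sensor_msgs.msg import Image

msg = ClipResult()
msg.image = Image()  # the image the queries were scored against
msg.result = [
    QueryAndProbability(query='a photo of a drill', probability=0.83),
    QueryAndProbability(query='a photo of a cup', probability=0.17),
]
```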