diff --git a/.github/workflows/config.yml b/.github/workflows/config.yml
index 8be5cd9023..7d7abc3bb0 100644
--- a/.github/workflows/config.yml
+++ b/.github/workflows/config.yml
@@ -66,16 +66,29 @@ jobs:
NOT_TEST_INSTALL : true
USE_JENKINS: true
DOCKER_IMAGE_JENKINS: ros-ubuntu:20.04-pcl
- TEST_PKGS : 'jsk_recognition_msgs' # to skip test
- BEFORE_SCRIPT : "sudo pip install numpy==1.16.6; sudo pip install fcn chainercv chainer-mask-rcnn==0.3.0 decorator==4.4.2 chainer==6.7.0 protobuf==3.18.0 cupy-cuda91 pytesseract==0.3.6 torch==1.4.0;"
+ BEFORE_SCRIPT : "sudo pip3 install numpy==1.17.5 scipy==1.3.3 scikit-image==0.16.2 PyWavelets==0.5.1 imageio==2.4.1 pandas==0.25.3; sudo pip3 install fcn chainercv chainer-mask-rcnn==0.3.0 decorator==4.4.2 chainer==7.8.1 protobuf==3.6.1 cupy-cuda91 pytesseract==0.3.10 Pillow==9.3.0 torch==1.4.0 torchvision==0.5.0;sudo dpkg -i /workspace/.ros/data/libpcl-features1.10_1.10.0+dfsg-5ubuntu1_amd64.deb;"
+ BUILD_PKGS: 'checkerboard_detector imagesift jsk_perception jsk_recognition_utils resized_image_transport'
+ EXTRA_DEB: ros-noetic-pr2-description
experimental : false
- ROS_DISTRO: noetic
USE_DEB: false
NOT_TEST_INSTALL : true
USE_JENKINS: true
DOCKER_IMAGE_JENKINS: ros-ubuntu:20.04-pcl
- BEFORE_SCRIPT : "sudo pip install numpy==1.16.6; sudo pip install fcn chainercv chainer-mask-rcnn==0.3.0 decorator==4.4.2 chainer==6.7.0 protobuf==3.18.0 cupy-cuda91 pytesseract==0.3.6 torch==1.4.0;"
- experimental : true
+ TEST_PKGS : 'jsk_pcl_ros_utils jsk_pcl_ros'
+ BEFORE_SCRIPT : "sudo pip3 install numpy==1.17.5 scipy==1.3.3 scikit-image==0.16.2 PyWavelets==0.5.1 imageio==2.4.1 pandas==0.25.3; sudo pip3 install fcn chainercv chainer-mask-rcnn==0.3.0 decorator==4.4.2 chainer==7.8.1 protobuf==3.6.1 cupy-cuda91 pytesseract==0.3.10 Pillow==9.3.0 torch==1.4.0 torchvision==0.5.0;sudo dpkg -i /workspace/.ros/data/libpcl-features1.10_1.10.0+dfsg-5ubuntu1_amd64.deb;"
+ BUILD_PKGS: 'jsk_pcl_ros_utils jsk_pcl_ros'
+ EXTRA_DEB: ros-noetic-pr2-description
+ experimental : false
+ - ROS_DISTRO: noetic
+ USE_DEB: false
+ NOT_TEST_INSTALL : true
+ USE_JENKINS: true
+ DOCKER_IMAGE_JENKINS: ros-ubuntu:20.04-pcl
+ TEST_PKGS : 'jsk_recognition_msgs' # to skip test
+ BEFORE_SCRIPT : "sudo pip3 install numpy==1.17.5 scipy==1.3.3 scikit-image==0.16.2 PyWavelets==0.5.1 imageio==2.4.1 pandas==0.25.3; sudo pip3 install fcn chainercv chainer-mask-rcnn==0.3.0 decorator==4.4.2 chainer==7.8.1 protobuf==3.6.1 cupy-cuda91 pytesseract==0.3.10 Pillow==9.3.0 torch==1.4.0 torchvision==0.5.0;sudo dpkg -i /workspace/.ros/data/libpcl-features1.10_1.10.0+dfsg-5ubuntu1_amd64.deb;"
+ EXTRA_DEB: ros-noetic-pr2-description
+ experimental : false
steps:
diff --git a/.github/workflows/python2.yml b/.github/workflows/python2.yml
index 72f28a3229..3940014f6a 100644
--- a/.github/workflows/python2.yml
+++ b/.github/workflows/python2.yml
@@ -15,4 +15,4 @@ jobs:
- name: Check Python2
run: |
apt update -q && apt install -y -q python2
- python2 -m compileall .
+ python2 -m compileall -x `find docker -name "*.py"` .
diff --git a/.travis.rosinstall.noetic b/.travis.rosinstall.noetic
new file mode 100644
index 0000000000..62d2cb6127
--- /dev/null
+++ b/.travis.rosinstall.noetic
@@ -0,0 +1,4 @@
+- git: # https://github.com/ros-perception/image_transport_plugins/pull/64
+ uri: https://github.com/ros-perception/image_transport_plugins.git
+ local-name: image_transport_plugins
+ version: noetic-devel
diff --git a/audio_to_spectrogram/CMakeLists.txt b/audio_to_spectrogram/CMakeLists.txt
index e2909950b0..f3828e9d70 100644
--- a/audio_to_spectrogram/CMakeLists.txt
+++ b/audio_to_spectrogram/CMakeLists.txt
@@ -8,7 +8,7 @@ find_package(catkin REQUIRED COMPONENTS
catkin_python_setup()
generate_dynamic_reconfigure_options(
- cfg/AudioAmplitudePlot.cfg
+ cfg/DataAmplitudePlot.cfg
)
catkin_package()
@@ -41,4 +41,10 @@ if(CATKIN_ENABLE_TESTING)
find_package(catkin REQUIRED COMPONENTS rostest roslaunch)
add_rostest(test/audio_to_spectrogram.test)
roslaunch_add_file_check(launch/audio_to_spectrogram.launch)
+ if(NOT $ENV{ROS_DISTRO} STRLESS "kinetic")
+    # Before kinetic, eval cannot be used in launch files
+ # http://wiki.ros.org/roslaunch/XML#substitution_args
+ add_rostest(test/wrench_to_spectrogram.test)
+ roslaunch_add_file_check(launch/wrench_to_spectrogram.launch)
+ endif()
endif()
diff --git a/audio_to_spectrogram/README.md b/audio_to_spectrogram/README.md
index 46070a99f8..321aac4cd7 100644
--- a/audio_to_spectrogram/README.md
+++ b/audio_to_spectrogram/README.md
@@ -1,6 +1,6 @@
# audio_to_spectrogram
-This package converts audio data to spectrum and spectrogram data.
+This package converts audio data (or other time-series data) to spectrum and spectrogram data.
# Usage
By following command, you can publish audio, spectrum and spectrogram topics. Please set correct args for your microphone configuration, such as mic\_sampling\_rate or bitdepth.
@@ -9,6 +9,13 @@ By following command, you can publish audio, spectrum and spectrogram topics. Pl
roslaunch audio_to_spectrogram audio_to_spectrogram.launch
```
+Its data conversion pipeline is as follows:
+```
+audio_to_spectrum.py -> spectrum
+ -> normalized_half_spectrum
+ -> log_spectrum -> preprocess node(s) -> preprocessed spectrum -> spectrum_to_spectrogram.py -> spectrogram
+```
+
Here is an example using rosbag with 300Hz audio.
```bash
roslaunch audio_to_spectrogram sample_audio_to_spectrogram.launch
@@ -18,19 +25,48 @@ roslaunch audio_to_spectrogram sample_audio_to_spectrogram.launch
|---|---|---|
||![](https://user-images.githubusercontent.com/19769486/82075694-9a7ac300-9717-11ea-899c-db6119a76d52.png)|![](https://user-images.githubusercontent.com/19769486/82075685-96e73c00-9717-11ea-9abc-e6e74104d666.png)|
+You can also convert data other than audio to spectrum and spectrogram data using this package.
+Here is an example using a rosbag of a force-torque sensor sensing drill vibration.
+```bash
+roslaunch audio_to_spectrogram sample_wrench_to_spectrogram.launch
+```
+
+|Z-axis Force Amplitude|Normalized Half Spectrum|Spectrogram Source Spectrum|Spectrogram|
+|---|---|---|---|
+|||||
+
# Scripts
## audio_to_spectrum.py
+
A script to convert audio to spectrum.
- ### Publishing topics
-
- `~spectrum` (`jsk_recognition_msgs/Spectrum`)
- Spectrum data calculated from audio by FFT.
+ Spectrum data calculated from audio by FFT.
+   It is the usual "amplitude spectrum".
+ See https://ryo-iijima.com/fftresult/ for details.
+
+ - `~normalized_half_spectrum` (`jsk_recognition_msgs/Spectrum`)
+
+    Spectrum data which is "half" (containing only the non-negative frequencies, from 0 Hz to the Nyquist frequency) and "normalized" (consistent with the amplitude of the original signal).
+ See the following for details.
+ - https://ryo-iijima.com/fftresult/
+ - https://stackoverflow.com/questions/63211851/why-divide-the-output-of-numpy-fft-by-n
+ - https://github.com/jsk-ros-pkg/jsk_recognition/issues/2761#issue-1550715400
+
+ - `~log_spectrum` (`jsk_recognition_msgs/Spectrum`)
+
+ Log-scaled spectrum data.
+ It is calculated by applying log to the absolute value of the FFT result.
+   Usually, log is applied to the "power spectrum", but for simplicity we do not do that here (see the sketch after this list).
+ See the following for details.
+ - https://github.com/jsk-ros-pkg/jsk_recognition/issues/2761#issuecomment-1445810380
+ - http://makotomurakami.com/blog/2020/05/23/5266/
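+
+A minimal numpy sketch of how these three topics relate to one FFT, mirroring the computation in `data_to_spectrum.py` (which `audio_to_spectrum.py` reuses); the sampling rate, window length and random input below are only illustrative:
+
+```python
+import numpy as np
+
+rate = 500                    # sampling rate [Hz] of the input signal (illustrative)
+n = int(0.3 * rate)           # samples buffered for one FFT (fft_sampling_period * rate)
+data = np.random.random(n)    # stands in for the buffered input signal
+
+# Hamming window, FFT and frequency bins
+window = 0.54 - 0.46 * np.cos(2 * np.pi * np.arange(0.0, 1.0, 1.0 / n))
+fft = np.fft.fft(data * window)
+freq = np.fft.fftfreq(n, d=1.0 / rate)
+
+spectrum = np.abs(fft)                                         # ~spectrum
+normalized_half_spectrum = np.abs(fft[freq >= 0]) / (n / 2.0)  # ~normalized_half_spectrum
+log_spectrum = np.log(np.abs(fft))                             # ~log_spectrum
+```
+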
- ### Subscribing topics
- - `audio` (`audio_common_msgs/AudioData`)
+ - `~audio` (`audio_common_msgs/AudioData`)
Audio stream data from microphone. The audio format must be `wave`.
@@ -55,15 +91,94 @@ roslaunch audio_to_spectrogram sample_audio_to_spectrogram.launch
Number of bits per audio data.
- - `~high_cut_freq` (`Int`, default: `800`)
+ - `~fft_exec_rate` (`Double`, default: `50`)
+
+ Rate [Hz] to execute FFT and publish its results.
+
+## data_to_spectrum.py
+
+ Generalized version of `audio_to_spectrum.py`.
+ This script can convert multiple message types to spectrum.
+
+ - ### Publishing topics
+
+ Same as `audio_to_spectrum.py`.
+
+ - ### Subscribing topics
+ - `~input` (`AnyMsg`)
+
+    Topic on which the message containing the data you want to convert to a spectrum is published.
+
+ - ### Parameters
+ - `~expression_to_get_data` (`String`, default: `m.data`)
+
+ Python expression to get data from the input message `m`. For example, if your input is `std_msgs/Float64`, it is `m.data`.
+ Just accessing a field of `m` is recommended.
+    If you want to do a complex calculation (e.g., using `numpy`), use `transform` of `topic_tools` before this node (a minimal end-to-end sketch follows this parameter list).
+
+ - `~data_sampling_rate` (`Int`, default: `500`)
+
+ Sampling rate [Hz] of input data.
+
+ - `~fft_sampling_period` (`Double`, default: `0.3`)
+
+ Period [s] to sample input data for one FFT.
+
+ - `~fft_exec_rate` (`Double`, default: `50`)
+
+ Rate [Hz] to execute FFT and publish its results.
+
+ - `~is_integer` (`Bool`, default: `false`)
+
+ Whether input data is integer or not. For example, if your input is `std_msgs/Float64`, it is `false`.
+
+ - `~is_signed` (`Bool`, default: `true`)
+
+ Whether input data is signed or not. For example, if your input is `std_msgs/Float64`, it is `true`.
+
+ - `~bitdepth` (`Int`, default: `64`)
+
+ Number of bits per input data. For example, if your input is `std_msgs/Float64`, it is `64`.
+
+ - `~n_channel` (`Int`, default: `1`)
+
+    If your input is a scalar, it is `1`.
+    If your input is a flattened 2D matrix, it is the number of channels of the original matrix.
+
+ - `~target_channel` (`Int`, default: `0`)
+
+    If your input is a scalar, it is `0`.
+    If your input is a flattened 2D matrix, it is the index of the target channel.
+
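+As a minimal end-to-end sketch (referenced from `~expression_to_get_data` above), the following node publishes a scalar `std_msgs/Float64` signal that `data_to_spectrum.py` can consume with its default parameters (`~expression_to_get_data: m.data`, float input, `~data_sampling_rate: 500`). The `/sine` topic name and the 5 Hz sine signal are purely illustrative; remap `~input` of `data_to_spectrum.py` to this topic. For a `geometry_msgs/WrenchStamped` input, `~expression_to_get_data` would instead be something like `m.wrench.force.z`.
+
+```python
+#!/usr/bin/env python
+# Illustrative publisher matching data_to_spectrum.py's defaults
+# (scalar float data published at ~data_sampling_rate = 500 Hz).
+import math
+
+import rospy
+from std_msgs.msg import Float64
+
+if __name__ == '__main__':
+    rospy.init_node('sine_publisher')
+    pub = rospy.Publisher('/sine', Float64, queue_size=1)
+    rate = rospy.Rate(500)  # must match ~data_sampling_rate
+    while not rospy.is_shutdown():
+        t = rospy.Time.now().to_sec()
+        pub.publish(Float64(data=math.sin(2.0 * math.pi * 5.0 * t)))
+        rate.sleep()
+```
+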
+## spectrum_filter.py
+
+  A script to filter spectrum data.
+
+ - ### Publishing topics
+ - `~output` (`jsk_recognition_msgs/Spectrum`)
+
+ Filtered spectrum data (`low_cut_freq`-`high_cut_freq`).
+
+ - ### Subscribing topics
+ - `~input` (`jsk_recognition_msgs/Spectrum`)
+
+ Original spectrum data.
+
+ - ### Parameters
+ - `~data_sampling_rate` (`Int`, default: `500`)
+
+    Sampling rate [Hz] of the data used to generate the original spectrum.
+
+ - `~high_cut_freq` (`Int`, default: `250`)
Threshold to limit the maximum frequency of the output spectrum.
- - `~low_cut_freq` (`Int`, default: `1`)
+ - `~low_cut_freq` (`Int`, default: `0`)
Threshold to limit the minimum frequency of the output spectrum.
## spectrum_to_spectrogram.py
+
A script to convert spectrum to spectrogram.
- ### Publishing topics
@@ -128,7 +243,7 @@ roslaunch audio_to_spectrogram sample_audio_to_spectrogram.launch
Number of bits per audio data.
- - `~maximum_amplitude` (`Int`, default: `10000`)
+ - `~maximum_amplitude` (`Double`, default: `10000.0`)
Maximum range of amplitude to plot.
@@ -140,6 +255,66 @@ roslaunch audio_to_spectrogram sample_audio_to_spectrogram.launch
Publish rate [Hz] of audio amplitude image topic.
+## data_amplitude_plot.py
+
+ Generalized version of `audio_amplitude_plot.py`.
+
+ - ### Publishing topics
+
+ - `~output/viz` (`sensor_msgs/Image`)
+
+ Data amplitude plot image.
+
+ - ### Subscribing topics
+ - `~input` (`AnyMsg`)
+
+    Topic on which the message containing the data whose amplitude you want to plot is published.
+
+ - ### Parameters
+ - `~expression_to_get_data` (`String`, default: `m.data`)
+
+ Python expression to get data from the input message `m`. For example, if your input is `std_msgs/Float64`, it is `m.data`.
+ Just accessing a field of `m` is recommended.
+ If you want to do a complex calculation (e.g., using `numpy`), use `transform` of `topic_tools` before this node.
+
+ - `~data_sampling_rate` (`Int`, default: `500`)
+
+ Sampling rate [Hz] of input data.
+
+ - `~is_integer` (`Bool`, default: `false`)
+
+ Whether input data is integer or not. For example, if your input is `std_msgs/Float64`, it is `false`.
+
+ - `~is_signed` (`Bool`, default: `true`)
+
+ Whether input data is signed or not. For example, if your input is `std_msgs/Float64`, it is `true`.
+
+ - `~bitdepth` (`Int`, default: `64`)
+
+ Number of bits per input data. For example, if your input is `std_msgs/Float64`, it is `64`.
+
+ - `~n_channel` (`Int`, default: `1`)
+
+    If your input is a scalar, it is `1`.
+    If your input is a flattened 2D matrix, it is the number of channels of the original matrix.
+
+ - `~target_channel` (`Int`, default: `0`)
+
+    If your input is a scalar, it is `0`.
+    If your input is a flattened 2D matrix, it is the index of the target channel.
+
+ - `~maximum_amplitude` (`Double`, default: `10.0`)
+
+ Maximum range of amplitude to plot.
+
+ - `~window_size` (`Double`, default: `10.0`)
+
+    Window size [s] of input data to plot.
+
+ - `~rate` (`Double`, default: `10.0`)
+
+ Publish rate [Hz] of data amplitude image topic.
+
## spectrum_plot.py
A script to publish frequency vs amplitude plot image.
@@ -157,3 +332,20 @@ roslaunch audio_to_spectrogram sample_audio_to_spectrogram.launch
- `~spectrum` (`jsk_recognition_msgs/Spectrum`)
Spectrum data calculated from audio by FFT.
+
+ - ### Parameters
+ - `~min_amp` (`Double`, default: `0.0`)
+
+ Minimum value of amplitude in plot.
+
+ - `~max_amp` (`Double`, default: `20.0`)
+
+ Maximum value of amplitude in plot.
+
+ - `~queue_size` (`Int`, default: `1`)
+
+ Queue size of spectrum subscriber.
+
+ - `~max_rate` (`Double`, default: `-1`)
+
+ Maximum publish rate [Hz] of frequency vs amplitude plot image. Setting this value low reduces CPU load. `-1` means no maximum limit.
diff --git a/audio_to_spectrogram/cfg/AudioAmplitudePlot.cfg b/audio_to_spectrogram/cfg/AudioAmplitudePlot.cfg
deleted file mode 100755
index a626a14603..0000000000
--- a/audio_to_spectrogram/cfg/AudioAmplitudePlot.cfg
+++ /dev/null
@@ -1,13 +0,0 @@
-#! /usr/bin/env python
-
-PACKAGE = 'audio_to_spectrogram'
-
-from dynamic_reconfigure.parameter_generator_catkin import *
-
-
-gen = ParameterGenerator()
-
-gen.add("maximum_amplitude", int_t, 0, "Maximum range of amplitude to plot", 10000, 1, 100000)
-gen.add("window_size", double_t, 0, "Window size of sound input to plot", 10.0, 0.1, 10000)
-
-exit(gen.generate(PACKAGE, PACKAGE, "AudioAmplitudePlot"))
diff --git a/audio_to_spectrogram/cfg/DataAmplitudePlot.cfg b/audio_to_spectrogram/cfg/DataAmplitudePlot.cfg
new file mode 100755
index 0000000000..9c2c962160
--- /dev/null
+++ b/audio_to_spectrogram/cfg/DataAmplitudePlot.cfg
@@ -0,0 +1,13 @@
+#! /usr/bin/env python
+
+PACKAGE = 'audio_to_spectrogram'
+
+from dynamic_reconfigure.parameter_generator_catkin import *
+
+
+gen = ParameterGenerator()
+
+gen.add("maximum_amplitude", double_t, 0, "Maximum range of amplitude to plot", 10.0)
+gen.add("window_size", double_t, 0, "Window size of data input to plot", 10.0)
+
+exit(gen.generate(PACKAGE, PACKAGE, "DataAmplitudePlot"))
diff --git a/audio_to_spectrogram/docs/images/wrench_amplitude.jpg b/audio_to_spectrogram/docs/images/wrench_amplitude.jpg
new file mode 100644
index 0000000000..fd1ae4dc0a
Binary files /dev/null and b/audio_to_spectrogram/docs/images/wrench_amplitude.jpg differ
diff --git a/audio_to_spectrogram/docs/images/wrench_normalized_half_spectrum.jpg b/audio_to_spectrogram/docs/images/wrench_normalized_half_spectrum.jpg
new file mode 100644
index 0000000000..11065960ee
Binary files /dev/null and b/audio_to_spectrogram/docs/images/wrench_normalized_half_spectrum.jpg differ
diff --git a/audio_to_spectrogram/docs/images/wrench_spectrogram.jpg b/audio_to_spectrogram/docs/images/wrench_spectrogram.jpg
new file mode 100644
index 0000000000..0e7b7679eb
Binary files /dev/null and b/audio_to_spectrogram/docs/images/wrench_spectrogram.jpg differ
diff --git a/audio_to_spectrogram/docs/images/wrench_spectrogram_source.jpg b/audio_to_spectrogram/docs/images/wrench_spectrogram_source.jpg
new file mode 100644
index 0000000000..27e427484e
Binary files /dev/null and b/audio_to_spectrogram/docs/images/wrench_spectrogram_source.jpg differ
diff --git a/audio_to_spectrogram/launch/audio_to_spectrogram.launch b/audio_to_spectrogram/launch/audio_to_spectrogram.launch
index 56f1c139aa..ae0eaf8551 100644
--- a/audio_to_spectrogram/launch/audio_to_spectrogram.launch
+++ b/audio_to_spectrogram/launch/audio_to_spectrogram.launch
@@ -2,20 +2,34 @@
+
+
+
+
+
+
+
+
+
-
+
+
+
+
+
-
@@ -28,66 +42,32 @@
-
-
-
- n_channel: $(arg n_channel)
- mic_sampling_rate: $(arg mic_sampling_rate)
- fft_sampling_period: 0.3
- bitdepth: $(arg bitdepth)
- high_cut_freq: $(arg high_cut_freq)
- low_cut_freq: $(arg low_cut_freq)
- fft_exec_rate: 50
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
- image_height: 300
- image_width: 300
- spectrogram_period: $(arg spectrogram_period)
-
-
-
-
-
-
-
- n_channel: $(arg n_channel)
- mic_sampling_rate: $(arg mic_sampling_rate)
- bitdepth: $(arg bitdepth)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- do_dynamic_scaling: true
-
- colormap: 2
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/audio_to_spectrogram/launch/audio_to_spectrum.launch b/audio_to_spectrogram/launch/audio_to_spectrum.launch
new file mode 100644
index 0000000000..fd2650fdee
--- /dev/null
+++ b/audio_to_spectrogram/launch/audio_to_spectrum.launch
@@ -0,0 +1,52 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ n_channel: $(arg n_channel)
+ target_channel: $(arg target_channel)
+ mic_sampling_rate: $(arg mic_sampling_rate)
+ fft_sampling_period: $(arg fft_sampling_period)
+ bitdepth: $(arg bitdepth)
+ fft_exec_rate: $(arg fft_exec_rate)
+
+
+
+
+
+
+
+ n_channel: $(arg n_channel)
+ target_channel: $(arg target_channel)
+ mic_sampling_rate: $(arg mic_sampling_rate)
+ bitdepth: $(arg bitdepth)
+ maximum_amplitude: $(arg audio_amp_plot_max_amp)
+ window_size: $(arg audio_amp_plot_window_size)
+ rate: $(arg audio_amp_plot_rate)
+
+
+
+
+
+
+
+
+
+
diff --git a/audio_to_spectrogram/launch/data_to_spectrum.launch b/audio_to_spectrogram/launch/data_to_spectrum.launch
new file mode 100644
index 0000000000..3acf38845f
--- /dev/null
+++ b/audio_to_spectrogram/launch/data_to_spectrum.launch
@@ -0,0 +1,61 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ expression_to_get_data: $(arg expression_to_get_data)
+ data_sampling_rate: $(arg data_sampling_rate)
+ fft_sampling_period: $(arg fft_sampling_period)
+ is_integer: $(arg is_integer)
+ is_signed: $(arg is_signed)
+ bitdepth: $(arg bitdepth)
+ n_channel: $(arg n_channel)
+ target_channel: $(arg target_channel)
+ fft_exec_rate: $(arg fft_exec_rate)
+
+
+
+
+
+
+
+ expression_to_get_data: $(arg expression_to_get_data)
+ data_sampling_rate: $(arg data_sampling_rate)
+ is_integer: $(arg is_integer)
+ is_signed: $(arg is_signed)
+ bitdepth: $(arg bitdepth)
+ n_channel: $(arg n_channel)
+ target_channel: $(arg target_channel)
+ maximum_amplitude: $(arg data_amp_plot_max_amp)
+ window_size: $(arg data_amp_plot_window_size)
+ rate: $(arg data_amp_plot_rate)
+
+
+
+
+
+
+
+
+
+
diff --git a/audio_to_spectrogram/launch/spectrum_plot.launch b/audio_to_spectrogram/launch/spectrum_plot.launch
new file mode 100644
index 0000000000..1ec8c2cf14
--- /dev/null
+++ b/audio_to_spectrogram/launch/spectrum_plot.launch
@@ -0,0 +1,29 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ min_amp: $(arg min_amp)
+ max_amp: $(arg max_amp)
+ queue_size: $(arg queue_size)
+ max_rate: $(arg max_rate)
+
+
+
+
+
+
+
+
+
+
diff --git a/audio_to_spectrogram/launch/spectrum_to_spectrogram.launch b/audio_to_spectrogram/launch/spectrum_to_spectrogram.launch
new file mode 100644
index 0000000000..11cae6f7c2
--- /dev/null
+++ b/audio_to_spectrogram/launch/spectrum_to_spectrogram.launch
@@ -0,0 +1,65 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ data_sampling_rate: $(arg data_sampling_rate)
+ high_cut_freq: $(arg high_cut_freq)
+ low_cut_freq: $(arg low_cut_freq)
+
+
+
+
+
+
+
+ image_height: $(arg spectrogram_height)
+ image_width: $(arg spectrogram_width)
+ spectrogram_period: $(arg spectrogram_period)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ do_dynamic_scaling: $(arg do_dynamic_scaling)
+ colormap: $(arg colormap)
+
+
+
+
+
diff --git a/audio_to_spectrogram/launch/wrench_to_spectrogram.launch b/audio_to_spectrogram/launch/wrench_to_spectrogram.launch
new file mode 100644
index 0000000000..9c1ca7e144
--- /dev/null
+++ b/audio_to_spectrogram/launch/wrench_to_spectrogram.launch
@@ -0,0 +1,87 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/audio_to_spectrogram/package.xml b/audio_to_spectrogram/package.xml
index 3d72033ad7..a1f5101094 100644
--- a/audio_to_spectrogram/package.xml
+++ b/audio_to_spectrogram/package.xml
@@ -15,13 +15,18 @@
audio_common_msgs
cv_bridge
dynamic_reconfigure
+ geometry_msgs
image_view
jsk_recognition_msgs
jsk_topic_tools
python-matplotlib
python3-matplotlib
+ rostopic
sensor_msgs
+ std_msgs
+ topic_tools
+ jsk_tools
roslaunch
rostest
diff --git a/audio_to_spectrogram/sample/data/wrench_sensing_drill_vibration.bag b/audio_to_spectrogram/sample/data/wrench_sensing_drill_vibration.bag
new file mode 100644
index 0000000000..cdf0cb60e8
Binary files /dev/null and b/audio_to_spectrogram/sample/data/wrench_sensing_drill_vibration.bag differ
diff --git a/audio_to_spectrogram/sample/sample_wrench_to_spectrogram.launch b/audio_to_spectrogram/sample/sample_wrench_to_spectrogram.launch
new file mode 100644
index 0000000000..702025ceee
--- /dev/null
+++ b/audio_to_spectrogram/sample/sample_wrench_to_spectrogram.launch
@@ -0,0 +1,19 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/audio_to_spectrogram/scripts/audio_amplitude_plot.py b/audio_to_spectrogram/scripts/audio_amplitude_plot.py
index 856b149fff..54dc338d3b 100755
--- a/audio_to_spectrogram/scripts/audio_amplitude_plot.py
+++ b/audio_to_spectrogram/scripts/audio_amplitude_plot.py
@@ -1,85 +1,22 @@
#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-from __future__ import division
-
-import cv_bridge
-from dynamic_reconfigure.server import Server
-from jsk_topic_tools import ConnectionBasedTransport
-import matplotlib.pyplot as plt
-import numpy as np
import rospy
-import sensor_msgs.msg
from audio_to_spectrogram import AudioBuffer
-from audio_to_spectrogram.cfg import AudioAmplitudePlotConfig as Config
-from audio_to_spectrogram import convert_matplotlib_to_img
+from audio_to_spectrogram import DataAmplitudePlot
-class AudioAmplitudePlot(ConnectionBasedTransport):
+class AudioAmplitudePlot(DataAmplitudePlot):
def __init__(self):
- super(AudioAmplitudePlot, self).__init__()
-
- self.audio_buffer = AudioBuffer.from_rosparam()
-
- # Set matplotlib config
- self.fig = plt.figure(figsize=(8, 5))
- self.ax = self.fig.add_subplot(1, 1, 1)
- self.ax.grid(True)
- self.ax.set_xlabel('Time [s]', fontsize=12)
- self.ax.set_ylabel('Amplitude', fontsize=12)
- self.line, = self.ax.plot([0, 0], label='Amplitude of Audio')
-
- # ROS dynamic reconfigure
- self.srv = Server(Config, self.config_callback)
-
- self.pub_img = self.advertise(
- '~output/viz', sensor_msgs.msg.Image, queue_size=1)
-
- def start_timer(self):
- rate = rospy.get_param('~rate', 10)
- if rate == 0:
- rospy.logwarn('You cannot set 0 as the rate; change it to 10.')
- rate = 10
- self.timer = rospy.Timer(rospy.Duration(1.0 / rate), self.timer_cb)
-
- def stop_timer(self):
- self.timer.shutdown()
-
- def subscribe(self):
- self.audio_buffer.subscribe()
- self.start_timer()
-
- def unsubscribe(self):
- self.audio_buffer.unsubscribe()
- self.stop_timer()
-
- def config_callback(self, config, level):
- self.maximum_amplitude = config.maximum_amplitude
- self.window_size = config.window_size
- self.audio_buffer.window_size = self.window_size
- return config
-
- def timer_cb(self, timer):
- window_size = self.window_size
- amp = self.audio_buffer.read(window_size)
- if len(amp) == 0:
- return
- times = np.linspace(-window_size, 0.0, len(amp))
-
- # Plot audio amplitude.
- self.line.set_data(times, amp)
- self.ax.set_xlim((times.min(), times.max()))
- self.ax.set_ylim((-self.maximum_amplitude, self.maximum_amplitude))
-
- self.ax.legend(loc='upper right')
- if self.pub_img.get_num_connections() > 0:
- bridge = cv_bridge.CvBridge()
- img = convert_matplotlib_to_img(self.fig)
- img_msg = bridge.cv2_to_imgmsg(img, encoding='rgb8')
- img_msg.header.stamp = rospy.Time.now()
- self.pub_img.publish(img_msg)
+ # Overwrite dynamic_reconfigure's default values
+ param_name = '~maximum_amplitude'
+ if not rospy.has_param(param_name):
+ rospy.set_param(param_name, 10000)
+
+ super(AudioAmplitudePlot, self).__init__(
+ data_buffer=AudioBuffer.from_rosparam(),
+ )
if __name__ == '__main__':
diff --git a/audio_to_spectrogram/scripts/audio_to_spectrum.py b/audio_to_spectrogram/scripts/audio_to_spectrum.py
index 920bc67c58..0fd85254cc 100755
--- a/audio_to_spectrogram/scripts/audio_to_spectrum.py
+++ b/audio_to_spectrogram/scripts/audio_to_spectrum.py
@@ -1,67 +1,19 @@
#!/usr/bin/env python
-import numpy as np
import rospy
-from audio_common_msgs.msg import AudioData
-from jsk_recognition_msgs.msg import Spectrum
-
from audio_to_spectrogram import AudioBuffer
+from audio_to_spectrogram import DataToSpectrum
# This node execute FFT to audio (audio_common_msgs/AudioData)
# The format of audio topic is assumed to be wave.
-class AudioToSpectrum(object):
+class AudioToSpectrum(DataToSpectrum):
def __init__(self):
- super(AudioToSpectrum, self).__init__()
-
- self.audio_buffer = AudioBuffer.from_rosparam(auto_start=True)
- fft_sampling_period = rospy.get_param('~fft_sampling_period', 0.3)
- self.audio_buffer.window_size = fft_sampling_period
- mic_sampling_rate = self.audio_buffer.input_sample_rate
-
- # fft config
- window_function = np.arange(
- 0.0, 1.0, 1.0 / self.audio_buffer.audio_buffer_len)
- self.window_function = 0.54 - 0.46 * np.cos(
- 2 * np.pi * window_function)
- high_cut_freq = rospy.get_param('~high_cut_freq', 800)
- if high_cut_freq > mic_sampling_rate / 2:
- rospy.logerr('Set high_cut_freq lower than {} Hz'.format(
- mic_sampling_rate / 2))
- low_cut_freq = rospy.get_param('~low_cut_freq', 1) # remove 0 Hz
- self.freq = np.fft.fftfreq(
- self.audio_buffer.audio_buffer_len, d=1./mic_sampling_rate)
- self.cutoff_mask = np.where(
- (low_cut_freq <= self.freq) & (self.freq <= high_cut_freq),
- True, False)
- # How many times fft is executed in one second
- # fft_exec_rate equals to output spectrogram hz
- self.fft_exec_rate = rospy.get_param('~fft_exec_rate', 50)
-
- # Publisher and Subscriber
- self.pub_spectrum = rospy.Publisher(
- '~spectrum', Spectrum, queue_size=1)
- self.pub_spectrum_filtered = rospy.Publisher(
- '~spectrum_filtered', Spectrum, queue_size=1)
- rospy.Timer(rospy.Duration(1. / self.fft_exec_rate), self.timer_cb)
-
- def timer_cb(self, timer):
- if len(self.audio_buffer) != self.audio_buffer.audio_buffer_len:
- return
- audio_data = self.audio_buffer.read()
- # Calc spectrum by fft
- amplitude = np.fft.fft(audio_data * self.window_function)
- amplitude = np.log(np.abs(amplitude))
- spectrum_msg = Spectrum()
- spectrum_msg.header.stamp = rospy.Time.now()
- spectrum_msg.amplitude = amplitude
- spectrum_msg.frequency = self.freq
- self.pub_spectrum.publish(spectrum_msg)
- spectrum_msg.amplitude = amplitude[self.cutoff_mask]
- spectrum_msg.frequency = self.freq[self.cutoff_mask]
- self.pub_spectrum_filtered.publish(spectrum_msg)
+ super(AudioToSpectrum, self).__init__(
+ data_buffer=AudioBuffer.from_rosparam(auto_start=True),
+ )
if __name__ == '__main__':
diff --git a/audio_to_spectrogram/scripts/data_amplitude_plot.py b/audio_to_spectrogram/scripts/data_amplitude_plot.py
new file mode 100755
index 0000000000..e34ad7b97c
--- /dev/null
+++ b/audio_to_spectrogram/scripts/data_amplitude_plot.py
@@ -0,0 +1,11 @@
+#!/usr/bin/env python
+
+import rospy
+
+from audio_to_spectrogram import DataAmplitudePlot
+
+
+if __name__ == '__main__':
+ rospy.init_node('data_amplitude_plot')
+ DataAmplitudePlot()
+ rospy.spin()
diff --git a/audio_to_spectrogram/scripts/data_to_spectrum.py b/audio_to_spectrogram/scripts/data_to_spectrum.py
new file mode 100755
index 0000000000..62f6e1b798
--- /dev/null
+++ b/audio_to_spectrogram/scripts/data_to_spectrum.py
@@ -0,0 +1,12 @@
+#!/usr/bin/env python
+
+import rospy
+
+from audio_to_spectrogram import DataToSpectrum
+
+# This node executes FFT on input data
+
+if __name__ == '__main__':
+ rospy.init_node('data_to_spectrum')
+ DataToSpectrum()
+ rospy.spin()
diff --git a/audio_to_spectrogram/scripts/spectrum_filter.py b/audio_to_spectrogram/scripts/spectrum_filter.py
new file mode 100755
index 0000000000..bfa26d7808
--- /dev/null
+++ b/audio_to_spectrogram/scripts/spectrum_filter.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python
+
+from __future__ import division
+
+import numpy as np
+import rospy
+
+from jsk_recognition_msgs.msg import Spectrum
+from jsk_topic_tools import ConnectionBasedTransport
+
+
+class SpectrumFilter(ConnectionBasedTransport):
+
+ def __init__(self):
+ super(SpectrumFilter, self).__init__()
+ data_sampling_rate = rospy.get_param('~data_sampling_rate', 500)
+ self.high_cut_freq = rospy.get_param('~high_cut_freq', 250)
+ nyquist_freq = data_sampling_rate / 2.0
+ if self.high_cut_freq > nyquist_freq:
+ rospy.logerr('Set high_cut_freq not more than {} Hz'.format(
+ nyquist_freq))
+ self.low_cut_freq = rospy.get_param('~low_cut_freq', 0)
+
+ self.pub = self.advertise('~output', Spectrum, queue_size=1)
+
+ def subscribe(self):
+ self.sub = rospy.Subscriber('~input', Spectrum, self.input_cb)
+
+ def unsubscribe(self):
+ self.sub.unregister()
+
+ def input_cb(self, sub_msg):
+ amp = np.array(sub_msg.amplitude, dtype=np.float32)
+ freq = np.array(sub_msg.frequency, dtype=np.float32)
+ cutoff_mask = np.where(
+ (self.low_cut_freq <= freq) & (freq <= self.high_cut_freq),
+ True, False)
+ pub_msg = Spectrum()
+ pub_msg.header.stamp = sub_msg.header.stamp
+ pub_msg.amplitude = amp[cutoff_mask]
+ pub_msg.frequency = freq[cutoff_mask]
+ self.pub.publish(pub_msg)
+
+
+if __name__ == '__main__':
+ rospy.init_node('spectrum_filter')
+ SpectrumFilter()
+ rospy.spin()
diff --git a/audio_to_spectrogram/scripts/spectrum_plot.py b/audio_to_spectrogram/scripts/spectrum_plot.py
index bf27a824f7..2486d192db 100755
--- a/audio_to_spectrogram/scripts/spectrum_plot.py
+++ b/audio_to_spectrogram/scripts/spectrum_plot.py
@@ -5,6 +5,9 @@
import cv_bridge
from jsk_topic_tools import ConnectionBasedTransport
+import matplotlib
+from audio_to_spectrogram import check_matplotlib_version; check_matplotlib_version()
+matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import rospy
@@ -19,13 +22,28 @@ class SpectrumPlot(ConnectionBasedTransport):
def __init__(self):
super(SpectrumPlot, self).__init__()
+ self.min_amp = rospy.get_param('~min_amp', 0.0)
+ self.max_amp = rospy.get_param('~max_amp', 20.0)
+ self.queue_size = rospy.get_param('~queue_size', 1)
+ max_rate = rospy.get_param('~max_rate', -1)
+ if max_rate > 0:
+ self.min_period = 1.0 / max_rate
+ else:
+ self.min_period = -1
+ self.prev_pub_tm = None
# Set matplotlib config
self.fig = plt.figure(figsize=(8, 5))
- self.fig.suptitle('Spectrum plot', size=12)
self.fig.subplots_adjust(left=0.1, right=0.95, top=0.90, bottom=0.1,
wspace=0.2, hspace=0.6)
self.ax = self.fig.add_subplot(1, 1, 1)
self.ax.grid(True)
+ # self.fig.suptitle('Spectrum plot', size=12)
+ self.ax.set_title('Spectrum plot', fontsize=12)
+ # Use self.ax.set_title() instead of
+ # self.fig.suptitle() to use self.fig.tight_layout()
+ # preventing characters from being cut off
+ # cf. https://tm23forest.com/contents/matplotlib-tightlayout-with-figure-suptitle
+ # https://matplotlib.org/2.2.4/tutorials/intermediate/tight_layout_guide.html
self.ax.set_xlabel('Frequency [Hz]', fontsize=12)
self.ax.set_ylabel('Amplitude', fontsize=12)
self.line, = self.ax.plot([0, 0], label='Amplitude of Spectrum')
@@ -35,19 +53,31 @@ def __init__(self):
def subscribe(self):
self.sub_spectrum = rospy.Subscriber(
- '~spectrum', Spectrum, self._cb, queue_size=1000)
+ '~spectrum', Spectrum, self._cb, queue_size=self.queue_size)
def unsubscribe(self):
self.sub_spectrum.unregister()
def _cb(self, msg):
+ # Keep max_rate
+ if self.min_period > 0:
+ curr_tm = rospy.Time.now().to_sec()
+ if self.prev_pub_tm is not None:
+ elapsed_tm = curr_tm - self.prev_pub_tm
+ if elapsed_tm < 0:
+ # Time moved backwards (e.g., rosbag play --loop)
+ pass
+ elif elapsed_tm < self.min_period:
+ return
+ self.prev_pub_tm = curr_tm
# Plot spectrum
self.amp = np.array(msg.amplitude)
self.freq = np.array(msg.frequency)
self.line.set_data(self.freq, self.amp)
self.ax.set_xlim((self.freq.min(), self.freq.max()))
- self.ax.set_ylim((0.0, 20))
+ self.ax.set_ylim((self.min_amp, self.max_amp))
self.ax.legend(loc='upper right')
+ self.fig.tight_layout()
if self.pub_img.get_num_connections() > 0:
bridge = cv_bridge.CvBridge()
img = convert_matplotlib_to_img(self.fig)
diff --git a/audio_to_spectrogram/scripts/spectrum_to_spectrogram.py b/audio_to_spectrogram/scripts/spectrum_to_spectrogram.py
index f5be653334..86d7c21451 100755
--- a/audio_to_spectrogram/scripts/spectrum_to_spectrogram.py
+++ b/audio_to_spectrogram/scripts/spectrum_to_spectrogram.py
@@ -1,9 +1,13 @@
#!/usr/bin/env python
+from __future__ import division
+
import cv2
from cv_bridge import CvBridge
import numpy as np
import rospy
+from distutils.version import LooseVersion
+import pkg_resources
from jsk_recognition_msgs.msg import Spectrum
from sensor_msgs.msg import Image
@@ -34,10 +38,18 @@ def __init__(self):
'~spectrogram', Image, queue_size=1)
publish_rate = rospy.get_param(
'~publish_rate', float(self.image_width / self.spectrogram_period))
- rospy.Timer(
- rospy.Duration(
- 1.0 / publish_rate),
- self.timer_cb)
+
+ timer_kwargs = dict(
+ period=rospy.Duration(1.0 / publish_rate),
+ callback=self.timer_cb,
+ oneshot=False,
+ )
+ if (LooseVersion(pkg_resources.get_distribution('rospy').version) >=
+ LooseVersion('1.12.0')) and rospy.get_param('/use_sim_time', None):
+ # on >=kinetic, it raises ROSTimeMovedBackwardsException
+ # when we use rosbag play --loop.
+ timer_kwargs['reset'] = True
+ self.timer = rospy.Timer(**timer_kwargs)
self.bridge = CvBridge()
def audio_cb(self, msg):
diff --git a/audio_to_spectrogram/src/audio_to_spectrogram/__init__.py b/audio_to_spectrogram/src/audio_to_spectrogram/__init__.py
index d8d91d0b47..657ce0c11c 100644
--- a/audio_to_spectrogram/src/audio_to_spectrogram/__init__.py
+++ b/audio_to_spectrogram/src/audio_to_spectrogram/__init__.py
@@ -1,2 +1,6 @@
-from .audio_buffer import AudioBuffer
-from .convert_matplotlib import convert_matplotlib_to_img
+from audio_to_spectrogram.audio_buffer import AudioBuffer
+from audio_to_spectrogram.compat import check_matplotlib_version
+from audio_to_spectrogram.convert_matplotlib import convert_matplotlib_to_img
+from audio_to_spectrogram.data_amplitude_plot import DataAmplitudePlot
+from audio_to_spectrogram.data_buffer import DataBuffer
+from audio_to_spectrogram.data_to_spectrum import DataToSpectrum
diff --git a/audio_to_spectrogram/src/audio_to_spectrogram/audio_buffer.py b/audio_to_spectrogram/src/audio_to_spectrogram/audio_buffer.py
index 6c73c39c15..41f54ed618 100644
--- a/audio_to_spectrogram/src/audio_to_spectrogram/audio_buffer.py
+++ b/audio_to_spectrogram/src/audio_to_spectrogram/audio_buffer.py
@@ -1,14 +1,9 @@
-from __future__ import division
-
-import array
-from threading import Lock
-
-from audio_common_msgs.msg import AudioData
-import numpy as np
import rospy
+from audio_to_spectrogram.data_buffer import DataBuffer
-class AudioBuffer(object):
+
+class AudioBuffer(DataBuffer):
def __init__(self, topic_name='~audio',
input_sample_rate=16000,
@@ -18,44 +13,21 @@ def __init__(self, topic_name='~audio',
get_latest_data=False,
discard_data=False,
auto_start=False):
- self.is_subscribing = True
- self.get_latest_data = get_latest_data
- self.discard_data = discard_data
- self._window_size = window_size
- self.audio_buffer_len = int(self._window_size * input_sample_rate)
- self.lock = Lock()
- self.bitdepth = bitdepth
- self.n_channel = n_channel
- self.target_channel = min(self.n_channel - 1, max(0, target_channel))
- self.input_sample_rate = input_sample_rate
- self.type_code = {}
- for code in ['b', 'h', 'i', 'l']:
- self.type_code[array.array(code).itemsize] = code
-
- self.dtype = self.type_code[self.bitdepth / 8]
- self.audio_buffer = np.array([], dtype=self.dtype)
-
- self.max_value = 2 ** (self.bitdepth - 1) - 1
-
- self.topic_name = topic_name
-
- if auto_start:
- self.subscribe()
-
- def __len__(self):
- return len(self.audio_buffer)
- @property
- def window_size(self):
- return self._window_size
-
- @window_size.setter
- def window_size(self, size):
- with self.lock:
- self._window_size = size
- self.audio_buffer_len = int(self._window_size
- * self.input_sample_rate)
- self.audio_buffer = np.array([], dtype=self.dtype)
+ super(AudioBuffer, self).__init__(
+ topic_name=topic_name,
+ expr_to_get_data='m.data', # field of audio_common_msgs/AudioData
+ input_sample_rate=input_sample_rate,
+ window_size=window_size,
+ is_integer=True,
+ is_signed=True,
+ bitdepth=bitdepth,
+ n_channel=n_channel,
+ target_channel=target_channel,
+ get_latest_data=get_latest_data,
+ discard_data=discard_data,
+ auto_start=auto_start,
+ )
@staticmethod
def from_rosparam(**kwargs):
@@ -68,51 +40,3 @@ def from_rosparam(**kwargs):
n_channel=n_channel,
target_channel=target_channel,
**kwargs)
-
- def subscribe(self):
- self.audio_buffer = np.array([], dtype=self.dtype)
- self.sub_audio = rospy.Subscriber(
- self.topic_name, AudioData, self.audio_cb)
-
- def unsubscribe(self):
- self.sub_audio.unregister()
-
- def _read(self, size, normalize=False):
- with self.lock:
- if self.get_latest_data:
- audio_buffer = self.audio_buffer[-size:]
- else:
- audio_buffer = self.audio_buffer[:size]
- if self.discard_data:
- self.audio_buffer = self.audio_buffer[size:]
- if normalize is True:
- audio_buffer = audio_buffer / self.max_value
- return audio_buffer
-
- def sufficient_data(self, size):
- return len(self.audio_buffer) < size
-
- def read(self, size=None, wait=False, normalize=False):
- if size is None:
- size = self.audio_buffer_len
- size = int(size * self.input_sample_rate)
- while wait is True \
- and not rospy.is_shutdown() and len(self.audio_buffer) < size:
- rospy.sleep(0.001)
- return self._read(size, normalize=normalize)
-
- def close(self):
- try:
- self.sub_audio.unregister()
- except Exception:
- pass
- self.audio_buffer = np.array([], dtype=self.dtype)
-
- def audio_cb(self, msg):
- audio_buffer = np.frombuffer(msg.data, dtype=self.dtype)
- audio_buffer = audio_buffer[self.target_channel::self.n_channel]
- with self.lock:
- self.audio_buffer = np.append(
- self.audio_buffer, audio_buffer)
- self.audio_buffer = self.audio_buffer[
- -self.audio_buffer_len:]
diff --git a/audio_to_spectrogram/src/audio_to_spectrogram/compat.py b/audio_to_spectrogram/src/audio_to_spectrogram/compat.py
new file mode 100644
index 0000000000..2a3cce1c82
--- /dev/null
+++ b/audio_to_spectrogram/src/audio_to_spectrogram/compat.py
@@ -0,0 +1,25 @@
+import os
+import subprocess
+import warnings
+
+import matplotlib
+
+
+def check_matplotlib_version(
+ python_version=os.getenv('ROS_PYTHON_VERSION')):
+ if python_version == '2':
+ python = 'python'
+ else:
+ python = 'python{}'.format(python_version)
+ out = subprocess.check_output(
+ ['dpkg', '-L', '{}-matplotlib'.format(python)])
+ if isinstance(out, bytes):
+ out = out.decode('utf-8')
+ matplotlib_filelists = out.split()
+ if os.path.dirname(matplotlib.__file__) in matplotlib_filelists:
+ return True
+ else:
+ warnings.warn(
+ "It looks like you are using pip's matplotlib. "
+ "Apt's matplotlib is recommended.")
+ return False
diff --git a/audio_to_spectrogram/src/audio_to_spectrogram/convert_matplotlib.py b/audio_to_spectrogram/src/audio_to_spectrogram/convert_matplotlib.py
index 0b11d6b801..4324cda94e 100644
--- a/audio_to_spectrogram/src/audio_to_spectrogram/convert_matplotlib.py
+++ b/audio_to_spectrogram/src/audio_to_spectrogram/convert_matplotlib.py
@@ -7,6 +7,7 @@
import numpy as np
import PIL
+import PIL.Image
def convert_matplotlib_to_img(fig):
diff --git a/audio_to_spectrogram/src/audio_to_spectrogram/data_amplitude_plot.py b/audio_to_spectrogram/src/audio_to_spectrogram/data_amplitude_plot.py
new file mode 100644
index 0000000000..dd7311fc99
--- /dev/null
+++ b/audio_to_spectrogram/src/audio_to_spectrogram/data_amplitude_plot.py
@@ -0,0 +1,100 @@
+from __future__ import division
+
+from distutils.version import LooseVersion
+import pkg_resources
+import cv_bridge
+from dynamic_reconfigure.server import Server
+from jsk_topic_tools import ConnectionBasedTransport
+import matplotlib
+from audio_to_spectrogram.compat import check_matplotlib_version; check_matplotlib_version()
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+import numpy as np
+import rospy
+import sensor_msgs.msg
+
+from audio_to_spectrogram.cfg import DataAmplitudePlotConfig as Config
+from audio_to_spectrogram.convert_matplotlib import convert_matplotlib_to_img
+from audio_to_spectrogram.data_buffer import DataBuffer
+
+
+class DataAmplitudePlot(ConnectionBasedTransport):
+
+ def __init__(self, data_buffer=None):
+ super(DataAmplitudePlot, self).__init__()
+
+ if data_buffer is None:
+ self.data_buffer = DataBuffer.from_rosparam()
+ else:
+ self.data_buffer = data_buffer
+
+ # Set matplotlib config
+ self.fig = plt.figure(figsize=(8, 5))
+ self.ax = self.fig.add_subplot(1, 1, 1)
+ self.ax.grid(True)
+ self.ax.set_xlabel('Time [s]', fontsize=12)
+ self.ax.set_ylabel('Amplitude', fontsize=12)
+ self.line, = self.ax.plot([0, 0], label='Amplitude of Input Data')
+
+ # ROS dynamic reconfigure
+ self.srv = Server(Config, self.config_callback)
+
+ self.pub_img = self.advertise(
+ '~output/viz', sensor_msgs.msg.Image, queue_size=1)
+
+ def start_timer(self):
+ rate = rospy.get_param('~rate', 10)
+ if rate == 0:
+ rospy.logwarn('You cannot set 0 as the rate; change it to 10.')
+ rate = 10
+
+ timer_kwargs = dict(
+ period=rospy.Duration(1.0 / rate),
+ callback=self.timer_cb,
+ oneshot=False,
+ )
+ if (LooseVersion(pkg_resources.get_distribution('rospy').version) >=
+ LooseVersion('1.12.0')) and rospy.get_param('/use_sim_time', None):
+ # on >=kinetic, it raises ROSTimeMovedBackwardsException
+ # when we use rosbag play --loop.
+ timer_kwargs['reset'] = True
+ self.timer = rospy.Timer(**timer_kwargs)
+
+ def stop_timer(self):
+ self.timer.shutdown()
+ self.timer = None
+
+ def subscribe(self):
+ self.data_buffer.subscribe()
+ self.start_timer()
+
+ def unsubscribe(self):
+ self.data_buffer.unsubscribe()
+ self.stop_timer()
+
+ def config_callback(self, config, level):
+ self.maximum_amplitude = config.maximum_amplitude
+ self.window_size = config.window_size
+ self.data_buffer.window_size = self.window_size
+ return config
+
+ def timer_cb(self, timer):
+ window_size = self.window_size
+ amp = self.data_buffer.read(window_size)
+ if len(amp) == 0:
+ return
+ times = np.linspace(-window_size, 0.0, len(amp))
+
+ # Plot data amplitude.
+ self.line.set_data(times, amp)
+ self.ax.set_xlim((times.min(), times.max()))
+ self.ax.set_ylim((-self.maximum_amplitude, self.maximum_amplitude))
+
+ self.ax.legend(loc='upper right')
+ self.fig.tight_layout()
+ if self.pub_img.get_num_connections() > 0:
+ bridge = cv_bridge.CvBridge()
+ img = convert_matplotlib_to_img(self.fig)
+ img_msg = bridge.cv2_to_imgmsg(img, encoding='rgb8')
+ img_msg.header.stamp = rospy.Time.now()
+ self.pub_img.publish(img_msg)
diff --git a/audio_to_spectrogram/src/audio_to_spectrogram/data_buffer.py b/audio_to_spectrogram/src/audio_to_spectrogram/data_buffer.py
new file mode 100644
index 0000000000..aef9609edf
--- /dev/null
+++ b/audio_to_spectrogram/src/audio_to_spectrogram/data_buffer.py
@@ -0,0 +1,170 @@
+from __future__ import division
+
+import array
+from threading import Lock
+
+import numpy as np
+import rospy
+import rostopic
+
+
+class DataBuffer(object):
+
+ def __init__(self, topic_name='~input',
+ expr_to_get_data='m.data',
+ input_sample_rate=500,
+ window_size=10.0,
+ is_integer=False,
+ is_signed=True,
+ bitdepth=64,
+ n_channel=1, target_channel=0,
+ get_latest_data=False,
+ discard_data=False,
+ auto_start=False):
+ self.is_subscribing = True
+ self.get_latest_data = get_latest_data
+ self.discard_data = discard_data
+ self._window_size = window_size
+ self.data_buffer_len = int(self._window_size * input_sample_rate)
+ self.lock = Lock()
+ self.bitdepth = bitdepth
+ self.n_channel = n_channel
+ self.target_channel = min(self.n_channel - 1, max(0, target_channel))
+ self.input_sample_rate = input_sample_rate
+ self.type_code = {}
+ if is_integer:
+ if is_signed:
+ codes = ['b', 'h', 'i', 'l']
+ else:
+ codes = ['B', 'H', 'I', 'L']
+ else:
+ codes = ['f', 'd']
+ for code in codes:
+ self.type_code[array.array(code).itemsize] = code
+
+ self.dtype = self.type_code[self.bitdepth / 8]
+ self.data_buffer = np.array([], dtype=self.dtype)
+
+ if is_integer:
+ self.max_value = 2 ** (self.bitdepth - 1) - 1
+ else:
+ self.max_value = None
+
+ self.topic_name = topic_name
+ self.expr_to_get_data = expr_to_get_data
+
+ if auto_start:
+ self.subscribe()
+
+ def __len__(self):
+ return len(self.data_buffer)
+
+ @property
+ def window_size(self):
+ return self._window_size
+
+ @window_size.setter
+ def window_size(self, size):
+ with self.lock:
+ self._window_size = size
+ self.data_buffer_len = int(self._window_size
+ * self.input_sample_rate)
+ self.data_buffer = np.array([], dtype=self.dtype)
+
+ @staticmethod
+ def from_rosparam(**kwargs):
+ expr_to_get_data = rospy.get_param(
+ '~expression_to_get_data', 'm.data')
+ n_channel = rospy.get_param('~n_channel', 1)
+ target_channel = rospy.get_param('~target_channel', 0)
+ data_sampling_rate = rospy.get_param('~data_sampling_rate', 500)
+ is_integer = rospy.get_param('~is_integer', False)
+ is_signed = rospy.get_param('~is_signed', True)
+ bitdepth = rospy.get_param('~bitdepth', 64)
+ return DataBuffer(expr_to_get_data=expr_to_get_data,
+ input_sample_rate=data_sampling_rate,
+ is_integer=is_integer,
+ is_signed=is_signed,
+ bitdepth=bitdepth,
+ n_channel=n_channel,
+ target_channel=target_channel,
+ **kwargs)
+
+ def subscribe(self):
+ self.data_buffer = np.array([], dtype=self.dtype)
+ # https://github.com/ros/ros_comm/blob/1.14.13/tools/topic_tools/scripts/transform#L86-L87
+ input_class, input_topic, self.input_fn = rostopic.get_topic_class(
+ rospy.resolve_name(self.topic_name), blocking=True)
+ # https://github.com/ros/ros_comm/blob/1.14.13/tools/topic_tools/scripts/transform#L95
+ self.sub = rospy.Subscriber(
+ input_topic, input_class, self.input_cb)
+
+ def unsubscribe(self):
+ self.sub.unregister()
+
+ def _read(self, size, normalize=False):
+ with self.lock:
+ if self.get_latest_data:
+ data_buffer = self.data_buffer[-size:]
+ else:
+ data_buffer = self.data_buffer[:size]
+ if self.discard_data:
+ self.data_buffer = self.data_buffer[size:]
+ if normalize is True:
+ if self.max_value is not None:
+ data_buffer = data_buffer / self.max_value
+ else:
+ rospy.logerr('normalize is disabled with current settings')
+ return data_buffer
+
+ def sufficient_data(self, size):
+ return len(self.data_buffer) < size
+
+ def read(self, size=None, wait=False, normalize=False):
+ if size is None:
+ size = self.data_buffer_len
+ size = int(size * self.input_sample_rate)
+ while wait is True \
+ and not rospy.is_shutdown() and len(self.data_buffer) < size:
+ rospy.sleep(0.001)
+ return self._read(size, normalize=normalize)
+
+ def close(self):
+ try:
+ self.sub.unregister()
+ except Exception:
+ pass
+ self.data_buffer = np.array([], dtype=self.dtype)
+
+ def input_cb(self, m):
+ # https://github.com/ros/ros_comm/blob/1.14.13/tools/topic_tools/scripts/transform#L98-L99
+ if self.input_fn is not None:
+ m = self.input_fn(m)
+ data = eval(self.expr_to_get_data)
+ data_type = type(data)
+ if data_type is bytes:
+ data_buffer = np.frombuffer(data, dtype=self.dtype)
+ elif data_type in [list, tuple]:
+ data_buffer = np.fromiter(data, dtype=self.dtype)
+ elif data_type in [int, float]:
+ data_buffer = np.array([data], dtype=self.dtype)
+ else:
+ rospy.logerr('Unsupported data type: {}'.format(data_type))
+ return
+ # This division into cases assumes
+ # expr_to_get_data returns a Python built-in numeric or sequence type
+ # deserialized from a ROS msg by scripts generated by genpy.
+ # Other than uint8[], this deserialization is executed with
+ # struct.Struct.unpack() and it returns int or float (or its tuple).
+ # Against uint8[], this deserialization returns bytes.
+ # /opt/ros/melodic/lib/python2.7/dist-packages/std_msgs/msg/_Float64.py
+ # /opt/ros/melodic/lib/python2.7/dist-packages/audio_common_msgs/msg/_AudioData.py
+ # https://docs.python.org/2.7/library/struct.html#format-characters
+ # https://github.com/ros/genpy/blob/0.6.16/src/genpy/generate_struct.py#L165
+
+ data_buffer = data_buffer[self.target_channel::self.n_channel]
+ with self.lock:
+ self.data_buffer = np.append(
+ self.data_buffer, data_buffer)
+ self.data_buffer = self.data_buffer[
+ -self.data_buffer_len:]
diff --git a/audio_to_spectrogram/src/audio_to_spectrogram/data_to_spectrum.py b/audio_to_spectrogram/src/audio_to_spectrogram/data_to_spectrum.py
new file mode 100644
index 0000000000..adcd01f98d
--- /dev/null
+++ b/audio_to_spectrogram/src/audio_to_spectrogram/data_to_spectrum.py
@@ -0,0 +1,103 @@
+from __future__ import division
+
+import numpy as np
+import rospy
+from distutils.version import LooseVersion
+import pkg_resources
+
+from jsk_recognition_msgs.msg import Spectrum
+
+from audio_to_spectrogram.data_buffer import DataBuffer
+
+
+class DataToSpectrum(object):
+
+ def __init__(self, data_buffer=None):
+ super(DataToSpectrum, self).__init__()
+
+ if data_buffer is None:
+ self.data_buffer = DataBuffer.from_rosparam(auto_start=True)
+ else:
+ self.data_buffer = data_buffer
+ fft_sampling_period = rospy.get_param('~fft_sampling_period', 0.3)
+ self.data_buffer.window_size = fft_sampling_period
+ data_sampling_rate = self.data_buffer.input_sample_rate
+
+ # fft config
+ window_function = np.arange(
+ 0.0, 1.0, 1.0 / self.data_buffer.data_buffer_len)
+ self.window_function = 0.54 - 0.46 * np.cos(
+ 2 * np.pi * window_function)
+ self.freq = np.fft.fftfreq(
+ self.data_buffer.data_buffer_len, d=1./data_sampling_rate)
+ # How many times fft is executed in one second
+ # fft_exec_rate equals to output spectrogram hz
+ self.fft_exec_rate = rospy.get_param('~fft_exec_rate', 50)
+
+ # Publisher and Subscriber
+ self.pub_spectrum = rospy.Publisher(
+ '~spectrum', Spectrum, queue_size=1)
+ self.pub_norm_half_spectrum = rospy.Publisher(
+ '~normalized_half_spectrum', Spectrum, queue_size=1)
+ self.pub_log_spectrum = rospy.Publisher(
+ '~log_spectrum', Spectrum, queue_size=1)
+
+ timer_kwargs = dict(
+ period=rospy.Duration(1.0 / self.fft_exec_rate),
+ callback=self.timer_cb,
+ oneshot=False,
+ )
+ if (LooseVersion(pkg_resources.get_distribution('rospy').version) >=
+ LooseVersion('1.12.0')) and rospy.get_param('/use_sim_time', None):
+ # on >=kinetic, it raises ROSTimeMovedBackwardsException
+ # when we use rosbag play --loop.
+ timer_kwargs['reset'] = True
+ self.timer = rospy.Timer(**timer_kwargs)
+
+ def publish_spectrum(self, pub, stamp, amp, freq):
+ spectrum_msg = Spectrum()
+ spectrum_msg.header.stamp = stamp
+ spectrum_msg.amplitude = amp
+ spectrum_msg.frequency = freq
+ pub.publish(spectrum_msg)
+
+ def timer_cb(self, timer):
+ if len(self.data_buffer) != self.data_buffer.data_buffer_len:
+ return
+ data = self.data_buffer.read()
+ stamp = rospy.Time.now()
+ # Calc spectrum by fft
+ fft = np.fft.fft(data * self.window_function)
+ self.publish_spectrum(
+ self.pub_spectrum,
+ stamp,
+ np.abs(fft),
+ self.freq,
+ # Usual "amplitude spectrum".
+ # https://ryo-iijima.com/fftresult/
+ )
+ self.publish_spectrum(
+ self.pub_norm_half_spectrum,
+ stamp,
+ np.abs(fft[self.freq >= 0] /
+ (self.data_buffer.data_buffer_len / 2.0)),
+ self.freq[self.freq >= 0],
+ # Spectrum which is "half"
+ # (having non-negative frequencies (0Hz-Nyquist frequency))
+ # and is "normalized"
+ # (consistent with the amplitude of the original signal)
+ # https://ryo-iijima.com/fftresult/
+ # https://stackoverflow.com/questions/63211851/why-divide-the-output-of-numpy-fft-by-n
+ # https://github.com/jsk-ros-pkg/jsk_recognition/issues/2761#issue-1550715400
+ )
+ self.publish_spectrum(
+ self.pub_log_spectrum,
+ stamp,
+ np.log(np.abs(fft)),
+ self.freq,
+ # Usually, log is applied to "power spectrum" (np.abs(fft)**2):
+ # np.log(np.abs(fft)**2), 20*np.log10(np.abs(fft)), ...
+ # But we use the above equation for simplicity.
+ # https://github.com/jsk-ros-pkg/jsk_recognition/issues/2761#issuecomment-1445810380
+ # http://makotomurakami.com/blog/2020/05/23/5266/
+ )
diff --git a/audio_to_spectrogram/test/audio_to_spectrogram.test b/audio_to_spectrogram/test/audio_to_spectrogram.test
index 671975f661..41a687e295 100644
--- a/audio_to_spectrogram/test/audio_to_spectrogram.test
+++ b/audio_to_spectrogram/test/audio_to_spectrogram.test
@@ -11,6 +11,10 @@
topic_0: /spectrum_to_spectrogram/spectrogram
timeout_0: 30
+ topic_1: /audio_amplitude_plot/output/viz
+ timeout_1: 30
+ topic_2: /preprocessed_spectrogram_source_plot/output/viz
+ timeout_2: 30
diff --git a/audio_to_spectrogram/test/wrench_to_spectrogram.test b/audio_to_spectrogram/test/wrench_to_spectrogram.test
new file mode 100644
index 0000000000..78281615c9
--- /dev/null
+++ b/audio_to_spectrogram/test/wrench_to_spectrogram.test
@@ -0,0 +1,21 @@
+
+
+
+
+
+
+
+
+ topic_0: /spectrum_to_spectrogram/spectrogram
+ timeout_0: 30
+ topic_1: /data_amplitude_plot/output/viz
+ timeout_1: 30
+ topic_2: /preprocessed_spectrogram_source_plot/output/viz
+ timeout_2: 30
+
+
+
+
diff --git a/checkerboard_detector/src/objectdetection_tf_publisher.py b/checkerboard_detector/src/objectdetection_tf_publisher.py
index f38782074f..bdcb339a28 100755
--- a/checkerboard_detector/src/objectdetection_tf_publisher.py
+++ b/checkerboard_detector/src/objectdetection_tf_publisher.py
@@ -16,9 +16,9 @@ def __init__(self):
self.br = tf.TransformBroadcaster()
self.subscriber = rospy.Subscriber("ObjectDetection", ObjectDetection, self.simple_callback);
else:
- self.init_object_messages()
self.frame_id = rospy.get_param("~frame_id", "object")
self.subscriber = rospy.Subscriber("ObjectDetection", ObjectDetection, self.callback);
+ self.init_object_messages()
def init_object_messages(self):
if rospy.has_param('~checker_board_params/position_x'):
diff --git a/doc/install_chainer_gpu.rst b/doc/install_chainer_gpu.md
similarity index 67%
rename from doc/install_chainer_gpu.rst
rename to doc/install_chainer_gpu.md
index ce5fc43147..cd4debf0b4 100644
--- a/doc/install_chainer_gpu.rst
+++ b/doc/install_chainer_gpu.md
@@ -15,37 +15,37 @@ Requirements
Version Compatibilities for 18.04
---------------------------------
-(Recommended) Use CUDA 9.1 from Official ubuntu repository (https://packages.ubuntu.com/bionic/nvidia-cuda-dev)
+(Recommended) Use CUDA 9.1 from Official ubuntu repository [https://launchpad.net/ubuntu/bionic/+package/nvidia-cuda-dev](https://launchpad.net/ubuntu/bionic/+package/nvidia-cuda-dev)
- Chainer
- - chainer == 6.7.0 (last version supoprting python2. See https://github.com/chainer/chainer/releases/tag/v6.7.0)
- - cupy-cuda91 == 6.7.0 (chainer v6.7.0 requires cupy/cudnn for hardware acceleration support https://docs.chainer.org/en/v6.7.0/install.html)
+  - chainer == 6.7.0 (last version supporting python2. See [https://github.com/chainer/chainer/releases/tag/v6.7.0](https://github.com/chainer/chainer/releases/tag/v6.7.0))
+  - cupy-cuda91 == 6.7.0 (chainer v6.7.0 requires cupy/cudnn for hardware acceleration support [https://docs.chainer.org/en/v6.7.0/install.html](https://docs.chainer.org/en/v6.7.0/install.html))
- PyTorch
- - pytorch == 1.1.0 (Latest pytorch version supporting CUDA 9.1 https://download.pytorch.org/whl/cu90/torch_stable.html)
- - CUDA >= 9.0 (Minimum required version for PyTorch 1.1.0 https://pytorch.org/get-started/previous-versions/#v110)
+  - pytorch == 1.1.0 (Latest pytorch version supporting CUDA 9.1 [https://download.pytorch.org/whl/cu90/torch_stable.html](https://download.pytorch.org/whl/cu90/torch_stable.html))
+  - CUDA >= 9.0 (Minimum required version for PyTorch 1.1.0 [https://pytorch.org/get-started/previous-versions/#v110](https://pytorch.org/get-started/previous-versions/#v110))
-(Experimental) Use CUDA 10.2 from Nvidia Developer's site (https://developer.nvidia.com/cuda-10.2-download-archive)
+(Experimental) Use CUDA 10.2 from Nvidia Developer's site [https://developer.nvidia.com/cuda-10.2-download-archive](https://developer.nvidia.com/cuda-10.2-download-archive)
- Chainer
- - chainer == 6.7.0 (last version supoprting python2. See https://github.com/chainer/chainer/releases/tag/v6.7.0)
- - cupy >=6.7.0,<7.0.0 (chainer v6.7.0 requires cupy/cudnn for hardware acceleration support https://docs.chainer.org/en/v6.7.0/install.html)
+  - chainer == 6.7.0 (last version supporting python2. See [https://github.com/chainer/chainer/releases/tag/v6.7.0](https://github.com/chainer/chainer/releases/tag/v6.7.0))
+  - cupy >=6.7.0,<7.0.0 (chainer v6.7.0 requires cupy/cudnn for hardware acceleration support [https://docs.chainer.org/en/v6.7.0/install.html](https://docs.chainer.org/en/v6.7.0/install.html))
- cuDNN < 8 (cupy 6.7.0 requires cuDNN v5000= and <=v7999)
- - CUDA 10.2 (cuDNN v7.6.5 requires CUDA 10.2 https://developer.nvidia.com/rdp/cudnn-archive)
+ - CUDA 10.2 (cuDNN v7.6.5 requires CUDA 10.2 [https://developer.nvidia.com/rdp/cudnn-archive](https://developer.nvidia.com/rdp/cudnn-archive))
- PyTorch
- pytorch >= 1.4.0
- - CUDA >= 9.2 (Minimum required version for PyTorch https://pytorch.org/get-started/previous-versions/#v140)
- - Driver Version >= 396.26 (From CUDA Toolkit and Corresponding Driver Versions in https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html)
+ - CUDA >= 9.2 (Minimum required version for PyTorch [https://pytorch.org/get-started/previous-versions/#v140](https://pytorch.org/get-started/previous-versions/#v140))
+ - Driver Version >= 396.26 (From CUDA Toolkit and Corresponding Driver Versions in [https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html](https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html))
Install CUDA
------------
-- Ubuntu 14.04 : Download deb file from https://developer.nvidia.com/cuda-downloads?target_os=Linux::
+- Ubuntu 14.04 : Download deb file from [https://developer.nvidia.com/cuda-downloads?target_os=Linux](https://developer.nvidia.com/cuda-downloads?target_os=Linux):
```bash
# If you'd like to use CUDA8.0 on Ubuntu 14.04.
@@ -56,7 +56,7 @@ Install CUDA
sudo apt-get install cuda
```
- - Add below to your `~/.bashrc`::
+ - Add below to your `~/.bashrc`:
```bash
# setup cuda & cudnn
@@ -76,7 +76,7 @@ Install CUDA
```
-- Ubuntu 16.04 : Download deb file from https://developer.nvidia.com/cuda-downloads?target_os=Linux::
+- Ubuntu 16.04 : Download deb file from [https://developer.nvidia.com/cuda-downloads?target_os=Linux](https://developer.nvidia.com/cuda-downloads?target_os=Linux):
```bash
# If you'd like to use CUDA9.2 on Ubuntu 16.04.
@@ -91,7 +91,7 @@ Install CUDA
sudo apt install nvidia-cuda-toolkit
sudo apt install nvidia-cuda-dev
-- (Experimental) Ubuntu 18.04 : CUDA 10.2 is the latest version which supports `jsk_perception`. Download deb file from https://developer.nvidia.com/cuda-downloads?target_os=Linux::
+- (Experimental) Ubuntu 18.04 : CUDA 10.2 is the latest version which supports `jsk_perception`. Download deb file from [https://developer.nvidia.com/cuda-downloads?target_os=Linux](https://developer.nvidia.com/cuda-downloads?target_os=Linux):
```bash
# If you'd like to use CUDA10.2 on Ubuntu 18.04.
@@ -124,11 +124,11 @@ Install CUDA
Install CUDNN
-------------
-- If you install `pip install cupy-cuda91`, you do not need to install CUDNN manually. (c.f. https://github.com/jsk-ros-pkg/jsk_visualization/issues/809). Thus, default 18.04 user can use CUDA 9.1 and `cupy-cuda91==6.7.0` for `chainer==6.7.0` and you can SKIP this section.
+- If you install cupy via `pip install cupy-cuda91`, you do not need to install CUDNN manually (cf. [https://github.com/jsk-ros-pkg/jsk_visualization/issues/809](https://github.com/jsk-ros-pkg/jsk_visualization/issues/809)). Thus, a default 18.04 user can use CUDA 9.1 and `cupy-cuda91==6.7.0` for `chainer==6.7.0` and can SKIP this section.
Installing CUDNN manually only requires for experimental user who install CUDA 10.2 manually.
-- You need to login at https://developer.nvidia.com/cudnn
+- You need to login at [https://developer.nvidia.com/cudnn](https://developer.nvidia.com/cudnn)
- Go to cuDNN Download and choose version
- Download deb files of cuDNN Runtime Library and cuDNN Developer Library
@@ -156,7 +156,7 @@ Install Cupy
- (Default) Chainer 6.7.0 requires CuPy 6.7.0 and if you have CUDA 9.1, you can use CuPy pre-compiled binary package.
- - Pre-compiled Install Cupy for CUDA 9.1 ::
+ - Pre-compiled install of CuPy for CUDA 9.1:
```bash
sudo pip install cupy-cuda91==6.7.0
@@ -164,7 +164,7 @@ Install Cupy
- (Experimental) If you have newer CUDA version. You need to install CuPy with source distribution. This requires CUDNN before you run `pip install cupy` .
- - Source Install Cupy for CUDA 10.2 ::
+ - Source install of CuPy for CUDA 10.2:
```bash
sudo pip install -vvv cupy --no-cache-dir
@@ -174,7 +174,7 @@ Install Cupy
Install PyTorch
---------------
-- 18.04 provides CUDA 9.1 by defualt. To install PyTorch compatible with this version, download following wheel from https://download.pytorch.org/whl/cu90/torch_stable.html, and install manually.
+- 18.04 provides CUDA 9.1 by default. To install PyTorch compatible with this version, download the following wheel from [https://download.pytorch.org/whl/cu90/torch_stable.html](https://download.pytorch.org/whl/cu90/torch_stable.html), and install it manually.
```bash
sudo pip install torch-1.1.0-cp27-cp27mu-linux_x86_64.whl
@@ -187,12 +187,12 @@ Install PyTorch
sudo pip install torch==1.4.0
```
-- See https://github.com/jsk-ros-pkg/jsk_recognition/pull/2601#issuecomment-876948260 for more info.
+- See [https://github.com/jsk-ros-pkg/jsk_recognition/pull/2601#issuecomment-876948260](https://github.com/jsk-ros-pkg/jsk_recognition/pull/2601#issuecomment-876948260) for more info.
Try Chainer Samples
-----------
-You can try to run samples to check if the installation succeeded::
+You can try to run samples to check if the installation succeeded:
roslaunch jsk_perception sample_fcn_object_segmentation.launch gpu:=0
roslaunch jsk_perception sample_people_pose_estimation_2d.launch GPU:=0
@@ -201,7 +201,7 @@ You can try to run samples to check if the installation succeeded::
Try PyTorch Samples
-----------
-You can try to run samples to check if the installation succeeded::
+You can try to run samples to check if the installation succeeded:
roslaunch jsk_perception sample_hand_pose_estimation_2d.launch gpu:=0
diff --git a/doc/jsk_pcl_ros_utils/nodes/pointcloud_to_pcd.rst b/doc/jsk_pcl_ros_utils/nodes/pointcloud_to_pcd.rst
index 40debd2693..77a78990b8 100644
--- a/doc/jsk_pcl_ros_utils/nodes/pointcloud_to_pcd.rst
+++ b/doc/jsk_pcl_ros_utils/nodes/pointcloud_to_pcd.rst
@@ -12,6 +12,12 @@ Subscribing Topic
* ``~input`` (``sensor_msgs/PoinCloud2``)
+Advertising Services
+--------------------
+* ``~save_pcd`` (``std_srvs/Empty``)
+
+ Save the subscribed point cloud as a pcd file named `{prefix}{stamp}.pcd`.
+
Parameters
----------
@@ -37,3 +43,5 @@ Parameters
* ``duration`` (Double, default: ``1.0``)
Saving duration. You can change saving frequency with this parameter.
+ If the duration is greater than 0.0, pcd files are saved periodically at that interval.
+ If you want to save only on demand via the ROS service, set the duration to 0.0.
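+
+ For example, with the duration set to ``0.0`` you can trigger a single save from Python (a minimal sketch; the node name ``pointcloud_to_pcd`` below is an assumption, replace it with your node's actual name)::
+
+   import rospy
+   from std_srvs.srv import Empty
+
+   rospy.init_node("save_pcd_client")
+   rospy.wait_for_service("pointcloud_to_pcd/save_pcd")
+   save_pcd = rospy.ServiceProxy("pointcloud_to_pcd/save_pcd", Empty)
+   save_pcd()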
diff --git a/doc/jsk_perception/nodes/aws_detect_faces.rst b/doc/jsk_perception/nodes/aws_detect_faces.rst
index 1c23d9fc94..6bbd29abf4 100644
--- a/doc/jsk_perception/nodes/aws_detect_faces.rst
+++ b/doc/jsk_perception/nodes/aws_detect_faces.rst
@@ -22,7 +22,7 @@ Subscribing Topic
Publishing Topic
----------------
-* ``~faces`` (``opencv_msgs/FaceArrayStamped``)
+* ``~faces`` (``opencv_apps/FaceArrayStamped``)
Detected face positions, facial landmarks, emotions, presence of beard, sunglasses, and so on.
diff --git a/doc/jsk_perception/nodes/classification_node.md b/doc/jsk_perception/nodes/classification_node.md
new file mode 100644
index 0000000000..3262618c48
--- /dev/null
+++ b/doc/jsk_perception/nodes/classification_node.md
@@ -0,0 +1,83 @@
+# classification_node.py
+
+![](images/clip.png)
+
+The ROS node for Classification with CLIP.
+
+## System Configuration
+![](images/large_scale_vil_system.png)
+
+This node works together with a Docker container for inference. Please build the container first, following the instructions below.
+
+### Prerequisite
+This node requires an NVIDIA GPU with more than 4GB of GPU RAM to work properly.
+You have to install nvidia-container-toolkit to use the GPU with Docker. Please follow the [official instructions](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html).
+
+### Build the docker image
+You have to build the docker image for the inference server (the `make` target below builds both the OFA and CLIP images):
+
+```shell
+roscd jsk_perception/docker
+make
+```
+
+## Subscribing topic
+* `~image` (`sensor_msgs/Image`)
+
+ Input image
+
+## Publishing topic
+* `~result` (`jsk_recognition_msgs/ClassificationResult`)
+
+ Classification result
+
+* `~result/image` (`sensor_msgs/Image`)
+
+ Images used for inference
+
+* `~visualize` (`std_msgs/String`)
+
+ Classification result to visualize
+
+## Action topic
+* `~inference_server/goal` (`jsk_recognition_msgs/ClassificationTaskActionGoal`)
+
+ Classification request with custom categories and image
+
+* `~inference_server/result` (`jsk_recognition_msgs/ClassificationTaskActionResult`)
+
+ Classification result of `~inference_server/goal`
+
+## Parameters
+* `~host` (String, default: `localhost`)
+
+ The host name or IP of inference container
+
+* `~port` (Integer, default: `8080`)
+
+ The HTTP port of inference container
+
+## Dynamic Reconfigure Parameters
+* `~queries` (string, default: `human;kettle;cup;glass`)
+
+ Default categories used for the subscribed image topic.
+
+ You can send multiple queries by separating them with semicolons (see the sketch below).
+
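+For example, the queries can be updated at runtime through dynamic reconfigure (a minimal sketch; the node name `classification` below is an assumption, check your launch file for the actual name):
+
+```python
+import rospy
+from dynamic_reconfigure.client import Client
+
+rospy.init_node("set_classification_queries")
+client = Client("classification", timeout=10)
+client.update_configuration({"queries": "bottle;mug;plate"})
+```
+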
+### Run inference container on another host or another terminal
+In the remote GPU machine,
+```shell
+cd jsk_recognition/jsk_perception/docker
+./run_jsk_vil_api clip --port (Your vacant port)
+```
+
+In the ROS machine,
+```shell
+roslaunch jsk_perception classification.launch port:=(Your inference container port) host:=(Your inference container host) CLASSIFICATION_INPUT_IMAGE:=(Your image topic name) gui:=true
+```
+
+
+### Run both inference container and ros node in single host
+```
+roslaunch jsk_perception classification.launch run_api:=true CLASSIFICATION_INPUT_IMAGE:=(Your image topic name) gui:=true
+```
diff --git a/doc/jsk_perception/nodes/images/clip.png b/doc/jsk_perception/nodes/images/clip.png
new file mode 100644
index 0000000000..059b31e32b
Binary files /dev/null and b/doc/jsk_perception/nodes/images/clip.png differ
diff --git a/doc/jsk_perception/nodes/images/invert_mask_image.jpg b/doc/jsk_perception/nodes/images/invert_mask_image.jpg
new file mode 100644
index 0000000000..041079262a
Binary files /dev/null and b/doc/jsk_perception/nodes/images/invert_mask_image.jpg differ
diff --git a/doc/jsk_perception/nodes/images/large_scale_vil_system.png b/doc/jsk_perception/nodes/images/large_scale_vil_system.png
new file mode 100644
index 0000000000..4856b85d09
Binary files /dev/null and b/doc/jsk_perception/nodes/images/large_scale_vil_system.png differ
diff --git a/doc/jsk_perception/nodes/images/point_pose_extractor_mouse.png b/doc/jsk_perception/nodes/images/point_pose_extractor_mouse.png
new file mode 100644
index 0000000000..fd1cde2b7b
Binary files /dev/null and b/doc/jsk_perception/nodes/images/point_pose_extractor_mouse.png differ
diff --git a/doc/jsk_perception/nodes/images/vqa.png b/doc/jsk_perception/nodes/images/vqa.png
new file mode 100644
index 0000000000..132e32784d
Binary files /dev/null and b/doc/jsk_perception/nodes/images/vqa.png differ
diff --git a/doc/jsk_perception/nodes/invert_mask_image.md b/doc/jsk_perception/nodes/invert_mask_image.md
new file mode 100644
index 0000000000..30be57a1a0
--- /dev/null
+++ b/doc/jsk_perception/nodes/invert_mask_image.md
@@ -0,0 +1,29 @@
+# InvertMaskImage
+
+![](images/invert_mask_image.jpg)
+
+Inverts the input mask image.
+
+## Subscribing Topic
+
+* `~input` (`sensor_msgs/Image`)
+
+ Input mask image.
+
+## Publishing Topic
+
+* `~output` (`sensor_msgs/Image`)
+
+ Inverted mask image.
+
+## Parameters
+
+* `~queue_size` (Int, default: `1`)
+
+ How many incoming messages the subscriber keeps in its queue.
+
+## Sample
+
+```bash
+roslaunch jsk_perception sample_invert_mask_image.launch
+```
diff --git a/doc/jsk_perception/nodes/point_pose_extractor.md b/doc/jsk_perception/nodes/point_pose_extractor.md
index b7a7996917..bb28a99962 100644
--- a/doc/jsk_perception/nodes/point_pose_extractor.md
+++ b/doc/jsk_perception/nodes/point_pose_extractor.md
@@ -8,7 +8,8 @@
- `/ImageFeature0D` (`posedetection_msgs::ImageFeature0D`)
- Image, camera and tempalte feature information.
+ Image, camera and template feature information.
+ You can use [imagesift](https://jsk-docs.readthedocs.io/projects/jsk_recognition/en/latest/imagesift/nodes/imagesift.html) to get it.
## Publishing Topics
@@ -30,62 +31,75 @@
- `/tf` (`tf2_msgs/TFMessage`)
- Detected Object Frame.
+ Detected Object Frame when `~publish_tf` is set to true.
## Parameters
-- `~template_filename` (str default: "/sample/opencv-logo2.png")
- + path to template image
-- `~object_width` (float)
- + Width of template image
-- `~object_height` (float)
- + Height of template image
-- `~reprojection_threshold` (float default: 3.0)
-- `~distanceratio_threshold` (float default: 0.49)
-- `~error_threshold` (float default: 50.0)
-- `~theta_step` (float default: 5.0)
-- `~phi_step` (float default: 5.0)
-- `~viewer_window` (bool default: true)
-- `~window_name` (str default: "sample1")
-- `~autosize` (bool default: false)
-- `~publish_null_object_detection` (bool default: false)
-- `~publish_tf` (bool defaut: false)
- + If set to true, detected object pose is also broadcasted as tf frame.
-- `~child_frame_id` (string default: "matching")
- + frame_id of detected object when `~broadcast_tf` is set to true.
+- `~template_filename` (str default: `"/sample/opencv-logo2.png"`)
+
+ path to template image
+
+- `~object_width` (float default: `0.06`)
+
+ Width of template image
+
+- `~object_height` (float default: `0.0739`)
+
+ Height of template image
+
+- `~relative_pose` (str default: `"0 0 0 0 0 0 1"`)
+
+ Coordinate of the object relative to the texture
+
+- `~reprojection_threshold` (float default: `3.0`)
+- `~distanceratio_threshold` (float default: `0.49`)
+- `~error_threshold` (float default: `50.0`)
+- `~theta_step` (float default: `5.0`)
+- `~phi_step` (float default: `5.0`)
+- `~viewer_window` (bool default: `true`)
+- `~window_name` (str default: `"sample1"`)
+- `~autosize` (bool default: `false`)
+
+ The window size is automatically adjusted to fit the displayed image, and you cannot change the window size manually.
+
+- `~publish_null_object_detection` (bool default: `false`)
+- `~publish_tf` (bool default: `false`)
+
+ If set to true, detected object pose is also broadcasted as tf frame.
+
+- `~child_frame_id` (string default: `"matching"`)
+
+ frame_id of detected object when `~publish_tf` is set to true.
## Service
-- `/SetTemplate`
+- `/SetTemplate` (`SetTemplate`)
Used to add another template.
+## Mouse Event
+
+![](images/point_pose_extractor_mouse.png)
+
+ Set a template from the viewer window.
+
+ To specify the template region, left-click its four corners clockwise from the upper left. Right-clicking resets the selected points.
+
+ After all four points are selected, you can input the template's width, height and filename. The filename should have an extension.
+
## Sample
`sample_point_pose_extractor.launch`
-```
-
-
-
-
-
-
-
-
-```
```
-roslaunch sample_point_pose_extractor.launch
+roslaunch jsk_perception sample_point_pose_extractor.launch
rostopic echo /ObjectDetection
```
### Example of how to run set_template service
+ `client.call` is available only after the node receives `ImageFeature0D`.
+
```
#!/usr/bin/env python
# -*- coding: utf-8 -*-
diff --git a/doc/jsk_perception/nodes/rect_array_in_panorama_to_bounding_box_array.md b/doc/jsk_perception/nodes/rect_array_in_panorama_to_bounding_box_array.md
index e32930dfac..adfa13e358 100644
--- a/doc/jsk_perception/nodes/rect_array_in_panorama_to_bounding_box_array.md
+++ b/doc/jsk_perception/nodes/rect_array_in_panorama_to_bounding_box_array.md
@@ -7,10 +7,6 @@ Sample is `sample_rect_array_in_panorama_to_bounding_box_array.launch`
## Subscribing Topics
-* `~panorama_image` (`sensor_msgs/Image`)
-
-Panorama image topic.
-
* `~panorama_info` (`jsk_recognition_msgs/PanoramaInfo`)
Panorama info topic.
diff --git a/doc/jsk_perception/nodes/vqa_node.md b/doc/jsk_perception/nodes/vqa_node.md
new file mode 100644
index 0000000000..f31f0c0498
--- /dev/null
+++ b/doc/jsk_perception/nodes/vqa_node.md
@@ -0,0 +1,96 @@
+# vqa_node.py
+![](images/vqa.png)
+
+The ROS node for Visual Question Answering (VQA).
+
+## System Configuration
+![](images/large_scale_vil_system.png)
+
+This node works together with a Docker container for inference. Please build the container first, following the Setup instructions.
+
+## Setup
+
+### Prerequisite
+This node requires an NVIDIA GPU with more than 4GB of GPU RAM to work properly.
+You have to install nvidia-container-toolkit to use the GPU with Docker. Please follow the [official instructions](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html).
+
+### Build the docker image
+You have to build the docker image of OFA:
+
+```shell
+roscd jsk_perception/docker
+make
+```
+
+## Subscribing topic
+* `~image` (`sensor_msgs/Image`)
+
+ Input image
+
+## Publishing topic
+* `~result` (`jsk_recognition_msgs/VQAResult`)
+
+ VQA result
+
+* `~result/image` (`sensor_msgs/Image`)
+
+ Images used for inference
+
+* `~visualize` (`std_msgs/String`)
+
+ VQA question and answer to visualize (see the sketch below)
+
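+For a quick look at the answers, you can simply subscribe to `~visualize` (a minimal sketch; the resolved topic name `/vqa/visualize` below is an assumption, adjust it to your launch configuration):
+
+```python
+import rospy
+from std_msgs.msg import String
+
+def cb(msg):
+    print(msg.data)
+
+rospy.init_node("vqa_visualize_echo")
+rospy.Subscriber("/vqa/visualize", String, cb)
+rospy.spin()
+```
+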
+## Action topic
+* `~inference_server/goal` (`jsk_recognition_msgs/VQATaskActionGoal`)
+
+ VQA request with custom questions and image
+
+* `~inference_server/result` (`jsk_recognition_msgs/VQATaskActionResult`)
+
+ VQA result of `~inference_server/goal`
+
+## Parameters
+* `~host` (String, default: `localhost`)
+
+ The host name or IP of inference container
+
+* `~port` (Integer, default: `8080`)
+
+ The HTTP port of inference container
+
+## Dynamic Reconfigure Parameters
+* `~questions` (string, default: `what does this image describe?`)
+
+ Default questions used for the subscribed image topic.
+
+ You can send multiple questions by separating them with semicolons, as below.
+ ```
+ What does this image describe?;What kinds of objects exists?
+ ```
+
+## Sample
+
+### Run inference container on another host or another terminal
+In the remote GPU machine,
+```shell
+cd jsk_recognition/jsk_perception/docker
+./run_jsk_vil_api --port (Your vacant port) --ofa_task caption --ofa_model_scale huge
+```
+
+
+`--ofa_task` should be `caption` or `vqa_gen` (the server also accepts `all`). Empirically, the output results are more natural for VQA tasks with the Caption model than with the VQA model in OFA.
+
+
+If you run out of GPU RAM, set `--ofa_model_scale` to `large`.
+
+
+In the ROS machine,
+```shell
+roslaunch jsk_perception vqa.launch port:=(Your inference container port) host:=(Your inference container host) VQA_INPUT_IMAGE:=(Your image topic name) gui:=true
+```
+
+
+### Run both inference container and ros node in single host
+```
+roslaunch jsk_perception vqa.launch run_api:=true VQA_INPUT_IMAGE:=(Your image topic name) gui:=true
+```
diff --git a/imagesift/test/test_imagesift.test b/imagesift/test/test_imagesift.test
index 2ca00099cf..929c19e6a5 100644
--- a/imagesift/test/test_imagesift.test
+++ b/imagesift/test/test_imagesift.test
@@ -6,7 +6,8 @@
+ pkg="jsk_tools" type="test_topic_published.py"
+ retry="3">
topic_0: /Feature0D
timeout_0: 10
diff --git a/jsk_pcl_ros/CMakeLists.txt b/jsk_pcl_ros/CMakeLists.txt
index 49b1b37a93..34cd2d65d9 100644
--- a/jsk_pcl_ros/CMakeLists.txt
+++ b/jsk_pcl_ros/CMakeLists.txt
@@ -142,7 +142,7 @@ generate_dynamic_reconfigure_options(
find_package(OpenCV REQUIRED core imgproc)
-include_directories(include ${catkin_INCLUDE_DIRS} ${OpenCV_INCLUDE_DIRS} ${PCL_INCLUDE_DIRS})
+include_directories(include ${catkin_INCLUDE_DIRS} ${OpenCV_INCLUDE_DIRS} ${PCL_INCLUDE_DIRS} ${ml_classifiers_INCLUDE_DIRS})
link_directories(${catkin_LIBRARY_DIRS})
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -z defs")
@@ -507,10 +507,34 @@ install(TARGETS ${jsk_pcl_ros_install_libraries}
LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
)
-install(DIRECTORY config euslisp launch plugins sample scripts test
+install(DIRECTORY config euslisp launch plugins test
DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
USE_SOURCE_PERMISSIONS
)
+if($ENV{ROS_DISTRO} STREQUAL "indigo") # on noetic it needs catkin_install_python to support Python3 and it does not work on indigo for some reason...
+install(DIRECTORY sample scripts
+ DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
+ USE_SOURCE_PERMISSIONS
+ )
+else()
+install(DIRECTORY sample scripts
+ DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
+ USE_SOURCE_PERMISSIONS
+ PATTERN "*"
+ PATTERN "*/*.py" EXCLUDE
+)
+
+file(GLOB SCRIPT_PROGRAMS scripts/*.py)
+catkin_install_python(
+ PROGRAMS ${SCRIPT_PROGRAMS}
+ DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}/scripts
+)
+file(GLOB SAMPLE_SCRIPT_PROGRAMS sample/*.py)
+catkin_install_python(
+ PROGRAMS ${SAMPLE_SCRIPT_PROGRAMS}
+ DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}/sample
+)
+endif()
if (CATKIN_ENABLE_TESTING)
find_package(jsk_perception REQUIRED)
diff --git a/jsk_pcl_ros/package.xml b/jsk_pcl_ros/package.xml
index 51635bba71..b4ef3165d4 100644
--- a/jsk_pcl_ros/package.xml
+++ b/jsk_pcl_ros/package.xml
@@ -91,6 +91,8 @@
visualization_msgs
yaml-cpp
+ compressed_depth_image_transport
+ compressed_image_transport
jsk_perception
jsk_tools
roslaunch
diff --git a/jsk_pcl_ros/sample/include/play_rosbag_baxter_realsense_l515.xml b/jsk_pcl_ros/sample/include/play_rosbag_baxter_realsense_l515.xml
index b77e1cb643..f88073eacd 100644
--- a/jsk_pcl_ros/sample/include/play_rosbag_baxter_realsense_l515.xml
+++ b/jsk_pcl_ros/sample/include/play_rosbag_baxter_realsense_l515.xml
@@ -6,11 +6,12 @@
+
+ args="--rate $(arg RATE) $(find jsk_pcl_ros)/sample/data/baxter_realsense_l515.bag --clock --loop"/>
diff --git a/jsk_pcl_ros/sample/sample_incremental_model_registration.launch b/jsk_pcl_ros/sample/sample_incremental_model_registration.launch
index a5d35e1ea8..4e43474db5 100644
--- a/jsk_pcl_ros/sample/sample_incremental_model_registration.launch
+++ b/jsk_pcl_ros/sample/sample_incremental_model_registration.launch
@@ -3,7 +3,7 @@
-
+
-
+
+
+
diff --git a/jsk_pcl_ros/scripts/check_depth_error/depth_error_calibration.py b/jsk_pcl_ros/scripts/check_depth_error/depth_error_calibration.py
index 5e464e0745..f18e6d01f0 100755
--- a/jsk_pcl_ros/scripts/check_depth_error/depth_error_calibration.py
+++ b/jsk_pcl_ros/scripts/check_depth_error/depth_error_calibration.py
@@ -348,7 +348,7 @@ def updatePlot():
def generateFrequencyMap():
global width, height
# bgr
- img = np.tile(np.uint8([0,0,0]), (height / 10, width / 10, 1))
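+ # "//" (floor division) keeps the tile shape integral under Python 3 (true division would give floats)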
+ img = np.tile(np.uint8([0,0,0]), (height // 10, width // 10, 1))
frequency = dict()
for (u, v) in value_cache.keys():
min_color = np.uint8([255, 0, 0])
diff --git a/jsk_pcl_ros/src/incremental_model_registration_nodelet.cpp b/jsk_pcl_ros/src/incremental_model_registration_nodelet.cpp
index 8999bbd05b..abb16d9b84 100644
--- a/jsk_pcl_ros/src/incremental_model_registration_nodelet.cpp
+++ b/jsk_pcl_ros/src/incremental_model_registration_nodelet.cpp
@@ -207,6 +207,7 @@ namespace jsk_pcl_ros
std_srvs::Empty::Request& req,
std_srvs::Empty::Response& res)
{
+ boost::mutex::scoped_lock lock(mutex_);
if (samples_.size() <= 1) {
ROS_ERROR("no enough samples");
return false;
@@ -251,6 +252,7 @@ namespace jsk_pcl_ros
nonregistered_ros_cloud.header.stamp = ros::Time::now();
nonregistered_ros_cloud.header.frame_id = frame_id_;
pub_cloud_non_registered_.publish(nonregistered_ros_cloud);
+ return true;
}
}
diff --git a/jsk_pcl_ros/test/test_cluster_point_indices_decomposer_bbox.test b/jsk_pcl_ros/test/test_cluster_point_indices_decomposer_bbox.test
index eb0e57e0b7..378be8ad4e 100644
--- a/jsk_pcl_ros/test/test_cluster_point_indices_decomposer_bbox.test
+++ b/jsk_pcl_ros/test/test_cluster_point_indices_decomposer_bbox.test
@@ -92,7 +92,7 @@
+ time-limit="30" retry="3">
check_times: 10
@@ -102,7 +102,7 @@
+ time-limit="30" retry="3">
check_times: 10
@@ -112,7 +112,7 @@
+ time-limit="30" retry="3">
check_times: 10
diff --git a/jsk_pcl_ros/test/test_convex_connected_voxels.test b/jsk_pcl_ros/test/test_convex_connected_voxels.test
index 6f632d3796..e22143f7d6 100644
--- a/jsk_pcl_ros/test/test_convex_connected_voxels.test
+++ b/jsk_pcl_ros/test/test_convex_connected_voxels.test
@@ -12,7 +12,8 @@
+ pkg="jsk_tools" type="test_topic_published.py"
+ retry="3" time-limit="120">
topic_0: /sample_convex_connected_voxels/output/indices
timeout_0: 100
diff --git a/jsk_pcl_ros/test/test_organized_statistical_outlier_removal.test b/jsk_pcl_ros/test/test_organized_statistical_outlier_removal.test
index bd3a095e4d..56519753ae 100644
--- a/jsk_pcl_ros/test/test_organized_statistical_outlier_removal.test
+++ b/jsk_pcl_ros/test/test_organized_statistical_outlier_removal.test
@@ -2,15 +2,16 @@
+
+ retry="3" time-limit="120">
topic_0: /statistical_outlier_removal/output
- timeout_0: 30
+ timeout_0: 100
diff --git a/jsk_pcl_ros_utils/include/jsk_pcl_ros_utils/pointcloud_to_pcd.h b/jsk_pcl_ros_utils/include/jsk_pcl_ros_utils/pointcloud_to_pcd.h
index 5720025844..78ae3d4172 100644
--- a/jsk_pcl_ros_utils/include/jsk_pcl_ros_utils/pointcloud_to_pcd.h
+++ b/jsk_pcl_ros_utils/include/jsk_pcl_ros_utils/pointcloud_to_pcd.h
@@ -41,6 +41,7 @@
#include
#include
+#include <std_srvs/Empty.h>
#include
#include
@@ -55,6 +56,8 @@ namespace jsk_pcl_ros_utils
virtual void onInit();
virtual void timerCallback (const ros::TimerEvent& event);
virtual void configCallback(Config &config, uint32_t level);
+ void savePCD();
+ bool savePCDCallback(std_srvs::Empty::Request& req, std_srvs::Empty::Response& res);
boost::mutex mutex_;
boost::shared_ptr > srv_;
ros::Timer timer_;
@@ -66,6 +69,7 @@ namespace jsk_pcl_ros_utils
bool compressed_;
std::string fixed_frame_;
tf::TransformListener* tf_listener_;
+ ros::ServiceServer srv_save_pcd_server_;
private:
};
}
diff --git a/jsk_pcl_ros_utils/scripts/cloud_on_plane_info.py b/jsk_pcl_ros_utils/scripts/cloud_on_plane_info.py
index 818d38d66d..ce05f59536 100755
--- a/jsk_pcl_ros_utils/scripts/cloud_on_plane_info.py
+++ b/jsk_pcl_ros_utils/scripts/cloud_on_plane_info.py
@@ -6,6 +6,27 @@
from jsk_rviz_plugins.overlay_text_interface import OverlayTextInterface
from jsk_recognition_msgs.msg import BoolStamped
+# workaround until https://github.com/jsk-ros-pkg/jsk_visualization/pull/864 is merged and released.
+from jsk_rviz_plugins.msg import OverlayText  # used by the workaround below; harmless if already imported above
+class OverlayTextInterface_fix(OverlayTextInterface):
+ def publish(self, text):
+ msg = OverlayText()
+ msg.text = text
+ msg.width = int(self.config.width)
+ msg.height = int(self.config.height)
+ msg.top = int(self.config.top)
+ msg.left = int(self.config.left)
+ msg.fg_color.a = self.config.fg_alpha
+ msg.fg_color.r = self.config.fg_red
+ msg.fg_color.g = self.config.fg_green
+ msg.fg_color.b = self.config.fg_blue
+ msg.bg_color.a = self.config.bg_alpha
+ msg.bg_color.r = self.config.bg_red
+ msg.bg_color.g = self.config.bg_green
+ msg.bg_color.b = self.config.bg_blue
+ msg.text_size = self.config.text_size
+ self.pub.publish(msg)
+
+
g_lock = Lock()
g_msg = None
def callback(msg):
@@ -23,7 +44,7 @@ def publish_text(event):
if __name__ == "__main__":
rospy.init_node("cloud_on_plane_info")
- text_interface = OverlayTextInterface("~text")
+ text_interface = OverlayTextInterface_fix("~text")
sub = rospy.Subscriber("~input", BoolStamped, callback)
rospy.Timer(rospy.Duration(0.1), publish_text)
rospy.spin()
diff --git a/jsk_pcl_ros_utils/src/pointcloud_to_pcd_nodelet.cpp b/jsk_pcl_ros_utils/src/pointcloud_to_pcd_nodelet.cpp
index 294a7d4bbc..fe9be68586 100644
--- a/jsk_pcl_ros_utils/src/pointcloud_to_pcd_nodelet.cpp
+++ b/jsk_pcl_ros_utils/src/pointcloud_to_pcd_nodelet.cpp
@@ -49,7 +49,16 @@ namespace jsk_pcl_ros_utils
timer_.stop();
}
- void PointCloudToPCD::timerCallback (const ros::TimerEvent& event)
+ void PointCloudToPCD::timerCallback (const ros::TimerEvent& event) {
+ savePCD();
+ }
+
+ bool PointCloudToPCD::savePCDCallback(std_srvs::Empty::Request& req, std_srvs::Empty::Response& res) {
+ savePCD();
+ return true;
+ }
+
+ void PointCloudToPCD::savePCD()
{
pcl::PCLPointCloud2::ConstPtr cloud;
cloud = ros::topic::waitForMessage("input", *pnh_);
@@ -58,7 +67,7 @@ namespace jsk_pcl_ros_utils
return;
}
- ROS_INFO ("Received %d data points in frame %s with the following fields: %s",
+ NODELET_INFO ("Received %d data points in frame %s with the following fields: %s",
(int)cloud->width * cloud->height,
cloud->header.frame_id.c_str (),
pcl::getFieldsList (*cloud).c_str ());
@@ -67,7 +76,7 @@ namespace jsk_pcl_ros_utils
Eigen::Quaternionf q = Eigen::Quaternionf::Identity ();
if (!fixed_frame_.empty ()) {
if (!tf_listener_->waitForTransform (fixed_frame_, cloud->header.frame_id, pcl_conversions::fromPCL (cloud->header).stamp, ros::Duration (duration_))) {
- ROS_WARN("Could not get transform!");
+ NODELET_WARN("Could not get transform!");
return;
}
tf::StampedTransform transform_stamped;
@@ -81,7 +90,7 @@ namespace jsk_pcl_ros_utils
std::stringstream ss;
ss << prefix_ << cloud->header.stamp << ".pcd";
- ROS_INFO ("Data saved to %s", ss.str ().c_str ());
+ NODELET_INFO ("Data saved to %s", ss.str ().c_str ());
pcl::PCDWriter writer;
if(binary_)
@@ -107,21 +116,22 @@ namespace jsk_pcl_ros_utils
srv_ = boost::make_shared > (*pnh_);
dynamic_reconfigure::Server::CallbackType f = boost::bind(&PointCloudToPCD::configCallback, this, _1, _2);
srv_->setCallback (f);
+ srv_save_pcd_server_ = pnh_->advertiseService("save_pcd", &PointCloudToPCD::savePCDCallback, this);
tf_listener_ = jsk_recognition_utils::TfListenerSingleton::getInstance();
if(binary_)
{
if(compressed_)
{
- ROS_INFO_STREAM ("Saving as binary compressed PCD");
+ NODELET_INFO("Saving as binary compressed PCD") ;
}
else
{
- ROS_INFO_STREAM ("Saving as binary PCD");
+ NODELET_INFO("Saving as binary PCD");
}
}
else
{
- ROS_INFO_STREAM ("Saving as binary PCD");
+ NODELET_INFO("Saving as ASCII PCD");
}
}
@@ -133,7 +143,10 @@ namespace jsk_pcl_ros_utils
compressed_ = config.compressed;
fixed_frame_ = config.fixed_frame;
duration_ = config.duration;
- timer_ = pnh_->createTimer(ros::Duration(duration_), boost::bind(&PointCloudToPCD::timerCallback, this, _1));
+ timer_.stop();
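+ // a duration of 0 disables the periodic timer; saving can still be triggered via the ~save_pcd service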
+ if (duration_ != 0) {
+ timer_ = pnh_->createTimer(ros::Duration(duration_), boost::bind(&PointCloudToPCD::timerCallback, this, _1));
+ }
}
}
diff --git a/jsk_perception/CMakeLists.txt b/jsk_perception/CMakeLists.txt
index 6b568589ae..428e6c8a98 100644
--- a/jsk_perception/CMakeLists.txt
+++ b/jsk_perception/CMakeLists.txt
@@ -43,9 +43,7 @@ find_package(catkin REQUIRED COMPONENTS
find_package(roseus)
-if(NOT $ENV{ROS_DISTRO} STREQUAL "indigo") # on noetic it needs catkin_install_python to support Python3 and it does not work on indigo for some reason...
- catkin_python_setup()
-endif()
+catkin_python_setup()
find_package(Boost REQUIRED COMPONENTS filesystem system)
@@ -155,6 +153,9 @@ generate_dynamic_reconfigure_options(
cfg/VirtualCameraMono.cfg
cfg/AWSDetectFaces.cfg
cfg/RemoveBlurredFrames.cfg
+ cfg/VQA.cfg
+ cfg/Classification.cfg
+ cfg/VideoToScene.cfg
)
message(WARNING "DEPRECATION WARNING: srv files in jsk_pcl_ros are deprecated."
@@ -238,6 +239,7 @@ jsk_add_nodelet(src/sparse_image_decoder.cpp "jsk_perception/SparseImageDecoder"
jsk_add_nodelet(src/color_histogram.cpp "jsk_perception/ColorHistogram" "color_histogram")
jsk_add_nodelet(src/background_substraction_nodelet.cpp "jsk_perception/BackgroundSubstraction" "background_substraction")
jsk_add_nodelet(src/fisheye_to_panorama.cpp "jsk_perception/FisheyeToPanorama" "fisheye_to_panorama")
+jsk_add_nodelet(src/point_pose_extractor.cpp "jsk_perception/PointPoseExtractor" "point_pose_extractor")
# See https://github.com/drNoob13/fisheyeStitcher#pre-requisite
if("${CMAKE_CXX_COMPILER_VERSION}" VERSION_GREATER "7.0" AND # >= 7.1
"${OpenCV_VERSION}" VERSION_GREATER "2.9.9" AND # >= 3.0.0
@@ -260,6 +262,7 @@ jsk_add_nodelet(src/overlay_image_color_on_mono.cpp "jsk_perception/OverlayImage
jsk_add_nodelet(src/grid_label.cpp "jsk_perception/GridLabel" "grid_label")
jsk_add_nodelet(src/color_histogram_label_match.cpp "jsk_perception/ColorHistogramLabelMatch" "color_histogram_label_match")
jsk_add_nodelet(src/apply_mask_image.cpp "jsk_perception/ApplyMaskImage" "apply_mask_image")
+jsk_add_nodelet(src/invert_mask_image.cpp "jsk_perception/InvertMaskImage" "invert_mask_image")
jsk_add_nodelet(src/unapply_mask_image.cpp "jsk_perception/UnapplyMaskImage" "unapply_mask_image")
jsk_add_nodelet(src/single_channel_histogram.cpp "jsk_perception/SingleChannelHistogram" "single_channel_histogram")
jsk_add_nodelet(src/blob_detector.cpp "jsk_perception/BlobDetector" "blob_detector")
@@ -294,7 +297,9 @@ jsk_add_nodelet(src/gaussian_blur.cpp "jsk_perception/GaussianBlur" "gaussian_bl
jsk_add_nodelet(src/kmeans.cpp "jsk_perception/KMeans" "kmeans")
jsk_add_nodelet(src/draw_rects.cpp "jsk_perception/DrawRects" "draw_rects")
jsk_add_nodelet(src/remove_blurred_frames.cpp "jsk_perception/RemoveBlurredFrames" "remove_blurred_frames")
+jsk_add_nodelet(src/split_image.cpp "jsk_perception/SplitImage" "split_image")
if("${OpenCV_VERSION}" VERSION_GREATER "2.9.9") # >= 3.0.0
+ jsk_add_nodelet(src/video_to_scene.cpp "jsk_perception/VideoToScene" "video_to_scene")
jsk_add_nodelet(src/bing.cpp "jsk_perception/Bing" "bing")
endif()
jsk_add_nodelet(src/tabletop_color_difference_likelihood.cpp "jsk_perception/TabletopColorDifferenceLikelihood" "tabletop_color_difference_likelihood")
@@ -352,7 +357,6 @@ target_link_libraries(oriented_gradient ${catkin_LIBRARIES} ${Boost_LIBRARIES})
jsk_add_node(src/camshiftdemo.cpp camshiftdemo)
jsk_add_node(src/linemod.cpp linemod)
-jsk_add_node(src/point_pose_extractor.cpp point_pose_extractor)
jsk_add_node(src/white_balance_converter.cpp white_balance_converter)
jsk_add_node(src/rectangle_detector.cpp rectangle_detector)
jsk_add_node(src/calc_flow.cpp calc_flow)
@@ -449,9 +453,17 @@ catkin_install_python(
PROGRAMS ${NODE_SCRIPT_PROGRAMS}
DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}/node_scripts
)
+if(CATKIN_DEVEL_PREFIX)
+ add_custom_target(link_dir_mkdir
+ COMMAND ${CMAKE_COMMAND} -E make_directory ${CATKIN_DEVEL_PREFIX}/${CATKIN_PACKAGE_SHARE_DESTINATION}/node_scripts)
+ foreach(SUB_DIR craft deep_sort hmr openpose)
+ add_custom_target(link_dir_${SUB_DIR} ALL
+ COMMAND ${CMAKE_COMMAND} -E create_symlink ${PROJECT_SOURCE_DIR}/node_scripts/${SUB_DIR} ${CATKIN_DEVEL_PREFIX}/${CATKIN_PACKAGE_SHARE_DESTINATION}/node_scripts/${SUB_DIR})
+ add_dependencies(link_dir_${SUB_DIR} link_dir_mkdir)
+ endforeach()
+endif(CATKIN_DEVEL_PREFIX)
endif()
-
# ------------------------------------------------------------------------------------
# Test
# ------------------------------------------------------------------------------------
@@ -547,6 +559,8 @@ if(CATKIN_ENABLE_TESTING)
jsk_add_rostest(test/overlay_image_color_on_mono.test)
jsk_add_rostest(test/people_pose_estimation_2d.test)
jsk_add_rostest(test/human_mesh_recovery.test)
+ jsk_add_rostest(test/point_pose_extractor.test)
+ jsk_add_rostest(test/point_pose_extractor_group_ns.test)
jsk_add_rostest(test/pointit.test)
jsk_add_rostest(test/polygon_array_color_histogram.test)
jsk_add_rostest(test/polygon_array_color_likelihood.test)
@@ -586,6 +600,9 @@ if(CATKIN_ENABLE_TESTING)
jsk_add_rostest(test/ycc_decomposer.test)
jsk_add_rostest(test/depth_image_filter.test)
jsk_add_rostest(test/remove_blurred_frames.test)
+ if(TARGET JSK_NODELET_${PROJECT_NAME}_video_to_scene)
+ jsk_add_rostest(test/video_to_scene.test)
+ endif()
if(TARGET JSK_NODELET_${PROJECT_NAME}_bing)
jsk_add_rostest(test/bing.test)
endif()
diff --git a/jsk_perception/cfg/Classification.cfg b/jsk_perception/cfg/Classification.cfg
new file mode 100755
index 0000000000..83a569377c
--- /dev/null
+++ b/jsk_perception/cfg/Classification.cfg
@@ -0,0 +1,10 @@
+#!/usr/bin/env python
+PACKAGE = "jsk_perception"
+
+from dynamic_reconfigure.parameter_generator_catkin import *
+
+gen = ParameterGenerator()
+
+gen.add("queries", str_t, 0, "A queries of Caption Task. The sentences are separated with ;", "human;kettle;cup;glass")
+
+exit(gen.generate(PACKAGE, "jsk_perception", "Classification"))
diff --git a/jsk_perception/cfg/VQA.cfg b/jsk_perception/cfg/VQA.cfg
new file mode 100755
index 0000000000..2f97b15c61
--- /dev/null
+++ b/jsk_perception/cfg/VQA.cfg
@@ -0,0 +1,12 @@
+#!/usr/bin/env python
+PACKAGE = "jsk_perception"
+
+from dynamic_reconfigure.parameter_generator_catkin import *
+
+gen = ParameterGenerator()
+
+gen.add("questions", str_t, 0,
+ "Default questions of VQA. You can split questions by ;",
+ "what does this image describe?")
+
+exit(gen.generate(PACKAGE, "jsk_perception", "VQA"))
diff --git a/jsk_perception/cfg/VideoToScene.cfg b/jsk_perception/cfg/VideoToScene.cfg
new file mode 100755
index 0000000000..617e99ffca
--- /dev/null
+++ b/jsk_perception/cfg/VideoToScene.cfg
@@ -0,0 +1,12 @@
+#!/usr/bin/env python
+
+PACKAGE='jsk_perception'
+
+from dynamic_reconfigure.parameter_generator_catkin import *
+
+gen = ParameterGenerator()
+
+gen.add("min_percent", int_t, 0, "The percentage of forgrand when the motion has stopped and publish image", 5, 0, 100)
+gen.add("max_percent", int_t, 0, "The percentage of forgrand when the scene is not settled", 20, 0, 100)
+
+exit(gen.generate(PACKAGE, "jsk_perception", "VideoToScene"))
diff --git a/jsk_perception/docker/Makefile b/jsk_perception/docker/Makefile
new file mode 100644
index 0000000000..83eb2800ab
--- /dev/null
+++ b/jsk_perception/docker/Makefile
@@ -0,0 +1,51 @@
+#
+# Author: Yoshiki Obinata
+#
+
+# api directories
+OFAPROJECT = ofa
+CLIPPROJECT = clip
+# image names
+OFAIMAGE = jsk-ofa-server
+CLIPIMAGE = jsk-clip-server
+# commands
+BUILDIMAGE = docker build
+REMOVEIMAGE = docker rmi
+# configuration directories
+ifdef ROS_HOME
+CONFIGDIR = $(ROS_HOME)/data/jsk_perception
+else
+CONFIGDIR = $(HOME)/.ros/data/jsk_perception
+endif
+PARAMDIR = $(CONFIGDIR)/vil_params
+# Download files
+PARAMURLS = parameter_urls.txt
+# OFA parameters
+OFAPARAMFILES = $(foreach param, $(OFAPARAMS), $(PARAMDIR)/$(param))
+
+all: ofa clip
+
+# TODO check command wget exists, nvidia-driver version
+
+# TODO add clip
+$(PARAMDIR)/.download:
+ @if [ ! -d $(PARAMDIR) ]; then\
+ mkdir -p $(PARAMDIR); \
+ fi
+ wget -c -i $(PARAMURLS) -P $(PARAMDIR)
+ touch $(PARAMDIR)/.download
+
+ofa: $(PARAMDIR)/.download
+ $(BUILDIMAGE) $(OFAPROJECT) -t $(OFAIMAGE) -f $(OFAPROJECT)/Dockerfile
+
+clip: $(PARAMDIR)/.download
+ $(BUILDIMAGE) $(CLIPPROJECT) -t $(CLIPIMAGE) -f $(CLIPPROJECT)/Dockerfile
+
+# TODO add clip, glip
+clean:
+ @$(REMOVEIMAGE) $(OFAIMAGE)
+
+wipe: clean
+ rm -fr $(PARAMDIR)
+
+.PHONY: clean wipe ofa clip
diff --git a/jsk_perception/docker/clip/Dockerfile b/jsk_perception/docker/clip/Dockerfile
new file mode 100644
index 0000000000..f824e1c377
--- /dev/null
+++ b/jsk_perception/docker/clip/Dockerfile
@@ -0,0 +1,16 @@
+FROM pytorch/pytorch:1.7.1-cuda11.0-cudnn8-devel
+ARG DEBIAN_FRONTEND=noninteractive
+RUN apt -o Acquire::AllowInsecureRepositories=true update \
+ && apt-get install -y \
+ curl \
+ git \
+ libopencv-dev \
+ && apt-get clean \
+ && rm -rf /var/lib/apt/lists/*
+# use requirements.txt generated by `pip freeze | grep ==` in the container created on 2023-03-31
+COPY requirements.txt /tmp/requirements.txt
+RUN pip install -r /tmp/requirements.txt
+RUN pip install git+https://github.com/openai/CLIP.git
+RUN mkdir -p /workspace/Clip
+COPY server.py /workspace/Clip
+ENTRYPOINT cd /workspace/Clip && python server.py
\ No newline at end of file
diff --git a/jsk_perception/docker/clip/requirements.txt b/jsk_perception/docker/clip/requirements.txt
new file mode 100644
index 0000000000..74c63fe7c0
--- /dev/null
+++ b/jsk_perception/docker/clip/requirements.txt
@@ -0,0 +1,31 @@
+backcall==0.2.0
+brotlipy==0.7.0
+certifi==2020.12.5
+click==8.1.3
+conda==4.9.2
+conda-build==3.21.4
+decorator==4.4.2
+dnspython==2.1.0
+Flask==2.2.3
+ftfy==6.1.1
+importlib-metadata==6.1.0
+itsdangerous==2.1.2
+Jinja2==3.1.2
+MarkupSafe==2.1.2
+mkl-fft==1.2.0
+mkl-random==1.1.1
+mkl-service==2.3.0
+olefile==0.46
+opencv-python==4.7.0.72
+parso==0.7.0
+pkginfo==1.7.0
+pycosat==0.6.3
+python-etcd==0.4.5
+PyYAML==5.3.1
+regex==2023.3.23
+ruamel-yaml==0.15.87
+torch==1.7.1
+torchelastic==0.2.1
+torchvision==0.8.2
+Werkzeug==2.2.3
+zipp==3.15.0
diff --git a/jsk_perception/docker/clip/server.py b/jsk_perception/docker/clip/server.py
new file mode 100644
index 0000000000..c26d32db62
--- /dev/null
+++ b/jsk_perception/docker/clip/server.py
@@ -0,0 +1,67 @@
+import clip
+import cv2
+import numpy as np
+import os
+from PIL import Image as PLImage
+import torch
+
+# web server
+from flask import Flask, request, Response
+import json
+import base64
+
+
+def apply_half(t):
+ if t.dtype is torch.float32:
+ return t.to(dtype=torch.half)
+ return t
+
+class Inference:
+ def __init__(self, gpu_id=None):
+ self.gpu_id = gpu_id
+ self.device = "cuda" if torch.cuda.is_available() else "cpu"
+ self.model, self.preprocess = clip.load('ViT-B/32', self.device)
+
+ def infer(self, img, texts):
+ # get cv2 image
+ image = cv2.resize(img, dsize=(640, 480)) # NOTE: forcibly resized
+ image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+ image = PLImage.fromarray(image)
+ image_input = self.preprocess(image).unsqueeze(0).to(self.device)
+ text_inputs = torch.cat([clip.tokenize(c) for c in texts]).to(self.device)
+ with torch.no_grad():
+ image_features = self.model.encode_image(image_input)
+ text_features = self.model.encode_text(text_inputs)
+ image_features /= image_features.norm(dim=-1, keepdim=True)
+ text_features /= text_features.norm(dim=-1, keepdim=True)
+ probability = (100.0 * image_features @ text_features.T).softmax(dim=-1)
+ similarity = (text_features.cpu().numpy() @ image_features.cpu().numpy().T).T[0] # cosine similarity
+ values, indices = probability[0].topk(len(texts))
+ results = {}
+ for value, index in zip(values, indices):
+ results[texts[index]] = (value.item(), float(similarity[index]))
+ return results
+
+# run
+if __name__ == "__main__":
+ app = Flask(__name__)
+ infer = Inference()
+
+ @app.route("/inference", methods=['POST'])
+ def caption_request():
+ data = request.data.decode("utf-8")
+ data_json = json.loads(data)
+ # process image
+ image_b = data_json['image']
+ image_dec = base64.b64decode(image_b)
+ data_np = np.fromstring(image_dec, dtype='uint8')
+ img = cv2.imdecode(data_np, 1)
+ # get text
+ texts = data_json['queries']
+ infer_results = infer.infer(img, texts)
+ results = []
+ for q in infer_results:
+ results.append({"question": q, "probability": infer_results[q][0], "similarity": infer_results[q][1]})
+ return Response(response=json.dumps({"results": results}), status=200)
+
+ app.run("0.0.0.0", 8080, threaded=True)
diff --git a/jsk_perception/docker/ofa/Dockerfile b/jsk_perception/docker/ofa/Dockerfile
new file mode 100644
index 0000000000..31ff077074
--- /dev/null
+++ b/jsk_perception/docker/ofa/Dockerfile
@@ -0,0 +1,21 @@
+FROM pytorch/pytorch:1.8.1-cuda11.1-cudnn8-devel
+ARG DEBIAN_FRONTEND=noninteractive
+RUN apt -o Acquire::AllowInsecureRepositories=true update \
+ && apt-get install -y \
+ curl \
+ git \
+ libopencv-dev \
+ && apt-get clean \
+ && rm -rf /var/lib/apt/lists/*
+# use requirements.txt generated by `pip freeze | grep ==` in the container created on 2023-03-31
+COPY requirements.txt /tmp/requirements.txt
+RUN sed -i 's/pycocotools==2\.0\.4/pycocotools==2.0.6/g' /tmp/requirements.txt && pip install -r /tmp/requirements.txt
+RUN git clone https://github.com/OFA-Sys/OFA \
+ && mkdir -p /var/mount
+RUN cd OFA \
+ && git checkout 1809b55 \
+ && sed -i 's/pycocotools==2\.0\.4/pycocotools==2.0.6/g' requirements.txt \
+ && pip install -r requirements.txt \
+ && pip install "protobuf<3.20.0" "numpy>=1.20.3,<1.24" "pytorch-lightning==1.7.7" "torch==1.8.1" flask
+COPY server.py /workspace/OFA
+ENTRYPOINT cd /workspace/OFA && python server.py
diff --git a/jsk_perception/docker/ofa/requirements.txt b/jsk_perception/docker/ofa/requirements.txt
new file mode 100644
index 0000000000..25f0bd0144
--- /dev/null
+++ b/jsk_perception/docker/ofa/requirements.txt
@@ -0,0 +1,96 @@
+absl-py==1.4.0
+aiohttp==3.8.4
+aiosignal==1.3.1
+antlr4-python3-runtime==4.8
+async-timeout==4.0.2
+attrs==22.2.0
+bitarray==2.7.3
+brotlipy==0.7.0
+cachetools==5.3.0
+certifi==2020.12.5
+charset-normalizer==3.1.0
+click==8.1.3
+colorama==0.4.6
+conda==4.9.2
+conda-build==3.21.4
+contourpy==1.0.7
+cycler==0.11.0
+Cython==0.29.33
+datasets==2.11.0
+dill==0.3.6
+dnspython==2.1.0
+einops==0.6.0
+Flask==2.2.3
+fonttools==4.39.3
+frozenlist==1.3.3
+fsspec==2023.3.0
+ftfy==6.0.3
+google-auth==2.17.1
+google-auth-oauthlib==0.4.6
+grpcio==1.53.0
+huggingface-hub==0.13.3
+hydra-core==1.0.7
+importlib-metadata==6.1.0
+importlib-resources==5.12.0
+itsdangerous==2.1.2
+jedi==0.17.0
+Jinja2==3.1.2
+joblib==1.2.0
+kiwisolver==1.4.4
+lightning-utilities==0.8.0
+lxml==4.9.2
+Markdown==3.4.3
+MarkupSafe==2.1.2
+matplotlib==3.7.1
+mkl-fft==1.3.0
+mkl-random==1.1.1
+mkl-service==2.3.0
+multidict==6.0.4
+multiprocess==0.70.14
+nltk==3.8.1
+numpy==1.23.5
+oauthlib==3.2.2
+olefile==0.46
+omegaconf==2.0.6
+opencv-python==4.7.0.72
+packaging==23.0
+pandas==1.5.3
+pkginfo==1.7.0
+portalocker==2.7.0
+protobuf==3.19.6
+pyarrow==11.0.0
+pyasn1==0.4.8
+pyasn1-modules==0.2.8
+pycocoevalcap==1.2
+pycocotools==2.0.4
+pycosat==0.6.3
+pyDeprecate==0.3.2
+pyparsing==3.0.9
+python-dateutil==2.8.2
+python-etcd==0.4.5
+pytorch-lightning==1.7.7
+PyYAML==5.4.1
+regex==2023.3.23
+requests-oauthlib==1.3.1
+responses==0.18.0
+rouge-score==0.1.2
+rsa==4.9
+ruamel-yaml==0.15.87
+sacrebleu==2.3.1
+tabulate==0.9.0
+tensorboard==2.12.0
+tensorboard-data-server==0.7.0
+tensorboard-plugin-wit==1.8.1
+tensorboardX==2.4.1
+timm==0.6.13
+torch==1.8.1
+torchelastic==0.2.2
+torchmetrics==0.11.4
+torchtext==0.9.1
+torchvision==0.9.1
+tqdm==4.65.0
+typing-extensions==4.5.0
+Werkzeug==2.2.3
+xxhash==3.2.0
+yarl==1.8.2
+zipp==3.15.0
diff --git a/jsk_perception/docker/ofa/server.py b/jsk_perception/docker/ofa/server.py
new file mode 100644
index 0000000000..6c667e3328
--- /dev/null
+++ b/jsk_perception/docker/ofa/server.py
@@ -0,0 +1,278 @@
+# sys
+import os
+# inference
+import torch
+import numpy as np
+import cv2
+from fairseq import utils, tasks
+from fairseq import checkpoint_utils
+from fairseq import options
+from fairseq.dataclass.utils import convert_namespace_to_omegaconf
+from utils.eval_utils import eval_step
+from utils.zero_shot_utils import zero_shot_step
+from tasks.mm_tasks.caption import CaptionTask
+from tasks.mm_tasks.refcoco import RefcocoTask
+from tasks.mm_tasks.vqa_gen import VqaGenTask
+from models.ofa import OFAModel
+from torchvision import transforms
+from PIL import Image
+# web server
+from flask import Flask, request, Response
+import json
+import base64
+
+PARAM_DIR = "/var/mount/params"
+# OFA_PARAM[TASK][SCALE]
+OFA_PARAM = {
+ "caption":{
+ "large":"caption_large_best_clean.pt",
+ "huge":"caption_huge_best.pt"
+ },
+ "refcoco":{
+ "large":"refcocog_large_best.pt",
+ "huge":"refcocog_large_best.pt"
+ },
+ "vqa_gen":{
+ "large":"vqa_large_best.pt",
+ "huge":"vqa_large_best.pt"
+ }
+}
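+# NOTE: for refcoco and vqa_gen, the "huge" entries above currently reuse the large checkpoints.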
+
+def apply_half(t):
+ if t.dtype is torch.float32:
+ return t.to(dtype=torch.half)
+ return t
+
+class Inference:
+ def __init__(self, task, model_scale):
+ self.use_cuda = torch.cuda.is_available()
+ self.use_fp16 = False
+
+ # set params
+ param = OFA_PARAM[task][model_scale]
+ param_path = os.path.join(PARAM_DIR, param)
+ overrides={"bpe_dir":"utils/BPE", "eval_cider":False, "beam":5,
+ "max_len_b":16, "no_repeat_ngram_size":3, "seed":7}
+
+ self.task_name = task
+ if task == "caption":
+ tasks.register_task(task, CaptionTask)
+ self.models, self.cfg, self.task = checkpoint_utils.load_model_ensemble_and_task(
+ utils.split_paths(param_path),
+ arg_overrides=overrides)
+ elif task == "refcoco":
+ tasks.register_task(task, RefcocoTask) # register by the task name string, as in the caption branch (self.task is not defined yet here)
+ self.models, self.cfg, self.task = checkpoint_utils.load_model_ensemble_and_task(
+ utils.split_paths(param_path),
+ arg_overrides=overrides)
+ self.cfg.common.seed = 7
+ self.cfg.generation.beam = 5
+ self.cfg.generation.min_len = 4
+ self.cfg.generation.max_len_a = 0
+ self.cfg.generation.max_len_b = 4
+ self.cfg.generation.no_repeat_ngram_size = 3
+ if self.cfg.common.seed is not None and not self.cfg.generation.no_seed_provided:
+ np.random.seed(self.cfg.common.seed)
+ utils.set_torch_seed(self.cfg.common.seed)
+ elif task == "vqa_gen":
+ tasks.register_task('vqa_gen', VqaGenTask)
+ parser = options.get_generation_parser()
+ input_args = ["", "--task=vqa_gen", "--beam=100", "--unnormalized", "--path={}".format(param_path), "--bpe-dir=utils/BPE"]
+ args = options.parse_args_and_arch(parser, input_args)
+ cfg = convert_namespace_to_omegaconf(args)
+ self.task = tasks.setup_task(cfg.task)
+ self.models, self.cfg = checkpoint_utils.load_model_ensemble(
+ utils.split_paths(cfg.common_eval.path),
+ task=self.task)
+ else:
+ raise RuntimeError("Please select models from caption, refcoco, vqa_gen")
+ return
+
+ # Move models to GPU
+ for model in self.models:
+ model.eval()
+ if self.use_fp16:
+ model.half()
+ if self.use_cuda and not self.cfg.distributed_training.pipeline_model_parallel:
+ model.cuda()
+ model.prepare_for_inference_(self.cfg)
+
+ # Image transform
+ self.generator = self.task.build_generator(self.models, self.cfg.generation)
+ mean = [0.5, 0.5, 0.5]
+ std = [0.5, 0.5, 0.5]
+ self.patch_resize_transform = transforms.Compose([
+ lambda image: image.convert("RGB"),
+ transforms.Resize((self.cfg.task.patch_image_size, self.cfg.task.patch_image_size), interpolation=Image.BICUBIC),
+ transforms.ToTensor(),
+ transforms.Normalize(mean=mean, std=std),
+ ])
+
+ self.pad_idx = self.task.src_dict.pad()
+
+ def visual_grounding(self, Image, Text):
+ sample = self.construct_sample(Image, Text.lower())
+ sample = utils.move_to_cuda(sample) if self.use_cuda else sample
+ sample = utils.apply_to_sample(apply_half, sample) if self.use_fp16 else sample
+ with torch.no_grad():
+ result, scores = eval_step(self.task, self.generator, self.models, sample)
+ img = np.asarray(Image)
+ cv2.rectangle(
+ img,
+ (int(result[0]["box"][0]), int(result[0]["box"][1])),
+ (int(result[0]["box"][2]), int(result[0]["box"][3])),
+ (0, 255, 0), 3)
+ return img
+
+ def encode_text(self, text, length=None, append_bos=False, append_eos=False):
+ bos_item = torch.LongTensor([self.task.src_dict.bos()])
+ eos_item = torch.LongTensor([self.task.src_dict.eos()])
+ # pad_idx = self.task.src_dict.pad()
+ s = self.task.tgt_dict.encode_line(
+ line=self.task.bpe.encode(text),
+ add_if_not_exist=False,
+ append_eos=False).long()
+ if length is not None:
+ s = s[:length]
+ if append_bos:
+ s = torch.cat([bos_item, s])
+ if append_eos:
+ s = torch.cat([s, eos_item])
+ return s
+
+ def construct_sample(self, image, text):
+ if self.task_name == "caption" or self.task_name == "vqa_gen":
+ patch_image = self.patch_resize_transform(image).unsqueeze(0)
+ patch_mask = torch.tensor([True])
+ src_text = self.encode_text(" " + text, append_bos=True, append_eos=True).unsqueeze(0)
+ src_length = torch.LongTensor([s.ne(self.pad_idx).long().sum() for s in src_text])
+ if self.task_name == "caption":
+ sample = {
+ "id":np.array(['42']),
+ "net_input": {
+ "src_tokens": src_text,
+ "src_lengths": src_length,
+ "patch_images": patch_image,
+ "patch_masks": patch_mask
+ }
+ }
+ elif self.task_name == "vqa_gen":
+ ref_dict = np.array([{'yes': 1.0}]) # just placeholder
+ sample = {
+ "id":np.array(['42']),
+ "net_input": {
+ "src_tokens": src_text,
+ "src_lengths": src_length,
+ "patch_images": patch_image,
+ "patch_masks": patch_mask
+ },
+ "ref_dict": ref_dict,
+ }
+ return sample
+ elif self.task_name == "refcoco":
+ patch_image_size = self.cfg.task.patch_image_size
+ w, h = image.size
+ w_resize_ratio = torch.tensor(patch_image_size / w).unsqueeze(0)
+ h_resize_ratio = torch.tensor(patch_image_size / h).unsqueeze(0)
+ patch_image = self.patch_resize_transform(image).unsqueeze(0)
+ patch_mask = torch.tensor([True])
+ src_text = self.encode_text(' which region does the text " {} " describe?'.format(text), append_bos=True,
+ append_eos=True).unsqueeze(0)
+ src_length = torch.LongTensor([s.ne(self.pad_idx).long().sum() for s in src_text])
+ sample = {
+ "id": np.array(['42']),
+ "net_input": {
+ "src_tokens": src_text,
+ "src_lengths": src_length,
+ "patch_images": patch_image,
+ "patch_masks": patch_mask,
+ },
+ "w_resize_ratios": w_resize_ratio,
+ "h_resize_ratios": h_resize_ratio,
+ "region_coords": torch.randn(1, 4)
+ }
+ return sample
+
+ def infer(self, img, text):
+ # get cv2 image
+ if self.task_name == "caption" or self.task_name == "vqa_gen":
+ image = cv2.resize(img, dsize=(640, 480)) # NOTE: forcibly resized
+ image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+ image = Image.fromarray(image)
+ # Construct input sample & preprocess for GPU if cuda available
+ sample = self.construct_sample(image, text)
+ sample = utils.move_to_cuda(sample) if self.use_cuda else sample
+ sample = utils.apply_to_sample(apply_half, sample) if self.use_fp16 else sample
+ if self.task_name == "caption":
+ with torch.no_grad():
+ result, scores = eval_step(self.task, self.generator, self.models, sample)
+ text = result[0]['caption']
+ return text
+ elif self.task_name == "vqa_gen":
+ with torch.no_grad():
+ result, scores = zero_shot_step(self.task, self.generator, self.models, sample)
+ text = result[0]['answer']
+ return text
+ elif self.task_name == "refcoco":
+ pass
+
+# run
+if __name__ == "__main__":
+ app = Flask(__name__)
+ ofa_task = os.environ["OFA_TASK"]
+ ofa_model_scale = os.environ["OFA_MODEL_SCALE"]
+ # TODO add refcoco
+ if ofa_task == "all":
+ caption_infer = Inference("caption", ofa_model_scale)
+ vqa_infer = Inference("vqa_gen", ofa_model_scale)
+
+ elif ofa_task == "caption":
+ caption_infer = Inference("caption", ofa_model_scale)
+
+ elif ofa_task == "vqa_gen":
+ vqa_infer = Inference("vqa_gen", ofa_model_scale)
+
+ else:
+ raise RuntimeError("No application is available")
+
+ try:
+ @app.route("/caption", methods=['POST'])
+ def caption_request():
+ data = request.data.decode("utf-8")
+ data_json = json.loads(data)
+ # process image
+ image_b = data_json['image']
+ image_dec = base64.b64decode(image_b)
+ data_np = np.fromstring(image_dec, dtype='uint8')
+ img = cv2.imdecode(data_np, 1)
+ # get text
+ texts = data_json['queries']
+ results = []
+ for text in texts:
+ answer = caption_infer.infer(img, text)
+ results.append({"question": text, "answer": answer})
+ return Response(response=json.dumps({"results": results}), status=200)
+ except NameError:
+ print("Skipping create caption app")
+
+ try:
+ @app.route("/vqa_gen", methods=['POST'])
+ def vqa_request():
+ data = request.data.decode("utf-8")
+ data_json = json.loads(data)
+ # process image
+ image_b = data_json['image']
+ image_dec = base64.b64decode(image_b)
+ data_np = np.fromstring(image_dec, dtype='uint8')
+ img = cv2.imdecode(data_np, 1)
+ # get text
+ texts = data_json['queries']
+ results = []
+ for text in texts:
+ answer = vqa_infer.infer(img, text)
+ results.append({"question": text, "answer": answer})
+ return Response(response=json.dumps({"results": results}), status=200)
+ except NameError:
+ print("Skipping create vqa_gen app")
+
+ app.run("0.0.0.0", 8080, threaded=True)
diff --git a/jsk_perception/docker/parameter_urls.txt b/jsk_perception/docker/parameter_urls.txt
new file mode 100644
index 0000000000..361481ee8c
--- /dev/null
+++ b/jsk_perception/docker/parameter_urls.txt
@@ -0,0 +1,4 @@
+https://ofa-beijing.oss-cn-beijing.aliyuncs.com/checkpoints/caption_huge_best.pt
+https://ofa-beijing.oss-cn-beijing.aliyuncs.com/checkpoints/caption_large_best_clean.pt
+https://ofa-beijing.oss-cn-beijing.aliyuncs.com/checkpoints/refcocog_large_best.pt
+https://ofa-beijing.oss-cn-beijing.aliyuncs.com/checkpoints/vqa_large_best.pt
diff --git a/jsk_perception/docker/run_jsk_vil_api b/jsk_perception/docker/run_jsk_vil_api
new file mode 100755
index 0000000000..acef636280
--- /dev/null
+++ b/jsk_perception/docker/run_jsk_vil_api
@@ -0,0 +1,48 @@
+#!/usr/bin/env python3
+import argparse
+import os
+try:
+ import rospy
+ import_rospy = True
+except ImportError:
+ import_rospy = False
+import subprocess
+import sys
+
+CONTAINERS = {"ofa": "jsk-ofa-server",
+ "clip": "jsk-clip-server"}
+OFA_MODEL_SCALES = ["base", "large", "huge"]
+
+parser = argparse.ArgumentParser(description="JSK Vision and Language API runner")
+parser.add_argument("model", choices=CONTAINERS.keys(),
+ help="Vision and Language model you want to run", type=str)
+parser.add_argument("-p", "--port", default=8888, help="Port of the API server", type=int)
+parser.add_argument("--ofa_task", default="caption",
+ help="Tasks to be loaded in OFA. OFA loads all tasks by default and requires large amount of GPU RAM")
+parser.add_argument("--ofa_model_scale", default="huge", choices=OFA_MODEL_SCALES,
+ help="Scale of parameters to be read in OFA. Use huge by default")
+
+args, unknown = parser.parse_known_args()
+
+if __name__ == "__main__":
+ if import_rospy and len(sys.argv) != len(rospy.myargv()): # roslaunch
+ rospy.init_node("jsk_vil_docker_manager")
+ port_option = "{}:8080".format(str(args.port))
+ _ros_home = os.getenv("ROS_HOME", os.path.join(os.environ["HOME"],".ros"))
+ mount_option = "{}:/var/mount/params".format(os.path.join(_ros_home, "data", "jsk_perception", "vil_params"))
+ docker_image = "{}:latest".format(CONTAINERS[args.model])
+
+ cmd = ["docker", "run", "--rm", "-it", "--gpus", "all",
+ "-p", port_option,
+ "-v", mount_option]
+
+ # OFA
+ if args.model == "ofa":
+ ofa_option = ["-e", "OFA_TASK={}".format(args.ofa_task),
+ "-e", "OFA_MODEL_SCALE={}".format(args.ofa_model_scale)]
+ cmd += ofa_option
+
+ # Execute
+ print("Run {} container on port {}".format(docker_image, args.port))
+ cmd.append(docker_image)
+ subprocess.run(cmd)
diff --git a/jsk_perception/include/jsk_perception/invert_mask_image.h b/jsk_perception/include/jsk_perception/invert_mask_image.h
new file mode 100644
index 0000000000..6d6ce4b03f
--- /dev/null
+++ b/jsk_perception/include/jsk_perception/invert_mask_image.h
@@ -0,0 +1,66 @@
+// -*- mode: c++ -*-
+/*********************************************************************
+ * Software License Agreement (BSD License)
+ *
+ * Copyright (c) 2022, JSK Lab
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ * * Neither the name of the JSK Lab nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *********************************************************************/
+
+
+#ifndef JSK_PERCEPTION_INVERT_MASK_IMAGE_H_
+#define JSK_PERCEPTION_INVERT_MASK_IMAGE_H_
+
+#include <jsk_topic_tools/diagnostic_nodelet.h>
+#include <sensor_msgs/Image.h>
+#include <boost/thread/mutex.hpp>
+
+
+namespace jsk_perception
+{
+ class InvertMaskImage: public jsk_topic_tools::DiagnosticNodelet
+ {
+ public:
+ InvertMaskImage(): DiagnosticNodelet("InvertMaskImage") {}
+ virtual ~InvertMaskImage();
+ protected:
+
+ virtual void onInit();
+ virtual void subscribe();
+ virtual void unsubscribe();
+ virtual void invert(const sensor_msgs::Image::ConstPtr& mask_msg);
+
+ boost::mutex mutex_;
+ ros::Subscriber sub_mask_;
+ ros::Publisher pub_mask_;
+ private:
+ };
+}
+
+#endif
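The header only declares the nodelet interface; the operation itself is the complement of the incoming mono8 mask. A short OpenCV sketch of that operation, assuming 0/255 mask values as usually published on jsk_perception mask topics:

```python
import cv2
import numpy as np

# Build a toy mono8 mask and invert it; the nodelet presumably publishes the
# equivalent of cv2.bitwise_not applied to its ~input mask topic.
mask = np.zeros((4, 4), dtype=np.uint8)
mask[1:3, 1:3] = 255              # foreground region
inverted = cv2.bitwise_not(mask)  # 255 <-> 0, i.e. the complement mask
```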
diff --git a/jsk_perception/include/jsk_perception/point_pose_extractor.h b/jsk_perception/include/jsk_perception/point_pose_extractor.h
new file mode 100644
index 0000000000..ddcdfd00ed
--- /dev/null
+++ b/jsk_perception/include/jsk_perception/point_pose_extractor.h
@@ -0,0 +1,706 @@
+// -*- mode: c++ -*-
+/*********************************************************************
+ * Software License Agreement (BSD License)
+ *
+ * Copyright (c) 2015, JSK Lab
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ * * Neither the name of the JSK Lab nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *********************************************************************/
+#ifndef JSK_PERCEPTION_POINT_POSE_EXTRACTOR_H_
+#define JSK_PERCEPTION_POINT_POSE_EXTRACTOR_H_
+
+#include
+#include
+#include
+#include
+#if ( CV_MAJOR_VERSION >= 4)
+#include
+#include
+#include
+#else
+#include
+#include
+#endif
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace enc = sensor_msgs::image_encodings;
+
+void features2keypoint (posedetection_msgs::Feature0D features,
+ std::vector<cv::KeyPoint>& keypoints,
+ cv::Mat& descriptors){
+ keypoints.resize(features.scales.size());
+ descriptors.create(features.scales.size(),features.descriptor_dim,CV_32FC1);
+ std::vector<cv::KeyPoint>::iterator keypoint_it = keypoints.begin();
+ for ( int i=0; keypoint_it != keypoints.end(); ++keypoint_it, ++i ) {
+ *keypoint_it = cv::KeyPoint(cv::Point2f(features.positions[i*2+0],
+ features.positions[i*2+1]),
+ features.descriptor_dim, // size
+ features.orientations[i], //angle
+ 0, // response
+ features.scales[i] // octave
+ );
+ for (int j = 0; j < features.descriptor_dim; j++){
+ descriptors.at<float>(i,j) =
+ features.descriptors[i*features.descriptor_dim+j];
+ }
+ }
+}
+
+
+class Matching_Template
+{
+public:
+ std::string _matching_frame;
+ cv::Mat _template_img;
+ std::vector<cv::KeyPoint> _template_keypoints;
+ cv::Mat _template_descriptors;
+ int _original_width_size;
+ int _original_height_size;
+ double _template_width; // width of template [m]
+ double _template_height; // height of template [m]
+ tf::Transform _relativepose;
+ cv::Mat _affine_matrix;
+ std::string _window_name;
+ // The maximum allowed reprojection error to treat a point pair as an inlier
+ double _reprojection_threshold;
+ // threshold on squared ratio of distances between NN and 2nd NN
+ double _distanceratio_threshold;
+ std::vector<cv::Point2d> _correspondances;
+ cv::Mat _previous_stack_img;
+
+ Matching_Template(){
+ }
+ Matching_Template(cv::Mat img,
+ std::string matching_frame,
+ int original_width_size,
+ int original_height_size,
+ double template_width,
+ double template_height,
+ tf::Transform relativepose,
+ cv::Mat affine_matrix,
+ double reprojection_threshold,
+ double distanceratio_threshold,
+ std::string window_name,
+ bool autosize){
+
+ _template_img = img.clone();
+ _matching_frame = matching_frame;
+ _original_width_size = original_width_size;
+ _original_height_size = original_height_size;
+ _template_width = template_width;
+ _template_height = template_height;
+ _relativepose = relativepose;
+ _affine_matrix = affine_matrix;
+ _window_name = window_name;
+ _reprojection_threshold = reprojection_threshold;
+ _distanceratio_threshold = distanceratio_threshold;
+ _template_keypoints.clear();
+ _correspondances.clear();
+ // cv::namedWindow(_window_name, autosize ? CV_WINDOW_AUTOSIZE : 0);
+ }
+
+
+ virtual ~Matching_Template(){
+ std::cerr << "delete " << _window_name << std::endl;
+ }
+
+
+ std::string get_window_name(){
+ return _window_name;
+ }
+
+ std::vector<cv::Point2d>* correspondances(){
+ return &_correspondances;
+ }
+
+ bool check_template (){
+ if (_template_img.empty() && _template_keypoints.size() == 0 &&
+ _template_descriptors.empty()){
+ return false;
+ }
+ else {
+ return true;
+ }
+ }
+
+
+ int set_template(ros::ServiceClient client){
+ posedetection_msgs::Feature0DDetect srv;
+
+ if ( _template_img.empty()) {
+ ROS_ERROR ("template picture is empty.");
+ return -1;
+ }
+ ROS_INFO_STREAM("read template image and call " << client.getService() << " service");
+ cv_bridge::CvImage cv_img;
+ cv_img.image = cv::Mat(_template_img);
+ cv_img.encoding = std::string("bgr8");
+ srv.request.image = *(cv_img.toImageMsg());
+ if (client.call(srv)) {
+ ROS_INFO_STREAM("get features with " << srv.response.features.scales.size() << " descriptoins");
+ features2keypoint (srv.response.features, _template_keypoints, _template_descriptors);
+
+ // inverse affine translation
+ cv::Mat M_inv;
+ cv::Mat dst;
+ cv::invert(_affine_matrix, M_inv);
+ // cv::drawKeypoints (tmp, _template_keypoints, dst, cv::Scalar::all(1), cv::DrawMatchesFlags::DRAW_RICH_KEYPOINTS);
+ // cv::drawKeypoints (tmp, _template_keypoints, dst);
+ // cv::warpPerspective(_template_img, dst, M_inv,
+ // cv::Size(_original_width_size, _original_height_size),
+ // CV_INTER_LINEAR, IPL_BORDER_CONSTANT, 0);
+ for (int i = 0; i < (int)_template_keypoints.size(); i++){
+ cv::Point2f pt;
+ cv::Mat pt_mat(cv::Size(1,1), CV_32FC2, &pt);
+ cv::Mat pt_src_mat(cv::Size(1,1), CV_32FC2, &_template_keypoints.at(i).pt);
+ cv::perspectiveTransform (pt_src_mat, pt_mat, M_inv);
+ _template_keypoints.at(i).pt = pt_mat.at<cv::Point2f>(0,0);
+ }
+ // cvSetMouseCallback (_window_name.c_str(), &PointPoseExtractor::cvmousecb, this);
+ // _template_img = dst;
+ return 0;
+ } else {
+ ROS_ERROR("Failed to call service Feature0DDetect");
+ return 1;
+ }
+ }
+
+
+ double log_fac( int n )
+ {
+ static std::vector<double> slog_table;
+ int osize = slog_table.size();
+ if(osize <= n){
+ slog_table.resize(n+1);
+ if(osize == 0){
+ slog_table[0] = -1;
+ slog_table[1] = log(1.0);
+ osize = 2;
+ }
+ for(int i = osize; i <= n; i++ ){
+ slog_table[i] = slog_table[i-1] + log(i);
+ }
+ }
+ return slog_table[n];
+ }
+
+ int min_inlier( int n, int m, double p_badsupp, double p_badxform )
+ {
+ double pi, sum;
+ int i, j;
+ double lp_badsupp = log( p_badsupp );
+ double lp_badsupp1 = log( 1.0 - p_badsupp );
+ for( j = m+1; j <= n; j++ ){
+ sum = 0;
+ for( i = j; i <= n; i++ ){
+ pi = (i-m) * lp_badsupp + (n-i+m) * lp_badsupp1 +
+ log_fac( n - m ) - log_fac( i - m ) - log_fac( n - i );
+ sum += exp( pi );
+ }
+ if( sum < p_badxform )
+ break;
+ }
+ return j;
+ }
+
+
+ bool estimate_od (ros::ServiceClient client, cv::Mat src_img,
+ std::vector<cv::KeyPoint> sourceimg_keypoints,
+ image_geometry::PinholeCameraModel pcam,
+ double err_thr, cv::Mat &stack_img,
+ cv::flann::Index* ft, posedetection_msgs::Object6DPose* o6p){
+
+ if ( _template_keypoints.size()== 0 &&
+ _template_descriptors.empty() ){
+ set_template(client);
+ }
+ if ( _template_keypoints.size()== 0 &&
+ _template_descriptors.empty() ) {
+ ROS_ERROR ("Template image was not set.");
+ return false;
+ }
+ // stacked image
+ cv::Size stack_size = cv::Size(MAX(src_img.cols,_template_img.cols),
+ src_img.rows+_template_img.rows);
+ stack_img = cv::Mat(stack_size,CV_8UC3);
+ stack_img = cv::Scalar(0);
+ cv::Mat stack_img_tmp(stack_img,cv::Rect(0,0,_template_img.cols,_template_img.rows));
+ cv::add(stack_img_tmp, _template_img, stack_img_tmp);
+ cv::Mat stack_img_src(stack_img,cv::Rect(0,_template_img.rows,src_img.cols,src_img.rows));
+ cv::add(stack_img_src, src_img, stack_img_src);
+ _previous_stack_img = stack_img.clone();
+
+ // matching
+ cv::Mat m_indices(_template_descriptors.rows, 2, CV_32S);
+ cv::Mat m_dists(_template_descriptors.rows, 2, CV_32F);
+ ft->knnSearch(_template_descriptors, m_indices, m_dists, 2, cv::flann::SearchParams(-1) );
+
+ // matched points
+ std::vector<cv::Point2f> pt1, pt2;
+ std::vector<int> queryIdxs,trainIdxs;
+ for ( unsigned int j = 0; j < _template_keypoints.size(); j++ ) {
+ if ( m_dists.at<float>(j,0) < m_dists.at<float>(j,1) * _distanceratio_threshold) {
+ queryIdxs.push_back(j);
+ trainIdxs.push_back(m_indices.at<int>(j,0));
+ }
+ }
+ if ( queryIdxs.size() == 0 ) {
+ ROS_WARN_STREAM("could not find matched points with distanceratio(" <<_distanceratio_threshold << ")");
+ } else {
+ cv::KeyPoint::convert(_template_keypoints,pt1,queryIdxs);
+ cv::KeyPoint::convert(sourceimg_keypoints,pt2,trainIdxs);
+ }
+
+ ROS_INFO ("Found %d total matches among %d template keypoints", (int)pt2.size(), (int)_template_keypoints.size());
+
+ cv::Mat H;
+ std::vector<uchar> mask((int)pt2.size());
+
+ if ( pt1.size() > 4 ) {
+ // TODO: handle curved faces
+ H = cv::findHomography(cv::Mat(pt1), cv::Mat(pt2), mask, CV_RANSAC, _reprojection_threshold);
+ }
+
+ // draw line
+ for (int j = 0; j < (int)pt1.size(); j++){
+ cv::Point2f pt, pt_orig;
+ cv::Mat pt_mat(cv::Size(1,1), CV_32FC2, &pt);
+ cv::Mat pt_src_mat(cv::Size(1,1), CV_32FC2, &pt1.at(j));
+ cv::perspectiveTransform (pt_src_mat, pt_mat, _affine_matrix);
+ pt_orig = pt_mat.at<cv::Point2f>(0,0);
+ if ( mask.at(j)){
+ cv::line(stack_img, pt_orig, pt2.at(j)+cv::Point2f(0,_template_img.rows),
+ CV_RGB(0,255,0), 1,8,0);
+ }
+ else {
+ cv::line(stack_img, pt_orig, pt2.at(j)+cv::Point2f(0,_template_img.rows),
+ CV_RGB(255,0,255), 1,8,0);
+ }
+ }
+ int inlier_sum = 0;
+ for (int k=0; k < (int)mask.size(); k++){
+ inlier_sum += (int)mask.at(k);
+ }
+
+ double text_scale = 1.5;
+ {
+ int fontFace = 0, thickness = 0, baseLine;
+ int x, y;
+ cv::Size text_size;
+ std::string text;
+
+ text = "inlier: " + boost::lexical_cast((int)inlier_sum) + " / " + boost::lexical_cast((int)pt2.size());
+ text_size = cv::getTextSize(text, fontFace, text_scale, thickness, &baseLine);
+ x = stack_img.size().width - text_size.width;
+ y = text_size.height + thickness + 10; // 10pt padding
+ cv::putText (stack_img, text, cv::Point(x, y),
+ fontFace, text_scale, CV_RGB(0, 255, 0),
+ thickness, 8, false);
+
+ text = "template: " + boost::lexical_cast((int)_template_keypoints.size());
+ text_size = cv::getTextSize(text, fontFace, text_scale, thickness, &baseLine);
+ x = stack_img.size().width - text_size.width;
+ y += text_size.height + thickness + 10; // 10pt padding
+ cv::putText (stack_img, text, cv::Point(x, y),
+ fontFace, text_scale, CV_RGB(0, 255, 0),
+ thickness, 8, false);
+ }
+
+ // draw correspondances
+ ROS_INFO(" _correspondances.size: %d", (int)_correspondances.size());
+ for (int j = 0; j < (int)_correspondances.size(); j++){
+ cv::circle(stack_img, cv::Point2f(_correspondances.at(j).x, _correspondances.at(j).y + _template_img.size().height),
+ 8, CV_RGB(255,0,0), -1);
+ }
+
+ ROS_INFO(" inlier_sum:%d min_lier:%d", inlier_sum, min_inlier((int)pt2.size(), 4, 0.10, 0.01));
+ if ((cv::countNonZero( H ) == 0) || (inlier_sum < min_inlier((int)pt2.size(), 4, 0.10, 0.01))){
+ ROS_INFO(" inlier_sum < min_lier return-from estimate-od");
+ if( _window_name != "" )
+ cv::imshow(_window_name, stack_img);
+ return false;
+ }
+
+ std::string _type;
+ char chr[20];
+
+ // number of match points
+ sprintf(chr, "%d", (int)pt2.size());
+ _type = _matching_frame + "_" + std::string(chr);
+
+ sprintf(chr, "%d", inlier_sum);
+ _type = _type + "_" + std::string(chr);
+
+ cv::Point2f corners2d[4] = {cv::Point2f(0,0),
+ cv::Point2f(_original_width_size,0),
+ cv::Point2f(_original_width_size,_original_height_size),
+ cv::Point2f(0,_original_height_size)};
+ cv::Mat corners2d_mat (cv::Size(4, 1), CV_32FC2, corners2d);
+ cv::Point3f corners3d[4] = {cv::Point3f(0,0,0),
+ cv::Point3f(0,_template_width,0),
+ cv::Point3f(_template_height,_template_width,0),
+ cv::Point3f(_template_height,0,0)};
+ cv::Mat corners3d_mat (cv::Size(4, 1), CV_32FC3, corners3d);
+
+ cv::Mat corners2d_mat_trans;
+
+ cv::perspectiveTransform (corners2d_mat, corners2d_mat_trans, H);
+
+ double fR3[3], fT3[3];
+ cv::Mat rvec(3, 1, CV_64FC1, fR3);
+ cv::Mat tvec(3, 1, CV_64FC1, fT3);
+ cv::Mat zero_distortion_mat = cv::Mat::zeros(4, 1, CV_64FC1);
+
+ cv::solvePnP (corners3d_mat, corners2d_mat_trans,
+ pcam.intrinsicMatrix(),
+ zero_distortion_mat,//if unrectified: pcam.distortionCoeffs()
+ rvec, tvec);
+
+ tf::Transform checktf, resulttf;
+
+ checktf.setOrigin( tf::Vector3(fT3[0], fT3[1], fT3[2] ) );
+
+ double rx = fR3[0], ry = fR3[1], rz = fR3[2];
+ tf::Quaternion quat;
+ double angle = cv::norm(rvec);
+ quat.setRotation(tf::Vector3(rx/angle, ry/angle, rz/angle), angle);
+ checktf.setRotation( quat );
+
+ resulttf = checktf * _relativepose;
+
+ ROS_INFO( " tx: (%0.2lf,%0.2lf,%0.2lf) rx: (%0.2lf,%0.2lf,%0.2lf)",
+ resulttf.getOrigin().getX(),
+ resulttf.getOrigin().getY(),
+ resulttf.getOrigin().getZ(),
+ resulttf.getRotation().getAxis().x() * resulttf.getRotation().getAngle(),
+ resulttf.getRotation().getAxis().y() * resulttf.getRotation().getAngle(),
+ resulttf.getRotation().getAxis().z() * resulttf.getRotation().getAngle());
+
+ o6p->pose.position.x = resulttf.getOrigin().getX();
+ o6p->pose.position.y = resulttf.getOrigin().getY();
+ o6p->pose.position.z = resulttf.getOrigin().getZ();
+ o6p->pose.orientation.w = resulttf.getRotation().w();
+ o6p->pose.orientation.x = resulttf.getRotation().x();
+ o6p->pose.orientation.y = resulttf.getRotation().y();
+ o6p->pose.orientation.z = resulttf.getRotation().z();
+ o6p->type = _matching_frame; // _type
+
+ // draw 3d cube model
+ std::vector<cv::Point2f> projected_top;
+ {
+ tf::Vector3 coords[8] = {tf::Vector3(0,0,0),
+ tf::Vector3(0, _template_width, 0),
+ tf::Vector3(_template_height, _template_width,0),
+ tf::Vector3(_template_height, 0, 0),
+ tf::Vector3(0, 0, -0.03),
+ tf::Vector3(0, _template_width, -0.03),
+ tf::Vector3(_template_height, _template_width, -0.03),
+ tf::Vector3(_template_height, 0, -0.03)};
+
+ projected_top = std::vector<cv::Point2f>(8);
+
+ for(int i=0; i<8; i++) {
+ coords[i] = checktf * coords[i];
+ cv::Point3f pt(coords[i].getX(), coords[i].getY(), coords[i].getZ());
+ projected_top[i] = pcam.project3dToPixel(pt);
+ }
+ }
+
+ { // check that the matched region is not too big or too small
+ float max_x, max_y, min_x, min_y;
+ max_x = max_y = -1e9;
+ min_x = min_y = 1e9;
+ for (int j = 0; j < 4; j++){
+ cv::Point2f pt = corners2d_mat_trans.at<cv::Point2f>(0,j);
+ max_x = std::max(max_x, pt.x), max_y = std::max(max_y, pt.y);
+ min_x = std::min(min_x, pt.x), min_y = std::min(min_y, pt.y);
+ }
+ if((max_x - min_x) < 30 || (max_y - min_y) < 30 ||
+ src_img.rows < (max_x - min_x)/2 || src_img.cols < (max_y - min_y)/2){
+ ROS_INFO(" matched region is too big or small (2< && <30) width:%f height:%f return-from estimate-od", max_x - min_x, max_y - min_y);
+ return false;
+ }
+ }
+
+ double err_sum = 0;
+ bool err_success = true;
+ for (int j = 0; j < 4; j++){
+ double err = sqrt(pow((corners2d_mat_trans.at<cv::Point2f>(0,j).x - projected_top.at(j).x), 2) +
+ pow((corners2d_mat_trans.at<cv::Point2f>(0,j).y - projected_top.at(j).y), 2));
+ err_sum += err;
+ }
+ if (err_sum > err_thr){
+ ROS_INFO(" err_sum:%f > err_thr:%f return-from estimate-od", err_sum, err_thr);
+ err_success = false;
+ } else {
+ o6p->reliability = 1.0 - (err_sum / err_thr);
+ }
+ // draw lines around the detected object
+ for (int j = 0; j < corners2d_mat_trans.cols; j++){
+ cv::Point2f p1(corners2d_mat_trans.at<cv::Point2f>(0,j).x,
+ corners2d_mat_trans.at<cv::Point2f>(0,j).y+_template_img.rows);
+ cv::Point2f p2(corners2d_mat_trans.at<cv::Point2f>(0,(j+1)%corners2d_mat_trans.cols).x,
+ corners2d_mat_trans.at<cv::Point2f>(0,(j+1)%corners2d_mat_trans.cols).y+_template_img.rows);
+ cv::line (stack_img, p1, p2, CV_RGB(255, 0, 0),
+ (err_success?4:1), // width
+ CV_AA, 0);
+ }
+
+ // draw 3d cube model
+ if(projected_top.size() == 8) { // verify, the size is 8
+ int cnt = 8;
+ std::vector<cv::Point2f> ps(cnt);
+ for(int i=0; i<cnt; i++)
+ ps[i] = cv::Point2f(projected_top.at(i).x,
+ projected_top.at(i).y+_template_img.rows);
+ for(int i=0; i<4; i++) {
+ cv::line (stack_img, ps[i], ps[(i+1)%4], CV_RGB(0, 0, 255), (err_success?4:1), CV_AA, 0);
+ cv::line (stack_img, ps[i+4], ps[(i+1)%4+4], CV_RGB(0, 0, 255), (err_success?4:1), CV_AA, 0);
+ cv::line (stack_img, ps[i], ps[i+4], CV_RGB(0, 0, 255), (err_success?4:1), CV_AA, 0);
+ }
+ }
+
+ // draw coordinate axes of the estimated pose
+ if (err_success) {
+ tf::Vector3 coords[4] = {tf::Vector3(0,0,0),
+ tf::Vector3(0.05,0,0),
+ tf::Vector3(0,0.05,0),
+ tf::Vector3(0,0,0.05)};
+ std::vector<cv::Point2f> ps(4);
+
+ for(int i=0; i<4; i++) {
+ coords[i] = resulttf * coords[i];
+ cv::Point3f pt(coords[i].getX(), coords[i].getY(), coords[i].getZ());
+ ps[i] = pcam.project3dToPixel(pt);
+ ps[i].y += _template_img.rows; // draw on camera image
+ }
+
+ cv::line (stack_img, ps[0], ps[1], CV_RGB(255, 0, 0), 3, CV_AA, 0);
+ cv::line (stack_img, ps[0], ps[2], CV_RGB(0, 255, 0), 3, CV_AA, 0);
+ cv::line (stack_img, ps[0], ps[3], CV_RGB(0, 0, 255), 3, CV_AA, 0);
+ }
+
+ // write text on image
+ {
+ std::string text;
+ int x, y;
+ text = "error: " + boost::lexical_cast(err_sum);
+ x = stack_img.size().width - 16*17*text_scale; // 16pt * 17
+ y = _template_img.size().height - (16 + 2)*text_scale*6;
+ cv::putText (stack_img, text, cv::Point(x, y),
+ 0, text_scale, CV_RGB(0, 255, 0),
+ 2, 8, false);
+ ROS_INFO(" %s < %f (threshold)", text.c_str(), err_thr );
+ }
+ // for debug window
+ if( _window_name != "" )
+ cv::imshow(_window_name, stack_img);
+
+ return err_success;
+ }
+}; // the end of the definition of class Matching_Template
+
+
+namespace jsk_perception
+{
+
+ class PointPoseExtractor: public jsk_topic_tools::DiagnosticNodelet
+ {
+ public:
+ std::vector<Matching_Template*> _templates;
+ typedef jsk_perception::point_pose_extractorConfig Config;
+ PointPoseExtractor() : DiagnosticNodelet("PointPoseExtractor") {}
+ virtual ~PointPoseExtractor();
+ protected:
+ ros::NodeHandle _n;
+ image_transport::ImageTransport *it;
+ ros::Subscriber _sub;
+ ros::ServiceServer _server;
+ ros::ServiceClient _client;
+ ros::Publisher _pub, _pub_agg, _pub_pose;
+ tf::TransformBroadcaster _br;
+ image_transport::Publisher _debug_pub;
+ bool _first_sample_change;
+ double _reprojection_threshold;
+ double _distanceratio_threshold;
+ double _th_step;
+ double _phi_step;
+ bool _autosize;
+ double _err_thr;
+ image_geometry::PinholeCameraModel pcam;
+ bool pnod;
+ bool _initialized;
+ bool _viewer;
+ bool _publish_tf;
+ std::string _child_frame_id;
+
+ virtual void initialize ();
+ virtual cv::Mat make_homography(cv::Mat src, cv::Mat rvec, cv::Mat tvec,
+ double template_width, double template_height, cv::Size &size);
+ virtual int make_warped_images (cv::Mat src, std::vector<cv::Mat> &imgs,
+ std::vector<cv::Mat> &Mvec,
+ double template_width, double template_height,
+ double th_step, double phi_step);
+ virtual bool add_new_template(cv::Mat img, std::string typestr, tf::Transform relative_pose,
+ double template_width, double template_height,
+ double theta_step, double phi_step);
+ virtual bool settemplate_cb (jsk_perception::SetTemplate::Request &req,
+ jsk_perception::SetTemplate::Response &res);
+ virtual void imagefeature_cb (const posedetection_msgs::ImageFeature0DConstPtr& msg);
+ virtual void onInit();
+ virtual void subscribe();
+ virtual void unsubscribe();
+ virtual void dyn_conf_callback(Config &config, uint32_t level);
+
+ static void cvmousecb (int event, int x, int y, int flags, void* param){
+ PointPoseExtractor* ppe = static_cast<PointPoseExtractor*>(param);
+ Matching_Template *mt = ppe->_templates.back();
+ // std::cerr << "mousecb_ -> " << mt << std::endl;
+ switch (event){
+ case CV_EVENT_LBUTTONUP: {
+ cv::Point2d pt(x,y - (int)mt->_template_img.size().height);
+ ROS_INFO("add correspondence (%d, %d)", (int)pt.x, (int)pt.y);
+ mt->_correspondances.push_back(pt);
+ if ((int)mt->_correspondances.size() >= 4){
+ make_template_from_mousecb(ppe);
+ mt->_correspondances.clear();
+ ROS_INFO("reset");
+ }
+ break;
+ }
+ case CV_EVENT_RBUTTONUP: {
+ mt->_correspondances.clear();
+ ROS_INFO("reset");
+ break;
+ }
+ }
+ }
+ static void make_template_from_mousecb(PointPoseExtractor* ppe){
+ Matching_Template *mt = ppe->_templates.back();
+ cv::Mat H;
+ cv::Mat tmp_template, tmp_warp_template;
+ std::vector<cv::Point2d> pt1, pt2;
+ double width, height;
+ std::string filename;
+ std::cout << "input template's [width]" << std::endl;
+ std::cin >> width;
+ std::cout << "input template's [height]" << std::endl;
+ std::cin >> height;
+ std::cout << "input template's [filename]" << std::endl;
+ std::cin >> filename;
+
+ for (int i = 0; i < 4; i++){
+ pt1.push_back(cv::Point2d((int)mt->_correspondances.at(i).x,
+ (int)mt->_correspondances.at(i).y + mt->_template_img.size().height));
+ }
+ cv::Rect rect = cv::boundingRect(cv::Mat(pt1));
+ double scale = std::max(width, height) / 500.0;
+ int iwidth = width / scale, iheight = height / scale;
+ pt2.push_back(cv::Point2d(0,0));
+ pt2.push_back(cv::Point2d(iwidth,0));
+ pt2.push_back(cv::Point2d(iwidth,iheight));
+ pt2.push_back(cv::Point2d(0, iheight));
+ H = cv::findHomography(cv::Mat(pt1), cv::Mat(pt2));
+
+ cv::getRectSubPix(mt->_previous_stack_img, rect.size(),
+ cv::Point2f((rect.tl().x + rect.br().x)/2.0,(rect.tl().y + rect.br().y)/2.0),
+ tmp_template);
+ cv::warpPerspective(mt->_previous_stack_img, tmp_warp_template, H, cv::Size(iwidth, iheight));
+
+ try {
+ cv::imwrite(filename,tmp_template);
+ boost::filesystem::path fname(filename);
+ std::stringstream ss;
+ ss << (fname.parent_path() / fname.stem()).c_str() << "_wrap" << fname.extension().c_str();
+ cv::imwrite(ss.str(),tmp_warp_template);
+ }catch (cv::Exception e) {
+ std::cerr << e.what() << std::endl;
+ }
+
+ for (int i = 0; i < (int)pt1.size(); i++){
+ pt2.push_back(cv::Point2d((int)pt1.at(i).x - rect.x,
+ (int)pt1.at(i).y - rect.y - mt->_template_img.size().height));
+ }
+ // cv::Mat mask_img = cv::Mat::zeros(tmp_template.size(), CV_8UC3);
+ // cv::fillConvexPoly(mask_img, pt2.begin(), (int)pt2.size(), CV_RGB(255, 255, 255));
+
+ // cv::namedWindow("hoge", 1);
+ // cv::imshow("hoge", mask_img);
+
+ cv::Mat M = (cv::Mat_<double>(3,3) << 1,0,0, 0,1,0, 0,0,1);
+ std::string window_name = "sample" + boost::lexical_cast<std::string>((int)ppe->_templates.size());
+
+ Matching_Template* tmplt =
+ new Matching_Template (tmp_warp_template, "sample",
+ tmp_warp_template.size().width, tmp_warp_template.size().height,
+ width, height,
+ tf::Transform(tf::Quaternion(0, 0, 0, 1), tf::Vector3(0, 0, 0)),
+ M,
+ mt->_reprojection_threshold,
+ mt->_distanceratio_threshold,
+ ppe->_first_sample_change ? window_name : mt->_window_name,
+ cv::getWindowProperty(mt->_window_name, CV_WND_PROP_AUTOSIZE));
+
+ mt->_correspondances.clear();
+ ppe->_templates.push_back(tmplt);
+ cv::namedWindow(ppe->_first_sample_change ? window_name : mt->_window_name,
+ cv::getWindowProperty(mt->_window_name, CV_WND_PROP_AUTOSIZE));
+ cvSetMouseCallback (ppe->_first_sample_change ? window_name.c_str() : mt->_window_name.c_str(),
+ &cvmousecb, ppe);
+ ppe->_first_sample_change = true;
+ }
+ private:
+ };
+}
+
+
+#endif
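estimate_od() above is long, but its geometric core is compact: match template descriptors to the scene, fit a homography with RANSAC, push the template's four corners through it, and recover the 6-DoF pose with solvePnP using the template's known metric size (the same corners2d/corners3d construction as in the header). A condensed Python sketch of that chain; the point arrays, camera matrix K and the size parameters are placeholders:

```python
import cv2
import numpy as np

def estimate_template_pose(pts_template, pts_scene, K,
                           width_px, height_px, width_m, height_m):
    """Sketch of the homography + solvePnP chain used by estimate_od().

    pts_template/pts_scene: matched 2D points (Nx2 float32, N >= 4),
    K: 3x3 camera matrix, *_px/*_m: template size in pixels and meters.
    """
    # Robustly fit the template-to-scene homography.
    H, inlier_mask = cv2.findHomography(pts_template, pts_scene,
                                        cv2.RANSAC, 3.0)
    # Map the four template corners into the scene image.
    corners2d = np.float32([[0, 0], [width_px, 0],
                            [width_px, height_px], [0, height_px]]).reshape(-1, 1, 2)
    corners2d_scene = cv2.perspectiveTransform(corners2d, H)
    # 3D corners in the template frame, same ordering as in the header.
    corners3d = np.float32([[0, 0, 0], [0, width_m, 0],
                            [height_m, width_m, 0], [height_m, 0, 0]])
    ok, rvec, tvec = cv2.solvePnP(corners3d, corners2d_scene, K, np.zeros(4))
    return rvec, tvec  # axis-angle rotation and translation of the template
```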
diff --git a/jsk_perception/include/jsk_perception/split_image.h b/jsk_perception/include/jsk_perception/split_image.h
new file mode 100644
index 0000000000..858e6cfde9
--- /dev/null
+++ b/jsk_perception/include/jsk_perception/split_image.h
@@ -0,0 +1,69 @@
+// -*- mode: c++ -*-
+/*********************************************************************
+ * Software License Agreement (BSD License)
+ *
+ * Copyright (c) 2023, JSK Lab
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ * * Neither the name of the JSK Lab nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *********************************************************************/
+/*
+ * split_image.h
+ * Author: Yoshiki Obinata
+ */
+
+
+#ifndef SPLIT_IMAGE_H_
+#define SPLIT_IMAGE_H_
+
+#include <ros/ros.h>
+#include <jsk_topic_tools/diagnostic_nodelet.h>
+#include <sensor_msgs/Image.h>
+
+namespace jsk_perception{
+
+ class SplitImage : public jsk_topic_tools::DiagnosticNodelet{
+
+ public:
+ SplitImage() : DiagnosticNodelet("SplitImage"){}
+ virtual ~SplitImage();
+ protected:
+ virtual void onInit();
+ virtual void subscribe();
+ virtual void unsubscribe();
+ virtual void splitImage(const sensor_msgs::Image::ConstPtr& image_msg);
+
+ ros::Subscriber sub_;
+ std::vector<ros::Publisher> pubs_;
+ int vertical_parts_;
+ int horizontal_parts_;
+ private:
+ };
+};
+
+#endif // SPLIT_IMAGE_H_
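The SplitImage nodelet divides the incoming image into vertical_parts_ x horizontal_parts_ tiles and publishes each tile on its own topic. A numpy sketch of the slicing, assuming the image size is divisible by the requested number of parts:

```python
import numpy as np

def split_image(img, vertical_parts, horizontal_parts):
    """Return the tiles of img in row-major order (illustrative sketch)."""
    h, w = img.shape[:2]
    th, tw = h // vertical_parts, w // horizontal_parts
    return [img[r * th:(r + 1) * th, c * tw:(c + 1) * tw]
            for r in range(vertical_parts) for c in range(horizontal_parts)]
```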
diff --git a/jsk_perception/include/jsk_perception/video_to_scene.h b/jsk_perception/include/jsk_perception/video_to_scene.h
new file mode 100644
index 0000000000..15a9473c23
--- /dev/null
+++ b/jsk_perception/include/jsk_perception/video_to_scene.h
@@ -0,0 +1,81 @@
+// -*- mode: C++ -*-
+/*********************************************************************
+ * Software License Agreement (BSD License)
+ *
+ * Copyright (c) 2022, JSK Lab
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ * * Neither the name of the JSK Lab nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *********************************************************************/
+/*
+ * video_to_scene.h
+ * Author: Kei Okada
+ */
+
+#ifndef VIDEO_TO_SCENE_H_
+#define VIDEO_TO_SCENE_H_
+
+#include <ros/ros.h>
+#include <boost/thread/mutex.hpp>
+#include <boost/shared_ptr.hpp>
+#include <dynamic_reconfigure/server.h>
+#include <image_transport/image_transport.h>
+#include <sensor_msgs/Image.h>
+#include <cv_bridge/cv_bridge.h>
+#include <opencv2/opencv.hpp>
+#include <jsk_topic_tools/diagnostic_nodelet.h>
+#include <jsk_perception/VideoToSceneConfig.h>
+
+
+namespace jsk_perception{
+ class VideoToScene: public jsk_topic_tools::DiagnosticNodelet{
+ public:
+ typedef VideoToSceneConfig Config;
+ VideoToScene() : DiagnosticNodelet("VideoToScene") {}
+ protected:
+ virtual void onInit();
+ virtual void subscribe();
+ virtual void unsubscribe();
+ virtual void work(const sensor_msgs::Image::ConstPtr& image_msg);
+ virtual void configCallback(Config &config, uint32_t level);
+
+ boost::shared_ptr<dynamic_reconfigure::Server<Config> > srv_;
+ image_transport::Subscriber sub_;
+ boost::shared_ptr<image_transport::ImageTransport> it_;
+ image_transport::Publisher pub_;
+ boost::mutex mutex_;
+ private:
+ cv::Ptr<cv::BackgroundSubtractor> bgsubtractor;
+ int min_percent_;
+ int max_percent_;
+ bool captured_;
+
+ };
+}
+
+#endif // VIDEO_TO_SCENE_H_
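The members suggest the scene-capture logic: run a background subtractor over the stream, measure the percentage of foreground pixels, and emit a frame once motion settles below min_percent_ after having exceeded max_percent_. A Python sketch of that thresholding, assuming a MOG2 subtractor and illustrative parameter values (the actual nodelet is configured through VideoToSceneConfig):

```python
import cv2

bgsub = cv2.createBackgroundSubtractorMOG2()
min_percent, max_percent, captured = 5.0, 20.0, False

def process(frame):
    """Return the frame to publish when a new static scene is detected, else None."""
    global captured
    fg = bgsub.apply(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY))
    percent = 100.0 * cv2.countNonZero(fg) / float(fg.shape[0] * fg.shape[1])
    if percent < min_percent and not captured:
        captured = True
        return frame              # motion settled: this frame is the new scene
    elif captured and percent >= max_percent:
        captured = False          # large motion again: wait for the next scene
    return None
```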
diff --git a/jsk_perception/launch/classification.launch b/jsk_perception/launch/classification.launch
new file mode 100644
index 0000000000..fbf14fae0e
--- /dev/null
+++ b/jsk_perception/launch/classification.launch
@@ -0,0 +1,24 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ host: $(arg host)
+ port: $(arg port)
+ image_transport: $(arg image_transport)
+
+
+
+
+
+
diff --git a/jsk_perception/launch/ofa_gui.launch b/jsk_perception/launch/ofa_gui.launch
new file mode 100644
index 0000000000..c0cba568db
--- /dev/null
+++ b/jsk_perception/launch/ofa_gui.launch
@@ -0,0 +1,6 @@
+
+
+
+
+
diff --git a/jsk_perception/launch/video_to_scene.launch b/jsk_perception/launch/video_to_scene.launch
new file mode 100644
index 0000000000..c262c3ab2c
--- /dev/null
+++ b/jsk_perception/launch/video_to_scene.launch
@@ -0,0 +1,25 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/jsk_perception/launch/vqa.launch b/jsk_perception/launch/vqa.launch
new file mode 100644
index 0000000000..32b3988b4b
--- /dev/null
+++ b/jsk_perception/launch/vqa.launch
@@ -0,0 +1,24 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ host: $(arg host)
+ port: $(arg port)
+ image_transport: $(arg image_transport)
+
+
+
+
+
+
diff --git a/jsk_perception/node_scripts/alexnet_object_recognition.py b/jsk_perception/node_scripts/alexnet_object_recognition.py
index 7722715d30..73fbdd88b7 100755
--- a/jsk_perception/node_scripts/alexnet_object_recognition.py
+++ b/jsk_perception/node_scripts/alexnet_object_recognition.py
@@ -30,7 +30,7 @@
from jsk_recognition_utils.chainermodels import AlexNetBatchNormalization
import rospy
from sensor_msgs.msg import Image
-from vgg16_object_recognition import VGG16ObjectRecognition
+from jsk_perception.vgg16_object_recognition import VGG16ObjectRecognition
class AlexNetObjectRecognition(VGG16ObjectRecognition):
diff --git a/jsk_perception/node_scripts/binpack_rect_array.py b/jsk_perception/node_scripts/binpack_rect_array.py
index d7854b9efb..21ee0fb80b 100755
--- a/jsk_perception/node_scripts/binpack_rect_array.py
+++ b/jsk_perception/node_scripts/binpack_rect_array.py
@@ -24,6 +24,8 @@ def width_cmp(self, another):
return self.rect.width - another.rect.width
def __cmp__(self, another): # python2
return self.max_cmp(another) or self.min_cmp(another) or self.height_cmp(another) or self.width_cmp(another) or 0
+ def __lt__(self, another): # python3
+ return (self.max_cmp(another) or self.min_cmp(another) or self.height_cmp(another) or self.width_cmp(another) or 0) < 0
class Packer():
def __init__(self):
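For reference, the same Python 2 to 3 port can also be expressed with functools.total_ordering, which derives the remaining rich comparisons from __eq__ and a boolean __lt__. A sketch under the assumption that max_cmp/min_cmp compare the larger/smaller rectangle dimension, matching the cmp chain above; the class and attribute names here are illustrative:

```python
from functools import total_ordering

@total_ordering
class Block(object):
    def __init__(self, rect):
        self.rect = rect

    def _key(self):
        # Same ordering as the __cmp__ chain: max dim, min dim, height, width.
        return (max(self.rect.width, self.rect.height),
                min(self.rect.width, self.rect.height),
                self.rect.height, self.rect.width)

    def __eq__(self, another):
        return self._key() == another._key()

    def __lt__(self, another):
        return self._key() < another._key()
```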
diff --git a/jsk_perception/node_scripts/bof_histogram_extractor.py b/jsk_perception/node_scripts/bof_histogram_extractor.py
index 4b06167593..e8ccb05186 100755
--- a/jsk_perception/node_scripts/bof_histogram_extractor.py
+++ b/jsk_perception/node_scripts/bof_histogram_extractor.py
@@ -5,8 +5,12 @@
"""
import gzip
-import cPickle as pickle
-from distutils.version import StrictVersion
+import sys
+if sys.version_info.major <= 2:
+ import cPickle as pickle
+else: # for python3
+ import _pickle as pickle
+from distutils.version import LooseVersion
from pkg_resources import get_distribution
import numpy as np
@@ -34,14 +38,20 @@ def __init__(self):
if bof_data is None:
quit()
with gzip.open(bof_data, 'rb') as f:
- self.bof = pickle.load(f)
- if (StrictVersion(get_distribution('scikit-learn').version) >=
- StrictVersion('0.17.0')):
+ if sys.version_info.major <= 2:
+ self.bof = pickle.load(f)
+ else:
+ self.bof = pickle.load(f, encoding='latin1')
+ if (LooseVersion(get_distribution('scikit-learn').version) >=
+ LooseVersion('0.17')):
if 'n_jobs' not in self.bof.nn.__dict__ or not isinstance(self.bof.nn.n_jobs, int):
# In scikit-learn>=0.17.0,
# sklearn.neighbors.NearestNeighbors needs 'n_jobs' attribute.
# https://github.com/jsk-ros-pkg/jsk_recognition/issues/1669
self.bof.nn.n_jobs = 1
+ # noetic uses newer scikit-learn which uses n_samples_fit_
+ if 'n_samples_fit_' not in self.bof.nn.__dict__:
+ self.bof.nn.n_samples_fit_ = self.bof.nn._fit_X.shape[0]
self._pub = self.advertise('~output', VectorArray, queue_size=1)
rospy.loginfo('Initialized BoF histogram extractor')
@@ -79,7 +89,7 @@ def _apply(self, feature_msg, label_msg):
decomposed = decompose_descriptors_with_label(
descriptors=desc, positions=pos, label_img=label,
skip_zero_label=True)
- X = np.array(decomposed.values())
+ X = np.array(list(decomposed.values()))
if X.size == 0:
return
X = self.bof.transform(X)
diff --git a/jsk_perception/node_scripts/classification_node.py b/jsk_perception/node_scripts/classification_node.py
new file mode 100755
index 0000000000..5c9debf692
--- /dev/null
+++ b/jsk_perception/node_scripts/classification_node.py
@@ -0,0 +1,13 @@
+#!/usr/bin/env python
+
+import rospy
+from jsk_perception.vil_inference_client import ClipClientNode
+
+
+def main():
+ rospy.init_node("classification")
+ node = ClipClientNode()
+ rospy.spin()
+
+if __name__ == "__main__":
+ main()
diff --git a/jsk_perception/node_scripts/hand_pose_estimation_2d.py b/jsk_perception/node_scripts/hand_pose_estimation_2d.py
index 4617760375..800e706259 100755
--- a/jsk_perception/node_scripts/hand_pose_estimation_2d.py
+++ b/jsk_perception/node_scripts/hand_pose_estimation_2d.py
@@ -253,7 +253,7 @@ def _cb_with_depth_info(self, img_msg, depth_msg, camera_info_msg):
elif depth_msg.encoding != '32FC1':
rospy.logerr('Unsupported depth encoding: %s' % depth_msg.encoding)
- hands_points, hands_point_scores, hands_score = self.hand_pose_estimate(img)
+ hands_points, hands_point_scores, hands_score = self.hand_pose_estimate(img, img_msg.header)
hand_pose_2d_msg = self._create_2d_hand_pose_array_msgs(
hands_points,
@@ -318,7 +318,7 @@ def _cb(self, img_msg):
img = self.bridge.imgmsg_to_cv2(
img_msg, desired_encoding='bgr8')
hands_points, hands_point_scores, hands_score = \
- self.hand_pose_estimate(img)
+ self.hand_pose_estimate(img, img_msg.header)
hand_pose_msg = self._create_2d_hand_pose_array_msgs(
hands_points,
@@ -358,18 +358,19 @@ def _create_2d_hand_pose_array_msgs(
hand_pose_msg.poses.append(pose_msg)
return hand_pose_msg
- def hand_pose_estimate(self, bgr):
+ def hand_pose_estimate(self, bgr, header):
if self.backend == 'torch':
- return self._hand_pose_estimate_torch_backend(bgr)
+ return self._hand_pose_estimate_torch_backend(bgr, header)
raise ValueError('Unsupported backend: {0}'.format(self.backend))
- def _hand_pose_estimate_torch_backend(self, frame):
+ def _hand_pose_estimate_torch_backend(self, frame, header):
hands_points, hands_rect, hands_point_scores, hands_score = \
self.pyramid_inference(frame)
if self.visualize:
vis_img = self._draw_joints(frame, hands_points, hands_rect)
vis_msg = self.bridge.cv2_to_imgmsg(vis_img, encoding='bgr8')
+ vis_msg.header = header
self.image_pub.publish(vis_msg)
return hands_points, hands_point_scores, hands_score
diff --git a/jsk_perception/node_scripts/label_image_classifier.py b/jsk_perception/node_scripts/label_image_classifier.py
index 7804510b22..8494773d97 100755
--- a/jsk_perception/node_scripts/label_image_classifier.py
+++ b/jsk_perception/node_scripts/label_image_classifier.py
@@ -7,45 +7,8 @@
import rospy
from sensor_msgs.msg import Image
+from jsk_perception.label_image_classifier import LabelImageClassifier
-class LabelImageClassifier(ConnectionBasedTransport):
-
- classifier_name = 'label_image_classifier'
-
- def __init__(self):
- super(LabelImageClassifier, self).__init__()
- self.ignore_labels = rospy.get_param('~ignore_labels', [])
- self.target_names = rospy.get_param('~target_names', [])
- self.pub = self.advertise(
- '~output', ClassificationResult, queue_size=1)
-
- def subscribe(self):
- self.sub = rospy.Subscriber('~input', Image, self._cb)
-
- def unsubscribe(self):
- self.sub.unregister()
-
- def _cb(self, imgmsg):
- bridge = cv_bridge.CvBridge()
- img = bridge.imgmsg_to_cv2(imgmsg)
- label, proba = self._classify(img)
- msg = ClassificationResult()
- msg.header = imgmsg.header
- msg.labels = [label]
- msg.label_names = [self.target_names[label]]
- msg.label_proba = [proba[label]]
- msg.probabilities = proba
- msg.classifier = self.classifier_name
- msg.target_names = self.target_names
- self.pub.publish(msg)
-
- def _classify(self, label_img):
- counts = np.bincount(label_img.flatten(),
- minlength=len(self.target_names))
- counts[self.ignore_labels] = 0
- label = np.argmax(counts)
- proba = counts.astype(np.float32) / counts.sum()
- return label, proba
if __name__ == '__main__':
diff --git a/jsk_perception/node_scripts/matchtemplate.py b/jsk_perception/node_scripts/matchtemplate.py
index 0a46300e0b..432aa5dfb0 100755
--- a/jsk_perception/node_scripts/matchtemplate.py
+++ b/jsk_perception/node_scripts/matchtemplate.py
@@ -3,7 +3,10 @@
import roslib; roslib.load_manifest('jsk_perception')
import rospy
import numpy as np
-import thread
+try:
+ import thread
+except:
+ import _thread as thread
from sensor_msgs.msg import Image
from geometry_msgs.msg import *
from jsk_recognition_msgs.msg import Rect
@@ -37,10 +40,10 @@ def _MinMaxLock2nd(arr,ex_size,is_min):
if is_min: idx = 0
else: idx = 1
status = cv2.minMaxLoc(arr)
- pt1 = (max(status[2+idx][0]-ex_size[0]/2,0),
- max(status[2+idx][1]-ex_size[1]/2,0))
- pt2 = (min(status[2+idx][0]+ex_size[0]/2,arr.shape[1]),
- min(status[2+idx][1]+ex_size[1]/2,arr.shape[0]))
+ pt1 = (max(status[2+idx][0]-int(ex_size[0]/2),0),
+ max(status[2+idx][1]-int(ex_size[1]/2),0))
+ pt2 = (min(status[2+idx][0]+int(ex_size[0]/2),arr.shape[1]),
+ min(status[2+idx][1]+int(ex_size[1]/2),arr.shape[0]))
mask = np.ones((arr.shape[0], arr.shape[1]), dtype=np.uint8) * 255
mask[pt1[0]:pt2[0], pt1[1]:pt2[1]] = 0
status2 = cv2.minMaxLoc(arr, mask)
@@ -127,8 +130,8 @@ def set_template (self, ref_id='', ref_image=None, ref_rect=None,
template_image = cv2.cvtColor(
ref_image_rect, cv2.COLOR_BGR2HSV)
- self.templates[ref_id]['ref_point'] = (ref_rect[0]+ref_rect[2]/2,
- ref_rect[1]+ref_rect[3]/2)
+ self.templates[ref_id]['ref_point'] = (ref_rect[0]+int(ref_rect[2]/2),
+ ref_rect[1]+int(ref_rect[3]/2))
self.templates[ref_id]['ref_image'] = template_image
rospy.loginfo("set ref_image id=%s, rect=%s", ref_id, ref_rect);
@@ -160,13 +163,13 @@ def set_reference (self, rect):
def set_reference_point_callback (self, msg): # PointStamped
self.lockobj.acquire()
pt = (int(msg.point.x),int(msg.point.y))
- rect = (pt[0]-self.default_template_size[0]/2,
- pt[1]-self.default_template_size[1]/2,
+ rect = (pt[0]-int(self.default_template_size[0]/2),
+ pt[1]-int(self.default_template_size[1]/2),
self.default_template_size[0], self.default_template_size[1])
self.set_reference(rect)
print(rect)
- search_rect = (pt[0]-self.default_search_size[0]/2,
- pt[1]-self.default_search_size[1]/2,
+ search_rect = (pt[0]-int(self.default_search_size[0]/2),
+ pt[1]-int(self.default_search_size[1]/2),
self.default_search_size[0],self.default_search_size[1])
self.set_template('',ser_frame=None, ser_rect=search_rect)
self.lockobj.release()
@@ -204,7 +207,7 @@ def reconfigure(self,config,level):
(template['ref_image'] is not None):
ref_pt = template['ref_point']
ref_size = template['ref_image'].shape
- ref_rect = (ref_pt[0]-ref_size[0]/2,ref_pt[1]-ref_size[1]/2,ref_size[0],ref_size[1])
+ ref_rect = (ref_pt[0]-int(ref_size[0]/2),ref_pt[1]-int(ref_size[1]/2),ref_size[0],ref_size[1])
self.set_template(ref_id=config['current_template_id'],
color=config['template_color_space'],
ref_rect=ref_rect,
@@ -270,8 +273,8 @@ def ser_image_callback (self, msg):
status = MaxLock2nd(results,reference_size) # maximum for others
status = (1 - status[0], 1 - status[1], status[2], status[3])
- result_pt = (status[2][0]+search_rect[0]+reference_size[0]/2,
- status[2][1]+search_rect[1]+reference_size[1]/2)
+ result_pt = (status[2][0]+search_rect[0]+int(reference_size[0]/2),
+ status[2][1]+search_rect[1]+int(reference_size[1]/2))
# publish center position as result
result.child_frame_id = template_id
@@ -291,8 +294,8 @@ def ser_image_callback (self, msg):
# self feedback
if self.auto_search_area:
- val_x = result_pt[0]-search_rect[2]/2
- val_y = result_pt[1]-search_rect[3]/2
+ val_x = result_pt[0]-int(search_rect[2]/2)
+ val_y = result_pt[1]-int(search_rect[3]/2)
ser_scale = max(2,5+np.log(status[0])) # ???
new_ser_rect = (
min(max(val_x,0),image_size[0]-search_rect[2]),
@@ -303,10 +306,10 @@ def ser_image_callback (self, msg):
# draw on debug image
if self.show_debug_image:
cv2.rectangle(debug_image,
- (result_pt[0] - reference_size[0]/2,
- result_pt[1] - reference_size[1]/2),
- (result_pt[0] + reference_size[0]/2,
- result_pt[1] + reference_size[1]/2),
+ (result_pt[0] - int(reference_size[0]/2),
+ result_pt[1] - int(reference_size[1]/2)),
+ (result_pt[0] + int(reference_size[0]/2),
+ result_pt[1] + int(reference_size[1]/2)),
color=(0, 0, 255)) # red
cv2.rectangle(debug_image,
(search_rect[0], search_rect[1]),
diff --git a/jsk_perception/node_scripts/people_pose_estimation_2d.py b/jsk_perception/node_scripts/people_pose_estimation_2d.py
index 08cd7bcc19..62b2fbe8ba 100755
--- a/jsk_perception/node_scripts/people_pose_estimation_2d.py
+++ b/jsk_perception/node_scripts/people_pose_estimation_2d.py
@@ -706,7 +706,7 @@ def _draw_joints(self, img, people_joint_positions, all_peaks):
cv2.fillConvexPoly(roi2, polygon - np.array([left, top]), color)
cv2.addWeighted(roi, 0.4, roi2, 0.6, 0.0, dst=roi)
#
- offset += len(self.index2handname) / 2
+ offset += int(len(self.index2handname) / 2)
return img
@@ -812,8 +812,8 @@ def _hand_estimate_chainer_backend_each(self, hand_bgr, cx, cy, left_hand):
for i, heatmap in enumerate(hmaps):
conf = heatmap.max()
cds = np.array(np.where(heatmap==conf)).flatten().tolist()
- py = cy + cds[0] - hand_bgr.shape[0] / 2
- px = cx + cds[1] - hand_bgr.shape[1] / 2
+ py = cy + cds[0] - int(hand_bgr.shape[0] / 2)
+ px = cx + cds[1] - int(hand_bgr.shape[1] / 2)
keypoints.append({'x': px, 'y': py, 'score': conf,
'limb': self.index2handname[idx_offset+i]})
return keypoints
diff --git a/jsk_perception/node_scripts/probability_image_classifier.py b/jsk_perception/node_scripts/probability_image_classifier.py
index 768ea81d72..65282c5199 100755
--- a/jsk_perception/node_scripts/probability_image_classifier.py
+++ b/jsk_perception/node_scripts/probability_image_classifier.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python
-from label_image_classifier import LabelImageClassifier
+from jsk_perception.label_image_classifier import LabelImageClassifier
import numpy as np
import rospy
diff --git a/jsk_perception/node_scripts/rect_array_in_panorama_to_bounding_box_array.py b/jsk_perception/node_scripts/rect_array_in_panorama_to_bounding_box_array.py
index 3147d7cfe6..1bf0d8007a 100755
--- a/jsk_perception/node_scripts/rect_array_in_panorama_to_bounding_box_array.py
+++ b/jsk_perception/node_scripts/rect_array_in_panorama_to_bounding_box_array.py
@@ -50,22 +50,22 @@ def __init__(self):
self._dimensions_labels = rospy.get_param( '~dimensions_labels', {} )
self._duration_timeout = rospy.get_param( '~duration_timeout', 0.05 )
- try:
- msg_panorama_image = rospy.wait_for_message(
- '~panorama_image', Image, 10)
- msg_panorama_info = rospy.wait_for_message(
- '~panorama_info', PanoramaInfo, 10)
- except (rospy.ROSException, rospy.ROSInterruptException) as e:
- rospy.logerr('{}'.format(e))
- sys.exit(1)
+ msg_panorama_info = None
+ while (not rospy.is_shutdown() and msg_panorama_info is None):
+ try:
+ msg_panorama_info = rospy.wait_for_message(
+ '~panorama_info', PanoramaInfo, 10)
+ except (rospy.ROSException, rospy.ROSInterruptException) as e:
+ rospy.logerr('~panorama_info has not been received yet...')
+ rospy.logerr('waiting for ~panorama_info for 10 more seconds')
self._frame_panorama = msg_panorama_info.header.frame_id
self._theta_min = msg_panorama_info.theta_min
self._theta_max = msg_panorama_info.theta_max
self._phi_min = msg_panorama_info.phi_min
self._phi_max = msg_panorama_info.phi_max
- self._image_height = msg_panorama_image.height
- self._image_width = msg_panorama_image.width
+ self._image_height = msg_panorama_info.image_height
+ self._image_width = msg_panorama_info.image_width
self._tf_buffer = tf2_ros.Buffer()
self._tf_listener = tf2_ros.TransformListener(self._tf_buffer)
diff --git a/jsk_perception/node_scripts/regional_feature_based_object_recognition.py b/jsk_perception/node_scripts/regional_feature_based_object_recognition.py
index 0bebd48372..a7217494bb 100755
--- a/jsk_perception/node_scripts/regional_feature_based_object_recognition.py
+++ b/jsk_perception/node_scripts/regional_feature_based_object_recognition.py
@@ -73,6 +73,7 @@ def __init__(self):
rospy.loginfo('Fitting KNN from db')
db = np.load(db_file)
X, y, self.target_names = db['X'], db['y'], db['target_names']
+ self.target_names = np.array(list(map(lambda name: name.decode('utf-8') if hasattr(name, 'decode') else name, self.target_names)))
self.knn = KNeighborsClassifier(n_neighbors=10)
self.knn.fit(X, y)
rospy.loginfo('Finished fitting KNN from db')
diff --git a/jsk_perception/node_scripts/sklearn_classifier.py b/jsk_perception/node_scripts/sklearn_classifier.py
index e42dbef968..be3729f5c4 100755
--- a/jsk_perception/node_scripts/sklearn_classifier.py
+++ b/jsk_perception/node_scripts/sklearn_classifier.py
@@ -3,7 +3,11 @@
#
from __future__ import division
import gzip
-import cPickle as pickle
+import sys
+if sys.version_info.major <= 2:
+ import cPickle as pickle
+else: # for python3
+ import _pickle as pickle
import numpy as np
from sklearn.preprocessing import normalize
@@ -23,7 +27,10 @@ def __init__(self):
def _init_classifier(self):
clf_path = rospy.get_param('~clf_path')
with gzip.open(clf_path) as f:
- self.clf = pickle.load(f)
+ if sys.version_info.major <= 2:
+ self.clf = pickle.load(f)
+ else:
+ self.clf = pickle.load(f, encoding='latin1')
def subscribe(self):
self.sub_hist = rospy.Subscriber('~input', VectorArray, self._predict)
diff --git a/jsk_perception/node_scripts/solidity_rag_merge.py b/jsk_perception/node_scripts/solidity_rag_merge.py
index 7f343d51b1..ba5e2a2b38 100755
--- a/jsk_perception/node_scripts/solidity_rag_merge.py
+++ b/jsk_perception/node_scripts/solidity_rag_merge.py
@@ -39,6 +39,8 @@
def rag_solidity(labels, connectivity=2):
+ if LooseVersion(skimage.__version__) >= '0.16.0':
+ RAG.node = RAG.nodes
graph = RAG()
# The footprint is constructed in such a way that the first
diff --git a/jsk_perception/node_scripts/split_image.py b/jsk_perception/node_scripts/split_image.py
index a9f4b72595..66170ec3b0 100755
--- a/jsk_perception/node_scripts/split_image.py
+++ b/jsk_perception/node_scripts/split_image.py
@@ -47,5 +47,6 @@ def _split_cb(self, msg):
if __name__ == '__main__':
rospy.init_node('split_image')
+ rospy.logwarn("split_image.py would be deprecated. Please use NODELET version of split_image")
SplitImage()
rospy.spin()
diff --git a/jsk_perception/node_scripts/ssd_object_detector.py b/jsk_perception/node_scripts/ssd_object_detector.py
index e95d189f67..359568f9ae 100755
--- a/jsk_perception/node_scripts/ssd_object_detector.py
+++ b/jsk_perception/node_scripts/ssd_object_detector.py
@@ -186,9 +186,9 @@ def image_cb(self, msg):
rect_msg = RectArray(header=msg.header)
for bbox in bboxes:
- rect = Rect(x=bbox[1], y=bbox[0],
- width= bbox[3] - bbox[1],
- height=bbox[2] - bbox[0])
+ rect = Rect(x=int(bbox[1]), y=int(bbox[0]),
+ width= int(bbox[3] - bbox[1]),
+ height=int(bbox[2] - bbox[0]))
rect_msg.rects.append(rect)
if self.profiling:
diff --git a/jsk_perception/node_scripts/vgg16_object_recognition.py b/jsk_perception/node_scripts/vgg16_object_recognition.py
index ce29e13175..4169ecac57 100755
--- a/jsk_perception/node_scripts/vgg16_object_recognition.py
+++ b/jsk_perception/node_scripts/vgg16_object_recognition.py
@@ -43,109 +43,8 @@
import rospy
from sensor_msgs.msg import Image
+from jsk_perception.vgg16_object_recognition import VGG16ObjectRecognition
-class VGG16ObjectRecognition(ConnectionBasedTransport):
-
- mean_bgr = np.array([104.00698793, 116.66876762, 122.67891434])
-
- def __init__(self):
- super(self.__class__, self).__init__()
- self.insize = 224
- self.gpu = rospy.get_param('~gpu', -1)
- self.target_names = rospy.get_param('~target_names')
- self.model_name = rospy.get_param('~model_name')
- if self.model_name == 'vgg16':
- self.model = VGG16(n_class=len(self.target_names))
- elif self.model_name == 'vgg16_batch_normalization':
- self.model = VGG16BatchNormalization(
- n_class=len(self.target_names))
- else:
- rospy.logerr('Unsupported ~model_name: {0}'
- .format(self.model_name))
- model_file = rospy.get_param('~model_file')
- S.load_hdf5(model_file, self.model)
- if self.gpu != -1:
- self.model.to_gpu(self.gpu)
- self.pub = self.advertise('~output', ClassificationResult,
- queue_size=1)
- self.pub_input = self.advertise(
- '~debug/net_input', Image, queue_size=1)
-
- def subscribe(self):
- if rospy.get_param('~use_mask', False):
- # larger buff_size is necessary for taking time callback
- # http://stackoverflow.com/questions/26415699/ros-subscriber-not-up-to-date/29160379#29160379 # NOQA
- sub = message_filters.Subscriber(
- '~input', Image, queue_size=1, buff_size=2**24)
- sub_mask = message_filters.Subscriber(
- '~input/mask', Image, queue_size=1, buff_size=2**24)
- self.subs = [sub, sub_mask]
- queue_size = rospy.get_param('~queue_size', 10)
- if rospy.get_param('~approximate_sync', False):
- slop = rospy.get_param('~slop', 0.1)
- sync = message_filters.ApproximateTimeSynchronizer(
- self.subs, queue_size=queue_size, slop=slop)
- else:
- sync = message_filters.TimeSynchronizer(
- self.subs, queue_size=queue_size)
- sync.registerCallback(self._recognize)
- else:
- sub = rospy.Subscriber(
- '~input', Image, self._recognize, callback_args=None,
- queue_size=1, buff_size=2**24)
- self.subs = [sub]
-
- def unsubscribe(self):
- for sub in self.subs:
- sub.unregister()
-
- def _recognize(self, imgmsg, mask_msg=None):
- bridge = cv_bridge.CvBridge()
- bgr = bridge.imgmsg_to_cv2(imgmsg, desired_encoding='bgr8')
- if mask_msg is not None:
- mask = bridge.imgmsg_to_cv2(mask_msg)
- if mask.shape != bgr.shape[:2]:
- logerr_throttle(10,
- 'Size of input image and mask is different')
- return
- elif mask.size == 0:
- logerr_throttle(10, 'Size of input mask is 0')
- return
- bgr[mask == 0] = self.mean_bgr
- bgr = skimage.transform.resize(
- bgr, (self.insize, self.insize), preserve_range=True)
- input_msg = bridge.cv2_to_imgmsg(bgr.astype(np.uint8), encoding='bgr8')
- input_msg.header = imgmsg.header
- self.pub_input.publish(input_msg)
-
- blob = (bgr - self.mean_bgr).transpose((2, 0, 1))
- x_data = np.array([blob], dtype=np.float32)
- if self.gpu != -1:
- x_data = cuda.to_gpu(x_data, device=self.gpu)
- if LooseVersion(chainer.__version__) < LooseVersion('2.0.0'):
- x = Variable(x_data, volatile=True)
- self.model.train = False
- self.model(x)
- else:
- with chainer.using_config('train', False), \
- chainer.no_backprop_mode():
- x = Variable(x_data)
- self.model(x)
-
- proba = cuda.to_cpu(self.model.pred.data)[0]
- label = np.argmax(proba)
- label_name = self.target_names[label]
- label_proba = proba[label]
- cls_msg = ClassificationResult(
- header=imgmsg.header,
- labels=[label],
- label_names=[label_name],
- label_proba=[label_proba],
- probabilities=proba,
- classifier=self.model_name,
- target_names=self.target_names,
- )
- self.pub.publish(cls_msg)
if __name__ == '__main__':
diff --git a/jsk_perception/node_scripts/vqa_node.py b/jsk_perception/node_scripts/vqa_node.py
new file mode 100755
index 0000000000..6f3cea1eaf
--- /dev/null
+++ b/jsk_perception/node_scripts/vqa_node.py
@@ -0,0 +1,13 @@
+#!/usr/bin/env python
+
+import rospy
+from jsk_perception.vil_inference_client import OFAClientNode
+
+
+def main():
+ rospy.init_node("vqa")
+ node = OFAClientNode()
+ rospy.spin()
+
+if __name__ == "__main__":
+ main()
diff --git a/jsk_perception/package.xml b/jsk_perception/package.xml
index 9b61648d03..b7e25d7f9c 100644
--- a/jsk_perception/package.xml
+++ b/jsk_perception/package.xml
@@ -94,6 +94,8 @@
python-fcn-pip
python-pytesseract-pip
+ python-requests
+ python3-requests
python-sklearn
python3-sklearn
python-yaml
@@ -119,9 +121,12 @@
+ compressed_depth_image_transport
+ compressed_image_transport
jsk_tools
roslaunch
roslint
+ rosservice
rostest
diff --git a/jsk_perception/plugins/nodelet/libjsk_perception.xml b/jsk_perception/plugins/nodelet/libjsk_perception.xml
index 2d6d012b52..35828883fc 100644
--- a/jsk_perception/plugins/nodelet/libjsk_perception.xml
+++ b/jsk_perception/plugins/nodelet/libjsk_perception.xml
@@ -143,6 +143,9 @@
+
+
@@ -290,8 +293,20 @@
type="jsk_perception::VirtualCameraMono"
base_class_type="nodelet::Nodelet">
+
+
+
+
+
+
diff --git a/jsk_perception/sample/config/sample_ofa_config.rviz b/jsk_perception/sample/config/sample_ofa_config.rviz
new file mode 100644
index 0000000000..458138e652
--- /dev/null
+++ b/jsk_perception/sample/config/sample_ofa_config.rviz
@@ -0,0 +1,181 @@
+Panels:
+ - Class: rviz/Displays
+ Help Height: 138
+ Name: Displays
+ Property Tree Widget:
+ Expanded: ~
+ Splitter Ratio: 0.4870370328426361
+ Tree Height: 509
+ - Class: rviz/Selection
+ Name: Selection
+ - Class: rviz/Tool Properties
+ Expanded:
+ - /2D Pose Estimate1
+ - /2D Nav Goal1
+ - /Publish Point1
+ Name: Tool Properties
+ Splitter Ratio: 0.5886790156364441
+ - Class: rviz/Views
+ Expanded:
+ - /Current View1
+ Name: Views
+ Splitter Ratio: 0.5
+ - Class: rviz/Time
+ Name: Time
+ SyncMode: 0
+ SyncSource: ""
+Preferences:
+ PromptSaveOnExit: true
+Toolbars:
+ toolButtonStyle: 2
+Visualization Manager:
+ Class: ""
+ Displays:
+ - Alpha: 0.5
+ Cell Size: 1
+ Class: rviz/Grid
+ Color: 160; 160; 164
+ Enabled: false
+ Line Style:
+ Line Width: 0.029999999329447746
+ Value: Lines
+ Name: Grid
+ Normal Cell Count: 0
+ Offset:
+ X: 0
+ Y: 0
+ Z: 0
+ Plane: XY
+ Plane Cell Count: 10
+ Reference Frame:
+ Value: false
+ - Class: jsk_rviz_plugin/OverlayImage
+ Enabled: true
+ Name: VQA/VQA/Image
+ Topic: /vqa/result/image
+ Value: true
+ alpha: 0.800000011920929
+ height: 128
+ keep aspect ratio: true
+ left: 10
+ overwrite alpha value: false
+ top: 10
+ transport hint: raw
+ width: 320
+ - Align Bottom: false
+ Background Alpha: 0.800000011920929
+ Background Color: 0; 0; 0
+ Class: jsk_rviz_plugin/String
+ Enabled: true
+ Foreground Alpha: 0.800000011920929
+ Foreground Color: 255; 255; 255
+ Name: VQA/VQA/Visualize
+ Overtake Color Properties: true
+ Overtake Position Properties: true
+ Topic: /vqa/visualize
+ Value: true
+ font: DejaVu Sans Mono
+ height: 500
+ left: 10
+ line width: 2
+ text size: 12
+ top: 320
+ width: 512
+ - Class: jsk_rviz_plugin/OverlayImage
+ Enabled: true
+ Name: Classification/Classification/Image
+ Topic: /classification/result/image
+ Value: true
+ alpha: 0.800000011920929
+ height: 128
+ keep aspect ratio: true
+ left: 530
+ overwrite alpha value: false
+ top: 10
+ transport hint: raw
+ width: 320
+ - Align Bottom: false
+ Background Alpha: 0.800000011920929
+ Background Color: 0; 0; 0
+ Class: jsk_rviz_plugin/String
+ Enabled: true
+ Foreground Alpha: 0.800000011920929
+ Foreground Color: 255; 255; 255
+ Name: Classification/Classification/Visualize
+ Overtake Color Properties: true
+ Overtake Position Properties: true
+ Topic: /classification/visualize
+ Value: true
+ font: DejaVu Sans Mono
+ height: 500
+ left: 530
+ line width: 2
+ text size: 12
+ top: 320
+ width: 512
+ Enabled: true
+ Global Options:
+ Background Color: 48; 48; 48
+ Default Light: true
+ Fixed Frame: map
+ Frame Rate: 30
+ Name: root
+ Tools:
+ - Class: rviz/Interact
+ Hide Inactive Objects: true
+ - Class: rviz/MoveCamera
+ - Class: rviz/Select
+ - Class: rviz/FocusCamera
+ - Class: rviz/Measure
+ - Class: rviz/SetInitialPose
+ Theta std deviation: 0.2617993950843811
+ Topic: /initialpose
+ X std deviation: 0.5
+ Y std deviation: 0.5
+ - Class: rviz/SetGoal
+ Topic: /move_base_simple/goal
+ - Class: rviz/PublishPoint
+ Single click: true
+ Topic: /clicked_point
+ Value: true
+ Views:
+ Current:
+ Class: rviz/Orbit
+ Distance: 10
+ Enable Stereo Rendering:
+ Stereo Eye Separation: 0.05999999865889549
+ Stereo Focal Distance: 1
+ Swap Stereo Eyes: false
+ Value: false
+ Field of View: 0.7853981852531433
+ Focal Point:
+ X: 0
+ Y: 0
+ Z: 0
+ Focal Shape Fixed Size: true
+ Focal Shape Size: 0.05000000074505806
+ Invert Z Axis: false
+ Name: Current View
+ Near Clip Distance: 0.009999999776482582
+ Pitch: 0.785398006439209
+ Target Frame:
+ Yaw: 0.785398006439209
+ Saved: ~
+Window Geometry:
+ Displays:
+ collapsed: false
+ Height: 1025
+ Hide Left Dock: false
+ Hide Right Dock: true
+ QMainWindow State: 000000ff00000000fd00000004000000000000021e000002f7fc0200000008fb0000001200530065006c0065006300740069006f006e00000001e10000009b000000b100fffffffb0000001e0054006f006f006c002000500072006f007000650072007400690065007302000001ed000001df00000185000000b1fb000000120056006900650077007300200054006f006f02000001df000002110000018500000122fb000000200054006f006f006c002000500072006f0070006500720074006900650073003203000002880000011d000002210000017afb000000100044006900730070006c006100790073010000006f000002f70000018400fffffffb0000002000730065006c0065006300740069006f006e00200062007500660066006500720200000138000000aa0000023a00000294fb00000014005700690064006500530074006500720065006f02000000e6000000d2000003ee0000030bfb0000000c004b0069006e0065006300740200000186000001060000030c00000261000000010000010f0000037efc0200000003fb0000001e0054006f006f006c002000500072006f00700065007200740069006500730100000041000000780000000000000000fb0000000a00560069006500770073000000003d0000037e0000013500fffffffb0000001200530065006c0065006300740069006f006e010000025a000000b200000000000000000000000200000490000000a9fc0100000001fb0000000a00560069006500770073030000004e00000080000002e100000197000000030000078000000060fc0100000002fb0000000800540069006d0065010000000000000780000005cd00fffffffb0000000800540069006d0065010000000000000450000000000000000000000556000002f700000004000000040000000800000008fc0000000100000002000000010000000a0054006f006f006c00730100000000ffffffff0000000000000000
+ Selection:
+ collapsed: false
+ Time:
+ collapsed: false
+ Tool Properties:
+ collapsed: false
+ Views:
+ collapsed: true
+ Width: 1920
+ X: 1440
+ Y: 1096
diff --git a/jsk_perception/sample/image/items_in_shelf.jpg b/jsk_perception/sample/image/items_in_shelf.jpg
new file mode 100644
index 0000000000..699917fa3f
Binary files /dev/null and b/jsk_perception/sample/image/items_in_shelf.jpg differ
diff --git a/jsk_perception/sample/image/pai_no_mi.jpg b/jsk_perception/sample/image/pai_no_mi.jpg
new file mode 100644
index 0000000000..14aff8a629
Binary files /dev/null and b/jsk_perception/sample/image/pai_no_mi.jpg differ
diff --git a/jsk_perception/sample/include/play_rosbag_pr2_self_see.xml b/jsk_perception/sample/include/play_rosbag_pr2_self_see.xml
index b12b21acbb..498dafe82f 100644
--- a/jsk_perception/sample/include/play_rosbag_pr2_self_see.xml
+++ b/jsk_perception/sample/include/play_rosbag_pr2_self_see.xml
@@ -2,7 +2,7 @@
+ command="$(find xacro)/xacro '$(find pr2_description)/robots/pr2.urdf.xacro'"/>
diff --git a/jsk_perception/sample/sample_classification.launch b/jsk_perception/sample/sample_classification.launch
new file mode 100644
index 0000000000..71984cea22
--- /dev/null
+++ b/jsk_perception/sample/sample_classification.launch
@@ -0,0 +1,15 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/jsk_perception/sample/sample_color_histogram.launch b/jsk_perception/sample/sample_color_histogram.launch
index b5c1fb791b..d75d2fc11c 100644
--- a/jsk_perception/sample/sample_color_histogram.launch
+++ b/jsk_perception/sample/sample_color_histogram.launch
@@ -24,8 +24,9 @@
use_window: $(arg gui)
+
+ pkg="jsk_perception" type="publish_mouse_event.py">
image_width: 256
diff --git a/jsk_perception/sample/sample_dual_fisheye_to_panorama.launch b/jsk_perception/sample/sample_dual_fisheye_to_panorama.launch
index 12d867d767..457a49071d 100644
--- a/jsk_perception/sample/sample_dual_fisheye_to_panorama.launch
+++ b/jsk_perception/sample/sample_dual_fisheye_to_panorama.launch
@@ -6,21 +6,33 @@
+
+
+
+
+
+
+
+ args="load jsk_perception/DualFisheyeToPanorama $(arg MANAGER)">
+
+
diff --git a/jsk_perception/sample/sample_insta360_air.launch b/jsk_perception/sample/sample_insta360_air.launch
index 06bbba3e5e..b79b010c55 100644
--- a/jsk_perception/sample/sample_insta360_air.launch
+++ b/jsk_perception/sample/sample_insta360_air.launch
@@ -27,7 +27,16 @@
+
+
+
+
+
+
@@ -57,7 +66,10 @@
-
+
vertical_parts: 1
@@ -68,7 +80,7 @@
@@ -76,7 +88,7 @@
@@ -84,7 +96,7 @@
@@ -101,6 +113,10 @@
+
+
+
+
diff --git a/jsk_perception/sample/sample_invert_mask_image.launch b/jsk_perception/sample/sample_invert_mask_image.launch
new file mode 100644
index 0000000000..d7c338f962
--- /dev/null
+++ b/jsk_perception/sample/sample_invert_mask_image.launch
@@ -0,0 +1,81 @@
+
+
+
+
+
+
+
+
+
+
+ file_name: $(find jsk_perception)/sample/kiva_pod_image_color.jpg
+ encoding: bgr8
+ publish_info: false
+
+
+
+
+
+
+ file_name: $(find jsk_perception)/sample/kiva_pod_mask.jpg
+ encoding: mono8
+ publish_info: false
+
+
+
+
+
+
+
+
+
+
+
+
+
+ approximate_sync: true
+
+
+
+
+
+
+
+ approximate_sync: true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/jsk_perception/sample/sample_point_pose_extractor.launch b/jsk_perception/sample/sample_point_pose_extractor.launch
new file mode 100644
index 0000000000..07112f8081
--- /dev/null
+++ b/jsk_perception/sample/sample_point_pose_extractor.launch
@@ -0,0 +1,40 @@
+
+
+
+
+
+
+
+ file_name: $(find jsk_perception)/sample/image/items_in_shelf.jpg
+ encoding: bgr8
+ publish_info: true
+ fovx: 84.1
+ fovy: 53.8
+
+
+
+
+
+
+
+
+
+
+
+
+ template_filename: $(find jsk_perception)/sample/image/pai_no_mi.jpg
+ template_id: 0
+ object_width: 0.145
+ object_height: 0.128
+ viewer_window: $(arg gui)
+
+
+
diff --git a/jsk_perception/sample/sample_rect_array_in_panorama_to_bounding_box_array.launch b/jsk_perception/sample/sample_rect_array_in_panorama_to_bounding_box_array.launch
index e3982cec2b..9d0a9a5a0e 100644
--- a/jsk_perception/sample/sample_rect_array_in_panorama_to_bounding_box_array.launch
+++ b/jsk_perception/sample/sample_rect_array_in_panorama_to_bounding_box_array.launch
@@ -47,7 +47,6 @@
type="rect_array_in_panorama_to_bounding_box_array.py"
name="rect_array_in_panorama_to_bounding_box_array"
>
-
diff --git a/jsk_perception/sample/sample_split_image.launch b/jsk_perception/sample/sample_split_image.launch
index 400e9ba0a6..b683a995ea 100644
--- a/jsk_perception/sample/sample_split_image.launch
+++ b/jsk_perception/sample/sample_split_image.launch
@@ -12,14 +12,16 @@
publish_info: true
fovx: 84.1
fovy: 53.8
+ rate: 60.0
-
+
vertical_parts: 2
horizontal_parts: 2
+ always_subscribe: true
diff --git a/jsk_perception/sample/sample_video_to_scene.launch b/jsk_perception/sample/sample_video_to_scene.launch
new file mode 100644
index 0000000000..94a4c1d0af
--- /dev/null
+++ b/jsk_perception/sample/sample_video_to_scene.launch
@@ -0,0 +1,35 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/jsk_perception/sample/sample_vqa.launch b/jsk_perception/sample/sample_vqa.launch
new file mode 100644
index 0000000000..daa9865e50
--- /dev/null
+++ b/jsk_perception/sample/sample_vqa.launch
@@ -0,0 +1,15 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/jsk_perception/scripts/send_classification_query_action.py b/jsk_perception/scripts/send_classification_query_action.py
new file mode 100755
index 0000000000..db15f17a02
--- /dev/null
+++ b/jsk_perception/scripts/send_classification_query_action.py
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+
+import actionlib
+import jsk_recognition_msgs.msg
+import rospy
+from sensor_msgs.msg import Image
+
+def cb(image):
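+ # "/classification/inference_server" is the ~inference_server action offered by ClipClientNode (vil_inference_client.py)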
+ ac = actionlib.SimpleActionClient("/classification/inference_server",
+ jsk_recognition_msgs.msg.ClassificationTaskAction)
+ ac.wait_for_server()
+ goal = jsk_recognition_msgs.msg.ClassificationTaskGoal()
+ goal.image = image
+ goal.queries = ["human", "apple", "book"]
+ ac.send_goal(goal)
+ ac.wait_for_result()
+ print(ac.get_result())
+
+rospy.init_node("test_classification_action")
+sub = rospy.Subscriber("/usb_cam/image_raw", Image, cb)
+rospy.spin()
diff --git a/jsk_perception/scripts/send_vqa_query_action.py b/jsk_perception/scripts/send_vqa_query_action.py
new file mode 100755
index 0000000000..ad1325290a
--- /dev/null
+++ b/jsk_perception/scripts/send_vqa_query_action.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python
+
+import actionlib
+import jsk_recognition_msgs.msg
+import rospy
+from sensor_msgs.msg import Image
+
+def cb(image):
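+ # "/vqa/inference_server" is the ~inference_server action offered by OFAClientNode (see vqa_node.py)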
+ ac = actionlib.SimpleActionClient("/vqa/inference_server" , jsk_recognition_msgs.msg.VQATaskAction)
+ ac.wait_for_server()
+ goal = jsk_recognition_msgs.msg.VQATaskGoal()
+ goal.image = image
+ goal.questions = ["what does this image describe?"]
+ ac.send_goal(goal)
+ ac.wait_for_result()
+ print(ac.get_result())
+
+rospy.init_node("test_vqa_action")
+sub = rospy.Subscriber("/usb_cam/image_raw", Image, cb)
+rospy.spin()
diff --git a/jsk_perception/scripts/vqa_request.py b/jsk_perception/scripts/vqa_request.py
new file mode 100755
index 0000000000..ee7c1e8df3
--- /dev/null
+++ b/jsk_perception/scripts/vqa_request.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+
+import argparse
+import actionlib
+import cv2
+from cv_bridge import CvBridge
+from jsk_recognition_msgs.msg import VQATaskAction, VQATaskActionGoal
+import json
+import rospy # for logging
+import os
+
+class VQAClient(object):
+ def __init__(self):
+ self.ac_caption_client = actionlib.SimpleActionClient("/vqa/vqa_server", VQATaskAction)
+ self._bridge = CvBridge()
+
+ def request(self, questions_path, image_path, output_dir=None):
+ self.ac_caption_client.wait_for_server()
+ caption_goal = VQATaskActionGoal()
+ cv_image = cv2.imread(image_path)
+ image = self._bridge.cv2_to_imgmsg(cv_image, "bgr8")
+ caption_goal.goal.image = image
+ with open(questions_path) as f:
+ for q in f:
+ caption_goal.goal.questions.append(q)
+ self.ac_caption_client.send_goal(caption_goal.goal)
+ self.ac_caption_client.wait_for_result()
+ caption_result = self.ac_caption_client.get_result()
+ result = {}
+ for r in caption_result.result.result:
+ result[r.question] = r.answer
+ image_name = os.path.splitext(os.path.basename(image_path))[0]
+ json_name = image_name + ".json"
+ if output_dir:
+ save_path = os.path.join(output_dir, json_name)
+ else:
+ save_path = os.path.join(os.path.dirname(image_path), json_name)
+ with open(save_path, "w") as f:
+ json.dump(result, f, indent=4, separators=(',', ': '))
+
+parser = argparse.ArgumentParser(description="CLI interface for the VQA action client")
+parser.add_argument("questions_path", help="Path to a text file with one VQA question per line", type=str)
+parser.add_argument("image_path", help="Path to the input image", type=str)
+parser.add_argument("-o", "--output", default=None, help="Directory to save the resulting JSON (defaults to the image directory)")
+
+if __name__ == "__main__":
+ args = parser.parse_args()
+ rospy.init_node("vqa_request_client", anonymous=True)
+ client = VQAClient()
+ client.request(args.questions_path, args.image_path, args.output)
diff --git a/jsk_perception/setup.py b/jsk_perception/setup.py
index 66f43a495a..b4eee0e9e8 100644
--- a/jsk_perception/setup.py
+++ b/jsk_perception/setup.py
@@ -3,6 +3,9 @@
from setuptools import setup
from catkin_pkg.python_setup import generate_distutils_setup
-d = generate_distutils_setup()
+d = generate_distutils_setup(
+ packages=['jsk_perception'],
+ package_dir={'': 'src'},
+)
setup(**d)
diff --git a/jsk_perception/src/dual_fisheye_to_panorama.cpp b/jsk_perception/src/dual_fisheye_to_panorama.cpp
index d749cec51b..a10726d6ee 100644
--- a/jsk_perception/src/dual_fisheye_to_panorama.cpp
+++ b/jsk_perception/src/dual_fisheye_to_panorama.cpp
@@ -92,6 +92,8 @@ namespace jsk_perception
msg_panorama_info_.theta_max = M_PI;
msg_panorama_info_.phi_min = -M_PI;
msg_panorama_info_.phi_max = M_PI;
+ msg_panorama_info_.image_height = output_image_height_;
+ msg_panorama_info_.image_width = output_image_width_;
onInitPostProcess();
}
diff --git a/jsk_perception/src/invert_mask_image.cpp b/jsk_perception/src/invert_mask_image.cpp
new file mode 100644
index 0000000000..3fca1bdd69
--- /dev/null
+++ b/jsk_perception/src/invert_mask_image.cpp
@@ -0,0 +1,88 @@
+// -*- mode: c++ -*-
+/*********************************************************************
+ * Software License Agreement (BSD License)
+ *
+ * Copyright (c) 2022, JSK Lab
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ * * Neither the name of the JSK Lab nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *********************************************************************/
+
+#include "jsk_perception/invert_mask_image.h"
+#include
+#include
+#include
+#include
+
+
+namespace jsk_perception
+{
+ void InvertMaskImage::onInit()
+ {
+ DiagnosticNodelet::onInit();
+ pub_mask_ = advertise(*pnh_, "output", 1);
+ onInitPostProcess();
+ }
+
+ InvertMaskImage::~InvertMaskImage() {
+ }
+
+ void InvertMaskImage::subscribe()
+ {
+ int queue_size;
+ pnh_->param("queue_size", queue_size, 1);
+ sub_mask_ = pnh_->subscribe("input", queue_size, &InvertMaskImage::invert, this);
+ ros::V_string names = boost::assign::list_of("~input");
+ jsk_topic_tools::warnNoRemap(names);
+ }
+
+ void InvertMaskImage::unsubscribe()
+ {
+ sub_mask_.shutdown();
+ }
+
+ void InvertMaskImage::invert(const sensor_msgs::Image::ConstPtr& mask_msg)
+ {
+ vital_checker_->poke();
+ cv::Mat mask;
+ try {
+ mask = cv_bridge::toCvShare(mask_msg, "mono8")->image;
+ } catch(std::exception &e) {
+ ROS_ERROR("cv_bridge exception: %s", e.what());
+ return;
+ }
+ cv::bitwise_not(mask, mask);
+ pub_mask_.publish(cv_bridge::CvImage(
+ mask_msg->header,
+ "mono8",
+ mask).toImageMsg());
+ }
+}
+
+#include <pluginlib/class_list_macros.h>
+PLUGINLIB_EXPORT_CLASS (jsk_perception::InvertMaskImage, nodelet::Nodelet);
diff --git a/jsk_perception/src/jsk_perception/__init__.py b/jsk_perception/src/jsk_perception/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/jsk_perception/src/jsk_perception/label_image_classifier.py b/jsk_perception/src/jsk_perception/label_image_classifier.py
new file mode 100644
index 0000000000..08567fb847
--- /dev/null
+++ b/jsk_perception/src/jsk_perception/label_image_classifier.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+
+import cv_bridge
+from jsk_recognition_msgs.msg import ClassificationResult
+from jsk_topic_tools import ConnectionBasedTransport
+import numpy as np
+import rospy
+from sensor_msgs.msg import Image
+
+class LabelImageClassifier(ConnectionBasedTransport):
+
+ classifier_name = 'label_image_classifier'
+
+ def __init__(self):
+ super(LabelImageClassifier, self).__init__()
+ self.ignore_labels = rospy.get_param('~ignore_labels', [])
+ self.target_names = rospy.get_param('~target_names', [])
+ self.pub = self.advertise(
+ '~output', ClassificationResult, queue_size=1)
+
+ def subscribe(self):
+ self.sub = rospy.Subscriber('~input', Image, self._cb)
+
+ def unsubscribe(self):
+ self.sub.unregister()
+
+ def _cb(self, imgmsg):
+ bridge = cv_bridge.CvBridge()
+ img = bridge.imgmsg_to_cv2(imgmsg)
+ label, proba = self._classify(img)
+ msg = ClassificationResult()
+ msg.header = imgmsg.header
+ msg.labels = [label]
+ msg.label_names = [self.target_names[label]]
+ msg.label_proba = [proba[label]]
+ msg.probabilities = proba
+ msg.classifier = self.classifier_name
+ msg.target_names = self.target_names
+ self.pub.publish(msg)
+
+ def _classify(self, label_img):
+ counts = np.bincount(label_img.flatten(),
+ minlength=len(self.target_names))
+ counts[self.ignore_labels] = 0
+ label = np.argmax(counts)
+ proba = counts.astype(np.float32) / counts.sum()
+ return label, proba
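
A minimal sketch of the majority-vote logic in _classify above, using a toy 2x3 label image and made-up target names (both are assumptions for illustration); labels listed in ~ignore_labels are zeroed out before the argmax.

import numpy as np

label_img = np.array([[0, 2, 2],
                      [2, 1, 0]])          # toy label image
target_names = ['background', 'box', 'bottle']
ignore_labels = [0]                        # drop 'background' votes

counts = np.bincount(label_img.flatten(), minlength=len(target_names))  # [2, 1, 3]
counts[ignore_labels] = 0                                               # [0, 1, 3]
label = np.argmax(counts)                                               # 2 -> 'bottle'
proba = counts.astype(np.float32) / counts.sum()                        # [0.0, 0.25, 0.75]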
diff --git a/jsk_perception/src/jsk_perception/vgg16_object_recognition.py b/jsk_perception/src/jsk_perception/vgg16_object_recognition.py
new file mode 100644
index 0000000000..f582daa74b
--- /dev/null
+++ b/jsk_perception/src/jsk_perception/vgg16_object_recognition.py
@@ -0,0 +1,148 @@
+#!/usr/bin/env python
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+
+import itertools, pkg_resources, sys
+from distutils.version import LooseVersion
+if LooseVersion(pkg_resources.get_distribution("chainer").version) >= LooseVersion('7.0.0') and \
+ sys.version_info.major == 2:
+ print('''Please install chainer < 7.0.0:
+
+ sudo pip install chainer==6.7.0
+
+c.f https://github.com/jsk-ros-pkg/jsk_recognition/pull/2485
+''', file=sys.stderr)
+ sys.exit(1)
+if [p for p in list(itertools.chain(*[pkg_resources.find_distributions(_) for _ in sys.path])) if "cupy-" in p.project_name or "cupy" == p.project_name ] == []:
+ print('''Please install CuPy
+
+ sudo pip install cupy-cuda[your cuda version]
+i.e.
+ sudo pip install cupy-cuda91
+
+''', file=sys.stderr)
+ # sys.exit(1)
+import chainer
+from chainer import cuda
+import chainer.serializers as S
+from chainer import Variable
+from distutils.version import LooseVersion
+import numpy as np
+import skimage.transform
+
+import cv_bridge
+from jsk_recognition_msgs.msg import ClassificationResult
+from jsk_recognition_utils.chainermodels import VGG16
+from jsk_recognition_utils.chainermodels import VGG16BatchNormalization
+from jsk_topic_tools import ConnectionBasedTransport
+from jsk_topic_tools.log_utils import logerr_throttle
+import message_filters
+import rospy
+from sensor_msgs.msg import Image
+
+
+class VGG16ObjectRecognition(ConnectionBasedTransport):
+
+ mean_bgr = np.array([104.00698793, 116.66876762, 122.67891434])
+
+ def __init__(self):
+ super(self.__class__, self).__init__()
+ self.insize = 224
+ self.gpu = rospy.get_param('~gpu', -1)
+ self.target_names = rospy.get_param('~target_names')
+ self.model_name = rospy.get_param('~model_name')
+ if self.model_name == 'vgg16':
+ self.model = VGG16(n_class=len(self.target_names))
+ elif self.model_name == 'vgg16_batch_normalization':
+ self.model = VGG16BatchNormalization(
+ n_class=len(self.target_names))
+ else:
+ rospy.logerr('Unsupported ~model_name: {0}'
+ .format(self.model_name))
+ model_file = rospy.get_param('~model_file')
+ S.load_hdf5(model_file, self.model)
+ if self.gpu != -1:
+ self.model.to_gpu(self.gpu)
+ self.pub = self.advertise('~output', ClassificationResult,
+ queue_size=1)
+ self.pub_input = self.advertise(
+ '~debug/net_input', Image, queue_size=1)
+
+ def subscribe(self):
+ if rospy.get_param('~use_mask', False):
+ # a larger buff_size is necessary for callbacks that take a long time to run
+ # http://stackoverflow.com/questions/26415699/ros-subscriber-not-up-to-date/29160379#29160379 # NOQA
+ sub = message_filters.Subscriber(
+ '~input', Image, queue_size=1, buff_size=2**24)
+ sub_mask = message_filters.Subscriber(
+ '~input/mask', Image, queue_size=1, buff_size=2**24)
+ self.subs = [sub, sub_mask]
+ queue_size = rospy.get_param('~queue_size', 10)
+ if rospy.get_param('~approximate_sync', False):
+ slop = rospy.get_param('~slop', 0.1)
+ sync = message_filters.ApproximateTimeSynchronizer(
+ self.subs, queue_size=queue_size, slop=slop)
+ else:
+ sync = message_filters.TimeSynchronizer(
+ self.subs, queue_size=queue_size)
+ sync.registerCallback(self._recognize)
+ else:
+ sub = rospy.Subscriber(
+ '~input', Image, self._recognize, callback_args=None,
+ queue_size=1, buff_size=2**24)
+ self.subs = [sub]
+
+ def unsubscribe(self):
+ for sub in self.subs:
+ sub.unregister()
+
+ def _recognize(self, imgmsg, mask_msg=None):
+ bridge = cv_bridge.CvBridge()
+ bgr = bridge.imgmsg_to_cv2(imgmsg, desired_encoding='bgr8')
+ if mask_msg is not None:
+ mask = bridge.imgmsg_to_cv2(mask_msg)
+ if mask.shape != bgr.shape[:2]:
+ logerr_throttle(10,
+ 'Size of input image and mask is different')
+ return
+ elif mask.size == 0:
+ logerr_throttle(10, 'Size of input mask is 0')
+ return
+ bgr[mask == 0] = self.mean_bgr
+ bgr = skimage.transform.resize(
+ bgr, (self.insize, self.insize), preserve_range=True)
+ input_msg = bridge.cv2_to_imgmsg(bgr.astype(np.uint8), encoding='bgr8')
+ input_msg.header = imgmsg.header
+ self.pub_input.publish(input_msg)
+
+ blob = (bgr - self.mean_bgr).transpose((2, 0, 1))
+ x_data = np.array([blob], dtype=np.float32)
+ if self.gpu != -1:
+ x_data = cuda.to_gpu(x_data, device=self.gpu)
+ if LooseVersion(chainer.__version__) < LooseVersion('2.0.0'):
+ x = Variable(x_data, volatile=True)
+ self.model.train = False
+ self.model(x)
+ else:
+ with chainer.using_config('train', False), \
+ chainer.no_backprop_mode():
+ x = Variable(x_data)
+ self.model(x)
+
+ proba = cuda.to_cpu(self.model.pred.data)[0]
+ label = np.argmax(proba)
+ label_name = self.target_names[label]
+ label_proba = proba[label]
+ cls_msg = ClassificationResult(
+ header=imgmsg.header,
+ labels=[label],
+ label_names=[label_name],
+ label_proba=[label_proba],
+ probabilities=proba,
+ classifier=self.model_name,
+ target_names=self.target_names,
+ )
+ self.pub.publish(cls_msg)
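
A short sketch of the preprocessing done in _recognize above before the image reaches VGG16: resize to the 224x224 network input, subtract the BGR mean, reorder HWC to CHW, and add a batch axis. The random array is a stand-in for the decoded camera image.

import numpy as np
import skimage.transform

mean_bgr = np.array([104.00698793, 116.66876762, 122.67891434])
insize = 224

bgr = np.random.randint(0, 256, (480, 640, 3)).astype(np.uint8)   # stand-in for the ROS image
bgr = skimage.transform.resize(bgr, (insize, insize), preserve_range=True)
blob = (bgr - mean_bgr).transpose((2, 0, 1))     # HWC -> CHW
x_data = np.array([blob], dtype=np.float32)      # shape (1, 3, 224, 224)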
diff --git a/jsk_perception/src/jsk_perception/vil_inference_client.py b/jsk_perception/src/jsk_perception/vil_inference_client.py
new file mode 100644
index 0000000000..02026af4ff
--- /dev/null
+++ b/jsk_perception/src/jsk_perception/vil_inference_client.py
@@ -0,0 +1,249 @@
+#!/usr/bin/env python
+import abc
+import base64
+import json
+
+import actionlib
+import requests
+import rospy
+from cv_bridge import CvBridge
+from dynamic_reconfigure.server import Server
+from jsk_perception.cfg import ClassificationConfig, VQAConfig
+from jsk_recognition_msgs.msg import (ClassificationResult,
+ ClassificationTaskAction,
+ ClassificationTaskFeedback,
+ ClassificationTaskResult,
+ QuestionAndAnswerText, VQAResult,
+ VQATaskAction, VQATaskFeedback,
+ VQATaskResult)
+from requests.exceptions import ConnectionError
+from sensor_msgs.msg import CompressedImage, Image
+from std_msgs.msg import String
+
+import cv2
+
+
+class DockerInferenceClientBase(object):
+ def __init__(self, action,
+ server_config,
+ result_topic,
+ action_feedback,
+ action_result,
+ app_name):
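+ # action/action_feedback/action_result: actionlib types served on ~inference_server,
+ # server_config: dynamic_reconfigure config type, result_topic: message type of ~result,
+ # app_name: endpoint path on the HTTP inference server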
+ # inference server configuration
+ self.host = rospy.get_param("~host", default="localhost")
+ self.port = rospy.get_param("~port", default=8888)
+ self.app_name = app_name
+ # cv bridge
+ self._bridge = CvBridge()
+ # default inference image
+ self.default_img = None
+ # ROS
+ self.transport_hint = rospy.get_param('~image_transport', 'raw')
+ if self.transport_hint == 'compressed':
+ self.image_sub = rospy.Subscriber(
+ "{}/compressed".format(rospy.resolve_name('~image')),
+ CompressedImage,
+ callback=self.topic_cb,
+ queue_size=1,
+ buff_size=2**26
+ )
+
+ else:
+ self.image_sub = rospy.Subscriber("~image", Image,
+ callback=self.topic_cb,
+ queue_size=1,
+ buff_size=2**26)
+ self.result_topic_type = result_topic
+ self.result_pub = rospy.Publisher("~result", result_topic, queue_size=1)
+ if self.transport_hint == 'compressed':
+ self.image_pub = rospy.Publisher("~result/image/compressed", CompressedImage, queue_size=1)
+ else:
+ self.image_pub = rospy.Publisher("~result/image", Image, queue_size=1)
+ self.vis_pub = rospy.Publisher("~visualize", String, queue_size=1)
+ self.action_server = actionlib.SimpleActionServer("~inference_server",
+ action,
+ execute_cb=self.action_cb,
+ auto_start=False)
+ self.action_feedback = action_feedback
+ self.action_result = action_result
+ self.reconfigure_server = Server(server_config, self.config_cb)
+ self.action_server.start()
+
+ def ros_img_to_base(self, ros_img):
+ if type(ros_img) is CompressedImage:
+ cv_img = self._bridge.compressed_imgmsg_to_cv2(ros_img, desired_encoding="bgr8")
+ elif type(ros_img) is Image:
+ cv_img = self._bridge.imgmsg_to_cv2(ros_img, desired_encoding="bgr8")
+ else:
+ raise RuntimeError("Unknown type {}".format(type(ros_img)))
+ # convert to base64
+ encimg = cv2.imencode(".png", cv_img)[1]
+ img_str = encimg.tostring()
+ img_byte = base64.b64encode(img_str).decode("utf-8")
+ return img_byte
+
+ def config_cb(self, config, level):
+ self.config = config
+ return config
+
+ @abc.abstractmethod
+ def topic_cb(self, msg):
+ pass
+
+ def action_cb(self, goal):
+ success = True
+ result = self.action_result()
+ feedback = self.action_feedback()
+ if goal.image.data and (not goal.compressed_image.data):
+ image = goal.image
+ # result.result.image = image
+ elif (not goal.image.data) and goal.compressed_image.data:
+ image = goal.compressed_image
+ # result.result.compressed_image = image
+ elif goal.image.data and goal.compressed_image.data:
+ rospy.logerr("Image and compressed image cannot both be set in the goal")
+ return
+ else:
+ rospy.loginfo("No images in goal message, so using subscribed image topic instead")
+ image = self.default_img
+ queries = self.create_queries(goal)
+ try:
+ result.result = self.inference(image, queries)
+ except Exception as e:
+ rospy.logerr(str(e))
+ feedback.status = str(e)
+ success = False
+ finally:
+ self.action_server.publish_feedback(feedback)
+ result.done = success
+ self.action_server.set_succeeded(result)
+
+ @abc.abstractmethod
+ def create_queries(self, goal):
+ pass
+
+ @abc.abstractmethod
+ def inference(self, img_msg, queries):
+ pass
+
+ def send_request(self, content):
+ url = "http://{}:{}/{}".format(self.host, str(self.port), self.app_name)
+ try:
+ response = requests.post(url, data=content)
+ except ConnectionError as e:
+ rospy.logwarn_once("Cannot establish the connection with API server. Is it running?")
+ raise e
+ else:
+ if response.status_code == 200:
+ return response
+ else:
+ err_msg = "Invalid http status code: {}".format(str(response.status_code))
+ rospy.logerr(err_msg)
+ raise RuntimeError(err_msg)
+
+
+class ClipClientNode(DockerInferenceClientBase):
+ def __init__(self):
+ DockerInferenceClientBase.__init__(self,
+ ClassificationTaskAction,
+ ClassificationConfig,
+ ClassificationResult,
+ ClassificationTaskFeedback,
+ ClassificationTaskResult,
+ "inference")
+
+ def topic_cb(self, data):
+ if not self.config: rospy.logwarn("No config received yet"); return
+ if not self.config.queries: rospy.logwarn("No queries"); return
+ queries = self.config.queries.split(";")
+ try:
+ msg = self.inference(data, queries)
+ except Exception: return
+ # publish debug image
+ self.image_pub.publish(data)
+ # publish classification result
+ msg.header = data.header
+ self.result_pub.publish(msg)
+ # publish probabilities result as string
+ vis_msg = ""
+ for i, label in enumerate(msg.label_names):
+ vis_msg += "{}: {:.2f}% ".format(label, msg.probabilities[i]*100)
+ vis_msg += "\n\nCosine Similarity\n"
+ for i, label in enumerate(msg.label_names):
+ vis_msg += "{}: {:.4f} ".format(label, msg.label_proba[i])
+ self.vis_pub.publish(vis_msg)
+
+ def create_queries(self, goal):
+ return goal.queries
+
+ def inference(self, img_msg, queries):
+ img_byte = self.ros_img_to_base(img_msg)
+ req = json.dumps({"image": img_byte,
+ "queries": queries}).encode("utf-8")
+ response = self.send_request(req)
+ result_dic = json.loads(response.text)["results"]
+ labels = []
+ probabilities = []
+ similarities = []
+ for r in result_dic:
+ labels.append(r["question"])
+ probabilities.append(float(r["probability"]))
+ similarities.append(float(r["similarity"]))
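+ # sort so that label_names[0] is the best match (descending probability)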
+ labels = [label for _,label in sorted(zip(probabilities, labels), reverse=True)]
+ probabilities.sort(reverse=True)
+ similarities.sort(reverse=True)
+ # build ClassificationResult message
+ msg = self.result_topic_type()
+ msg.labels = list(range(len(labels)))
+ msg.label_names = labels
+ msg.label_proba = similarities # cosine similarities
+ msg.probabilities = probabilities # sum(probabilities) is 1
+ msg.classifier = 'clip'
+ msg.target_names = queries
+ return msg
+
+
+class OFAClientNode(DockerInferenceClientBase):
+ def __init__(self):
+ self.vqa_type = rospy.get_param("~vqa_type", default="caption") # caption, vqa_gen. caption is better than vqa_gen in OFA
+ if self.vqa_type not in ["caption", "vqa_gen"]:
+ raise RuntimeError("VQA type must be caption or vqa_gen")
+ DockerInferenceClientBase.__init__(self,
+ VQATaskAction,
+ VQAConfig,
+ VQAResult,
+ VQATaskFeedback,
+ VQATaskResult,
+ self.vqa_type)
+
+ def create_queries(self, goal):
+ return goal.questions
+
+ def inference(self, img_msg, queries):
+ img_byte = self.ros_img_to_base(img_msg)
+ req = json.dumps({"image": img_byte,
+ "queries": queries}).encode("utf-8")
+ response = self.send_request(req)
+ json_result = json.loads(response.text)
+ msg = self.result_topic_type()
+ for result in json_result["results"]:
+ result_msg = QuestionAndAnswerText()
+ result_msg.question = result["question"]
+ result_msg.answer = result["answer"]
+ msg.result.append(result_msg)
+ return msg
+
+ def topic_cb(self, data):
+ if not self.config or not self.config.questions: rospy.logwarn("No questions"); return
+ queries = self.config.questions.split(";")
+ try:
+ msg = self.inference(data, queries)
+ except Exception: return
+ self.image_pub.publish(data)
+ self.result_pub.publish(msg)
+ vis = ""
+ for qa in msg.result:
+ vis += "Q:{}\n A:{}\n".format(qa.question,
+ qa.answer)
+ self.vis_pub.publish(vis)
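
A minimal sketch, outside ROS, of the HTTP request that DockerInferenceClientBase builds: the image is PNG-encoded, base64-encoded, and POSTed as JSON together with the queries. The host, port, and endpoint below are the node's defaults (localhost:8888, the CLIP "inference" app) and assume the inference server container is already running; the printed result structure is inferred from the parsing code above.

import base64
import json

import cv2
import numpy as np
import requests

cv_img = np.zeros((240, 320, 3), dtype=np.uint8)   # stand-in for the subscribed camera frame
img_byte = base64.b64encode(cv2.imencode(".png", cv_img)[1].tobytes()).decode("utf-8")
req = json.dumps({"image": img_byte, "queries": ["human", "apple", "book"]}).encode("utf-8")
response = requests.post("http://localhost:8888/inference", data=req)
print(json.loads(response.text)["results"])        # list of {question, probability, similarity}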
diff --git a/jsk_perception/src/point_pose_extractor.cpp b/jsk_perception/src/point_pose_extractor.cpp
index e94a5b7a32..3a49185548 100644
--- a/jsk_perception/src/point_pose_extractor.cpp
+++ b/jsk_perception/src/point_pose_extractor.cpp
@@ -32,677 +32,29 @@
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*********************************************************************/
-
-#include
-#include
-#include
-#if ( CV_MAJOR_VERSION >= 4)
-#include
-#include
-#include
-#else
-#include
-#include
-#endif
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
+#include "jsk_perception/point_pose_extractor.h"
#include
-namespace enc = sensor_msgs::image_encodings;
-
-bool _first_sample_change;
-
-void features2keypoint (posedetection_msgs::Feature0D features,
- std::vector& keypoints,
- cv::Mat& descriptors){
- keypoints.resize(features.scales.size());
- descriptors.create(features.scales.size(),features.descriptor_dim,CV_32FC1);
- std::vector::iterator keypoint_it = keypoints.begin();
- for ( int i=0; keypoint_it != keypoints.end(); ++keypoint_it, ++i ) {
- *keypoint_it = cv::KeyPoint(cv::Point2f(features.positions[i*2+0],
- features.positions[i*2+1]),
- features.descriptor_dim, // size
- features.orientations[i], //angle
- 0, // resonse
- features.scales[i] // octave
- );
- for (int j = 0; j < features.descriptor_dim; j++){
- descriptors.at(i,j) =
- features.descriptors[i*features.descriptor_dim+j];
- }
- }
-}
-
-
-class Matching_Template
-{
-public:
- std::string _matching_frame;
- cv::Mat _template_img;
- std::vector _template_keypoints;
- cv::Mat _template_descriptors;
- int _original_width_size;
- int _original_height_size;
- double _template_width; // width of template [m]
- double _template_height; // height of template [m]
- tf::Transform _relativepose;
- cv::Mat _affine_matrix;
- std::string _window_name;
- // The maximum allowed reprojection error to treat a point pair as an inlier
- double _reprojection_threshold;
- // threshold on squared ratio of distances between NN and 2nd NN
- double _distanceratio_threshold;
- std::vector _correspondances;
- cv::Mat _previous_stack_img;
-
- Matching_Template(){
- }
- Matching_Template(cv::Mat img,
- std::string matching_frame,
- int original_width_size,
- int original_height_size,
- double template_width,
- double template_height,
- tf::Transform relativepose,
- cv::Mat affine_matrix,
- double reprojection_threshold,
- double distanceratio_threshold,
- std::string window_name,
- bool autosize){
-
- _template_img = img.clone();
- _matching_frame = matching_frame;
- _original_width_size = original_width_size;
- _original_height_size = original_height_size;
- _template_width = template_width;
- _template_height = template_height;
- _relativepose = relativepose;
- _affine_matrix = affine_matrix;
- _window_name = window_name;
- _reprojection_threshold = reprojection_threshold;
- _distanceratio_threshold = distanceratio_threshold;
- _template_keypoints.clear();
- _correspondances.clear();
- // cv::namedWindow(_window_name, autosize ? CV_WINDOW_AUTOSIZE : 0);
- }
-
-
- virtual ~Matching_Template(){
- std::cerr << "delete " << _window_name << std::endl;
- }
-
-
- std::string get_window_name(){
- return _window_name;
- }
-
- std::vector* correspondances(){
- return &_correspondances;
- }
-
- bool check_template (){
- if (_template_img.empty() && _template_keypoints.size() == 0 &&
- _template_descriptors.empty()){
- return false;
- }
- else {
- return true;
- }
- }
-
-
- int set_template(ros::ServiceClient client){
- posedetection_msgs::Feature0DDetect srv;
-
- if ( _template_img.empty()) {
- ROS_ERROR ("template picture is empty.");
- return -1;
- }
- ROS_INFO_STREAM("read template image and call " << client.getService() << " service");
- cv_bridge::CvImage cv_img;
- cv_img.image = cv::Mat(_template_img);
- cv_img.encoding = std::string("bgr8");
- srv.request.image = *(cv_img.toImageMsg());
- if (client.call(srv)) {
- ROS_INFO_STREAM("get features with " << srv.response.features.scales.size() << " descriptoins");
- features2keypoint (srv.response.features, _template_keypoints, _template_descriptors);
-
- // inverse affine translation
- cv::Mat M_inv;
- cv::Mat dst;
- cv::invert(_affine_matrix, M_inv);
- // cv::drawKeypoints (tmp, _template_keypoints, dst, cv::Scalar::all(1), cv::DrawMatchesFlags::DRAW_RICH_KEYPOINTS);
- // cv::drawKeypoints (tmp, _template_keypoints, dst);
- // cv::warpPerspective(_template_img, dst, M_inv,
- // cv::Size(_original_width_size, _original_height_size),
- // CV_INTER_LINEAR, IPL_BORDER_CONSTANT, 0);
- for (int i = 0; i < (int)_template_keypoints.size(); i++){
- cv::Point2f pt;
- cv::Mat pt_mat(cv::Size(1,1), CV_32FC2, &pt);
- cv::Mat pt_src_mat(cv::Size(1,1), CV_32FC2, &_template_keypoints.at(i).pt);
- cv::perspectiveTransform (pt_src_mat, pt_mat, M_inv);
- _template_keypoints.at(i).pt = pt_mat.at(0,0);
- }
- // cvSetMouseCallback (_window_name.c_str(), &PointPoseExtractor::cvmousecb, this);
- // _template_img = dst;
- return 0;
- } else {
- ROS_ERROR("Failed to call service Feature0DDetect");
- return 1;
- }
- }
-
-
- double log_fac( int n )
- {
- static std::vector slog_table;
- int osize = slog_table.size();
- if(osize <= n){
- slog_table.resize(n+1);
- if(osize == 0){
- slog_table[0] = -1;
- slog_table[1] = log(1.0);
- osize = 2;
- }
- for(int i = osize; i <= n; i++ ){
- slog_table[i] = slog_table[i-1] + log(i);
- }
- }
- return slog_table[n];
- }
-
- int min_inlier( int n, int m, double p_badsupp, double p_badxform )
- {
- double pi, sum;
- int i, j;
- double lp_badsupp = log( p_badsupp );
- double lp_badsupp1 = log( 1.0 - p_badsupp );
- for( j = m+1; j <= n; j++ ){
- sum = 0;
- for( i = j; i <= n; i++ ){
- pi = (i-m) * lp_badsupp + (n-i+m) * lp_badsupp1 +
- log_fac( n - m ) - log_fac( i - m ) - log_fac( n - i );
- sum += exp( pi );
- }
- if( sum < p_badxform )
- break;
- }
- return j;
- }
-
-
- bool estimate_od (ros::ServiceClient client, cv::Mat src_img,
- std::vector sourceimg_keypoints,
- image_geometry::PinholeCameraModel pcam,
- double err_thr, cv::Mat &stack_img,
- cv::flann::Index* ft, posedetection_msgs::Object6DPose* o6p){
-
- if ( _template_keypoints.size()== 0 &&
- _template_descriptors.empty() ){
- set_template(client);
- }
- if ( _template_keypoints.size()== 0 &&
- _template_descriptors.empty() ) {
- ROS_ERROR ("Template image was not set.");
- return false;
- }
- // stacked image
- cv::Size stack_size = cv::Size(MAX(src_img.cols,_template_img.cols),
- src_img.rows+_template_img.rows);
- stack_img = cv::Mat(stack_size,CV_8UC3);
- stack_img = cv::Scalar(0);
- cv::Mat stack_img_tmp(stack_img,cv::Rect(0,0,_template_img.cols,_template_img.rows));
- cv::add(stack_img_tmp, _template_img, stack_img_tmp);
- cv::Mat stack_img_src(stack_img,cv::Rect(0,_template_img.rows,src_img.cols,src_img.rows));
- cv::add(stack_img_src, src_img, stack_img_src);
- _previous_stack_img = stack_img.clone();
-
- // matching
- cv::Mat m_indices(_template_descriptors.rows, 2, CV_32S);
- cv::Mat m_dists(_template_descriptors.rows, 2, CV_32F);
- ft->knnSearch(_template_descriptors, m_indices, m_dists, 2, cv::flann::SearchParams(-1) );
-
- // matched points
- std::vector pt1, pt2;
- std::vector queryIdxs,trainIdxs;
- for ( unsigned int j = 0; j < _template_keypoints.size(); j++ ) {
- if ( m_dists.at(j,0) < m_dists.at(j,1) * _distanceratio_threshold) {
- queryIdxs.push_back(j);
- trainIdxs.push_back(m_indices.at(j,0));
- }
- }
- if ( queryIdxs.size() == 0 ) {
- ROS_WARN_STREAM("could not find matched points with distanceratio(" <<_distanceratio_threshold << ")");
- } else {
- cv::KeyPoint::convert(_template_keypoints,pt1,queryIdxs);
- cv::KeyPoint::convert(sourceimg_keypoints,pt2,trainIdxs);
- }
-
- ROS_INFO ("Found %d total matches among %d template keypoints", (int)pt2.size(), (int)_template_keypoints.size());
-
- cv::Mat H;
- std::vector mask((int)pt2.size());
-
- if ( pt1.size() > 4 ) {
- // ToDO for curve face
- H = cv::findHomography(cv::Mat(pt1), cv::Mat(pt2), mask, CV_RANSAC, _reprojection_threshold);
- }
-
- // draw line
- for (int j = 0; j < (int)pt1.size(); j++){
- cv::Point2f pt, pt_orig;
- cv::Mat pt_mat(cv::Size(1,1), CV_32FC2, &pt);
- cv::Mat pt_src_mat(cv::Size(1,1), CV_32FC2, &pt1.at(j));
- cv::perspectiveTransform (pt_src_mat, pt_mat, _affine_matrix);
- pt_orig = pt_mat.at(0,0);
- if ( mask.at(j)){
- cv::line(stack_img, pt_orig, pt2.at(j)+cv::Point2f(0,_template_img.rows),
- CV_RGB(0,255,0), 1,8,0);
- }
- else {
- cv::line(stack_img, pt_orig, pt2.at(j)+cv::Point2f(0,_template_img.rows),
- CV_RGB(255,0,255), 1,8,0);
- }
- }
- int inlier_sum = 0;
- for (int k=0; k < (int)mask.size(); k++){
- inlier_sum += (int)mask.at(k);
- }
-
- double text_scale = 1.5;
- {
- int fontFace = 0, thickness = 0, baseLine;
- int x, y;
- cv::Size text_size;
- std::string text;
-
- text = "inlier: " + boost::lexical_cast((int)inlier_sum) + " / " + boost::lexical_cast((int)pt2.size());
- text_size = cv::getTextSize(text, fontFace, text_scale, thickness, &baseLine);
- x = stack_img.size().width - text_size.width;
- y = text_size.height + thickness + 10; // 10pt pading
- cv::putText (stack_img, text, cv::Point(x, y),
- fontFace, text_scale, CV_RGB(0, 255, 0),
- thickness, 8, false);
-
- text = "template: " + boost::lexical_cast((int)_template_keypoints.size());
- text_size = cv::getTextSize(text, fontFace, text_scale, thickness, &baseLine);
- x = stack_img.size().width - text_size.width;
- y += text_size.height + thickness + 10; // 10pt pading
- cv::putText (stack_img, text, cv::Point(x, y),
- fontFace, text_scale, CV_RGB(0, 255, 0),
- thickness, 8, false);
- }
-
- // draw correspondances
- ROS_INFO(" _correspondances.size: %d", (int)_correspondances.size());
- for (int j = 0; j < (int)_correspondances.size(); j++){
- cv::circle(stack_img, cv::Point2f(_correspondances.at(j).x, _correspondances.at(j).y + _template_img.size().height),
- 8, CV_RGB(255,0,0), -1);
- }
-
- ROS_INFO(" inlier_sum:%d min_lier:%d", inlier_sum, min_inlier((int)pt2.size(), 4, 0.10, 0.01));
- if ((cv::countNonZero( H ) == 0) || (inlier_sum < min_inlier((int)pt2.size(), 4, 0.10, 0.01))){
- ROS_INFO(" inlier_sum < min_lier return-from estimate-od");
- if( _window_name != "" )
- cv::imshow(_window_name, stack_img);
- return false;
- }
-
- std::string _type;
- char chr[20];
-
- // number of match points
- sprintf(chr, "%d", (int)pt2.size());
- _type = _matching_frame + "_" + std::string(chr);
-
- sprintf(chr, "%d", inlier_sum);
- _type = _type + "_" + std::string(chr);
-
- cv::Point2f corners2d[4] = {cv::Point2f(0,0),
- cv::Point2f(_original_width_size,0),
- cv::Point2f(_original_width_size,_original_height_size),
- cv::Point2f(0,_original_height_size)};
- cv::Mat corners2d_mat (cv::Size(4, 1), CV_32FC2, corners2d);
- cv::Point3f corners3d[4] = {cv::Point3f(0,0,0),
- cv::Point3f(0,_template_width,0),
- cv::Point3f(_template_height,_template_width,0),
- cv::Point3f(_template_height,0,0)};
- cv::Mat corners3d_mat (cv::Size(4, 1), CV_32FC3, corners3d);
-
- cv::Mat corners2d_mat_trans;
-
- cv::perspectiveTransform (corners2d_mat, corners2d_mat_trans, H);
-
- double fR3[3], fT3[3];
- cv::Mat rvec(3, 1, CV_64FC1, fR3);
- cv::Mat tvec(3, 1, CV_64FC1, fT3);
- cv::Mat zero_distortion_mat = cv::Mat::zeros(4, 1, CV_64FC1);
-
- cv::solvePnP (corners3d_mat, corners2d_mat_trans,
- pcam.intrinsicMatrix(),
- zero_distortion_mat,//if unrectified: pcam.distortionCoeffs()
- rvec, tvec);
-
- tf::Transform checktf, resulttf;
-
- checktf.setOrigin( tf::Vector3(fT3[0], fT3[1], fT3[2] ) );
- double rx = fR3[0], ry = fR3[1], rz = fR3[2];
- tf::Quaternion quat;
- double angle = cv::norm(rvec);
- quat.setRotation(tf::Vector3(rx/angle, ry/angle, rz/angle), angle);
- checktf.setRotation( quat );
-
- resulttf = checktf * _relativepose;
-
- ROS_INFO( " tx: (%0.2lf,%0.2lf,%0.2lf) rx: (%0.2lf,%0.2lf,%0.2lf)",
- resulttf.getOrigin().getX(),
- resulttf.getOrigin().getY(),
- resulttf.getOrigin().getZ(),
- resulttf.getRotation().getAxis().x() * resulttf.getRotation().getAngle(),
- resulttf.getRotation().getAxis().y() * resulttf.getRotation().getAngle(),
- resulttf.getRotation().getAxis().z() * resulttf.getRotation().getAngle());
-
- o6p->pose.position.x = resulttf.getOrigin().getX();
- o6p->pose.position.y = resulttf.getOrigin().getY();
- o6p->pose.position.z = resulttf.getOrigin().getZ();
- o6p->pose.orientation.w = resulttf.getRotation().w();
- o6p->pose.orientation.x = resulttf.getRotation().x();
- o6p->pose.orientation.y = resulttf.getRotation().y();
- o6p->pose.orientation.z = resulttf.getRotation().z();
- o6p->type = _matching_frame; // _type
-
- // draw 3d cube model
- std::vector projected_top;
- {
- tf::Vector3 coords[8] = {tf::Vector3(0,0,0),
- tf::Vector3(0, _template_width, 0),
- tf::Vector3(_template_height, _template_width,0),
- tf::Vector3(_template_height, 0, 0),
- tf::Vector3(0, 0, -0.03),
- tf::Vector3(0, _template_width, -0.03),
- tf::Vector3(_template_height, _template_width, -0.03),
- tf::Vector3(_template_height, 0, -0.03)};
-
- projected_top = std::vector(8);
-
- for(int i=0; i<8; i++) {
- coords[i] = checktf * coords[i];
- cv::Point3f pt(coords[i].getX(), coords[i].getY(), coords[i].getZ());
- projected_top[i] = pcam.project3dToPixel(pt);
- }
- }
-
- { // check if the matched region does not too big or too small
- float max_x, max_y, min_x, min_y;
- max_x = max_y = -1e9;
- min_x = min_y = 1e9;
- for (int j = 0; j < 4; j++){
- cv::Point2f pt = corners2d_mat_trans.at(0,j);
- max_x = std::max(max_x, pt.x), max_y = std::max(max_y, pt.y);
- min_x = std::min(min_x, pt.x), min_y = std::min(min_y, pt.y);
- }
- if((max_x - min_x) < 30 || (max_y - min_y) < 30 ||
- src_img.rows < (max_x - min_x)/2 || src_img.cols < (max_y - min_y)/2){
- ROS_INFO(" matched region is too big or small (2< && <30) width:%f height:%f return-from estimate-od", max_x - min_x, max_y - min_y);
- return false;
- }
- }
-
- double err_sum = 0;
- bool err_success = true;
- for (int j = 0; j < 4; j++){
- double err = sqrt(pow((corners2d_mat_trans.at(0,j).x - projected_top.at(j).x), 2) +
- pow((corners2d_mat_trans.at(0,j).y - projected_top.at(j).y), 2));
- err_sum += err;
- }
- if (err_sum > err_thr){
- ROS_INFO(" err_sum:%f > err_thr:%f return-from estimate-od", err_sum, err_thr);
- err_success = false;
- } else {
- o6p->reliability = 1.0 - (err_sum / err_thr);
- }
- // draw lines around the detected object
- for (int j = 0; j < corners2d_mat_trans.cols; j++){
- cv::Point2f p1(corners2d_mat_trans.at(0,j).x,
- corners2d_mat_trans.at(0,j).y+_template_img.rows);
- cv::Point2f p2(corners2d_mat_trans.at(0,(j+1)%corners2d_mat_trans.cols).x,
- corners2d_mat_trans.at(0,(j+1)%corners2d_mat_trans.cols).y+_template_img.rows);
- cv::line (stack_img, p1, p2, CV_RGB(255, 0, 0),
- (err_success?4:1), // width
- CV_AA, 0);
- }
-
- // draw 3d cube model
- if(projected_top.size() == 8) { // verify, the size is 8
- int cnt = 8;
- std::vector ps(cnt);
- for(int i=0; i ps(4);
-
- for(int i=0; i<4; i++) {
- coords[i] = resulttf * coords[i];
- cv::Point3f pt(coords[i].getX(), coords[i].getY(), coords[i].getZ());
- ps[i] = pcam.project3dToPixel(pt);
- ps[i].y += _template_img.rows; // draw on camera image
- }
-
- cv::line (stack_img, ps[0], ps[1], CV_RGB(255, 0, 0), 3, CV_AA, 0);
- cv::line (stack_img, ps[0], ps[2], CV_RGB(0, 255, 0), 3, CV_AA, 0);
- cv::line (stack_img, ps[0], ps[3], CV_RGB(0, 0, 255), 3, CV_AA, 0);
- }
-
- // write text on image
- {
- std::string text;
- int x, y;
- text = "error: " + boost::lexical_cast(err_sum);
- x = stack_img.size().width - 16*17*text_scale; // 16pt * 17
- y = _template_img.size().height - (16 + 2)*text_scale*6;
- cv::putText (stack_img, text, cv::Point(x, y),
- 0, text_scale, CV_RGB(0, 255, 0),
- 2, 8, false);
- ROS_INFO(" %s < %f (threshold)", text.c_str(), err_thr );
- }
- // for debug window
- if( _window_name != "" )
- cv::imshow(_window_name, stack_img);
-
- return err_success;
- }
-}; // the end of difinition of class Matching_Template
-
-
-class PointPoseExtractor
+namespace jsk_perception
{
- ros::NodeHandle _n;
- image_transport::ImageTransport it;
- ros::Subscriber _sub;
- ros::ServiceServer _server;
- ros::ServiceClient _client;
- ros::Publisher _pub, _pub_agg, _pub_pose;
- tf::TransformBroadcaster _br;
- image_transport::Publisher _debug_pub;
- double _reprojection_threshold;
- double _distanceratio_threshold;
- double _th_step;
- double _phi_step;
- bool _autosize;
- double _err_thr;
- static std::vector _templates;
- image_geometry::PinholeCameraModel pcam;
- bool pnod;
- bool _initialized;
- bool _viewer;
- bool _publish_tf;
- std::string _child_frame_id;
-
-public:
- PointPoseExtractor() : it(ros::NodeHandle("~")) {
- // _sub = _n.subscribe("ImageFeature0D", 1,
- // &PointPoseExtractor::imagefeature_cb, this);
- _client = _n.serviceClient("Feature0DDetect");
- _pub = _n.advertise("ObjectDetection", 10);
- _pub_agg = _n.advertise("ObjectDetection_agg", 10);
- _pub_pose = _n.advertise("object_pose", 10);
- _debug_pub = it.advertise("debug_image", 1);
- _server = _n.advertiseService("SetTemplate", &PointPoseExtractor::settemplate_cb, this);
- _initialized = false;
- }
-
- virtual ~PointPoseExtractor(){
+ PointPoseExtractor::~PointPoseExtractor(){
_sub.shutdown();
_client.shutdown();
_pub.shutdown();
_pub_agg.shutdown();
}
- static void make_template_from_mousecb(Matching_Template *mt){
- cv::Mat H;
- cv::Mat tmp_template, tmp_warp_template;
- std::vectorpt1, pt2;
- double width, height;
- std::string filename;
- std::cout << "input template's [width]" << std::endl;
- std::cin >> width;
- std::cout << "input template's [height]" << std::endl;
- std::cin >> height;
- std::cout << "input template's [filename]" << std::endl;
- std::cin >> filename;
-
- for (int i = 0; i < 4; i++){
- pt1.push_back(cv::Point2d((int)mt->_correspondances.at(i).x,
- (int)mt->_correspondances.at(i).y + mt->_template_img.size().height));
- }
- cv::Rect rect = cv::boundingRect(cv::Mat(pt1));
- double scale = std::max(width, height) / 500.0;
- int iwidth = width / scale, iheight = height / scale;
- pt2.push_back(cv::Point2d(0,0));
- pt2.push_back(cv::Point2d(iwidth,0));
- pt2.push_back(cv::Point2d(iwidth,iheight));
- pt2.push_back(cv::Point2d(0, iheight));
- H = cv::findHomography(cv::Mat(pt1), cv::Mat(pt2));
-
- cv::getRectSubPix(mt->_previous_stack_img, rect.size(),
- cv::Point2f((rect.tl().x + rect.br().x)/2.0,(rect.tl().y + rect.br().y)/2.0),
- tmp_template);
- cv::warpPerspective(mt->_previous_stack_img, tmp_warp_template, H, cv::Size(iwidth, iheight));
-
- try {
- cv::imwrite(filename,tmp_template);
- boost::filesystem::path fname(filename);
- std::stringstream ss;
- ss << fname.stem() << "_wrap" << fname.extension();
- cv::imwrite(ss.str(),tmp_warp_template);
- }catch (cv::Exception e) {
- std::cerr << e.what() << std::endl;
- }
-
- for (int i = 0; i < (int)pt1.size(); i++){
- pt2.push_back(cv::Point2d((int)pt1.at(i).x - rect.x,
- (int)pt1.at(i).y - rect.y - mt->_template_img.size().height));
- }
- // cv::Mat mask_img = cv::Mat::zeros(tmp_template.size(), CV_8UC3);
- // cv::fillConvexPoly(mask_img, pt2.begin(), (int)pt2.size(), CV_RGB(255, 255, 255));
-
- // cv::namedWindow("hoge", 1);
- // cv::imshow("hoge", mask_img);
-
- cv::Mat M = (cv::Mat_(3,3) << 1,0,0, 0,1,0, 0,0,1);
- std::string window_name = "sample" + boost::lexical_cast((int)_templates.size());
-
- Matching_Template* tmplt =
- new Matching_Template (tmp_warp_template, "sample",
- tmp_warp_template.size().width, tmp_warp_template.size().height,
- width, height,
- tf::Transform(tf::Quaternion(0, 0, 0, 1), tf::Vector3(0, 0, 0)),
- M,
- mt->_reprojection_threshold,
- mt->_distanceratio_threshold,
- _first_sample_change ? window_name : mt->_window_name,
- cv::getWindowProperty(mt->_window_name, CV_WND_PROP_AUTOSIZE));
-
- mt->_correspondances.clear();
- _templates.push_back(tmplt);
- cv::namedWindow(_first_sample_change ? window_name : mt->_window_name,
- cv::getWindowProperty(mt->_window_name, CV_WND_PROP_AUTOSIZE));
- cvSetMouseCallback (_first_sample_change ? window_name.c_str() : mt->_window_name.c_str(),
- &cvmousecb, static_cast(_templates.back()));
- _first_sample_change = true;
- }
-
- static void cvmousecb (int event, int x, int y, int flags, void* param){
- Matching_Template *mt = (Matching_Template*)param;
- // std::cerr << "mousecb_ -> " << mt << std::endl;
- switch (event){
- case CV_EVENT_LBUTTONUP: {
- cv::Point2d pt(x,y - (int)mt->_template_img.size().height);
- ROS_INFO("add correspondence (%d, %d)", (int)pt.x, (int)pt.y);
- mt->_correspondances.push_back(pt);
- if ((int)mt->_correspondances.size() >= 4){
- make_template_from_mousecb(mt);
- mt->_correspondances.clear();
- ROS_INFO("reset");
- }
- break;
- }
- case CV_EVENT_RBUTTONUP: {
- mt->_correspondances.clear();
- ROS_INFO("reset");
- break;
- }
- }
- }
-
-
- void initialize () {
+ void PointPoseExtractor::initialize () {
std::string _pose_str;
double template_width;
double template_height;
std::string template_filename;
std::string window_name;
- ros::NodeHandle local_nh("~");
- local_nh.param("child_frame_id", _child_frame_id, std::string("matching"));
- local_nh.param("object_width", template_width, 0.06);
- local_nh.param("object_height", template_height, 0.0739);
- local_nh.param("relative_pose", _pose_str, std::string("0 0 0 0 0 0 1"));
+ pnh_->param("child_frame_id", _child_frame_id, std::string("matching"));
+ pnh_->param("object_width", template_width, 0.06);
+ pnh_->param("object_height", template_height, 0.0739);
+ pnh_->param("relative_pose", _pose_str, std::string("0 0 0 0 0 0 1"));
std::string default_template_file_name;
try {
#ifdef ROSPACK_EXPORT
@@ -719,19 +71,17 @@ class PointPoseExtractor
#endif
} catch (std::runtime_error &e) {
}
- local_nh.param("template_filename", template_filename, default_template_file_name);
- local_nh.param("reprojection_threshold", _reprojection_threshold, 3.0);
- local_nh.param("distanceratio_threshold", _distanceratio_threshold, 0.49);
- local_nh.param("error_threshold", _err_thr, 50.0);
- local_nh.param("theta_step", _th_step, 5.0);
- local_nh.param("phi_step", _phi_step, 5.0);
- local_nh.param("viewer_window", _viewer, true);
- local_nh.param("window_name", window_name, std::string("sample1"));
- local_nh.param("autosize", _autosize, false);
- local_nh.param("publish_null_object_detection", pnod, false);
- local_nh.param("publish_tf", _publish_tf, false);
-
- _first_sample_change = false;
+ pnh_->param("template_filename", template_filename, default_template_file_name);
+ pnh_->param("reprojection_threshold", _reprojection_threshold, 3.0);
+ pnh_->param("distanceratio_threshold", _distanceratio_threshold, 0.49);
+ pnh_->param("error_threshold", _err_thr, 50.0);
+ pnh_->param("theta_step", _th_step, 5.0);
+ pnh_->param("phi_step", _phi_step, 5.0);
+ pnh_->param("viewer_window", _viewer, true);
+ pnh_->param("window_name", window_name, std::string("sample1"));
+ pnh_->param("autosize", _autosize, false);
+ pnh_->param("publish_null_object_detection", pnod, false);
+ pnh_->param("publish_tf", _publish_tf, false);
// make one template
cv::Mat template_img;
@@ -741,6 +91,8 @@ class PointPoseExtractor
return;
}
+ _first_sample_change = false;
+
// relative pose
std::vector<double> rv(7);
std::istringstream iss(_pose_str);
@@ -750,11 +102,11 @@ class PointPoseExtractor
if (iss.eof()) { // use rpy expression
transform = tf::Transform(tf::createQuaternionFromRPY(rv[3], rv[4], rv[5]),
- tf::Vector3(rv[0], rv[1], rv[2]));
+ tf::Vector3(rv[0], rv[1], rv[2]));
} else { // use quaternion expression
iss >> rv[6];
transform = tf::Transform(tf::Quaternion(rv[3], rv[4], rv[5], rv[6]),
- tf::Vector3(rv[0], rv[1], rv[2]));
+ tf::Vector3(rv[0], rv[1], rv[2]));
}
// add the image to template list
@@ -764,8 +116,8 @@ class PointPoseExtractor
} // initialize
- cv::Mat make_homography(cv::Mat src, cv::Mat rvec, cv::Mat tvec,
- double template_width, double template_height, cv::Size &size){
+ cv::Mat PointPoseExtractor::make_homography(cv::Mat src, cv::Mat rvec, cv::Mat tvec,
+ double template_width, double template_height, cv::Size &size){
cv::Point3f coner[4] = {cv::Point3f(-(template_width/2.0), -(template_height/2.0),0),
cv::Point3f(template_width/2.0,-(template_height/2.0),0),
@@ -806,11 +158,11 @@ class PointPoseExtractor
}
- int make_warped_images (cv::Mat src, std::vector<cv::Mat> &imgs,
- std::vector<cv::Mat> &Mvec,
- double template_width, double template_height,
- double th_step, double phi_step){
-
+ int PointPoseExtractor::make_warped_images (cv::Mat src, std::vector<cv::Mat> &imgs,
+ std::vector<cv::Mat> &Mvec,
+ double template_width, double template_height,
+ double th_step, double phi_step){
+
std::vector<cv::Size> sizevec;
for (int i = (int)((-3.14/4.0)/th_step); i*th_step < 3.14/4.0; i++){
@@ -831,11 +183,13 @@ class PointPoseExtractor
cv::Mat M;
cv::Size size;
M = make_homography(src, rvec, tvec, template_width, template_height, size);
- Mvec.push_back(M);
- sizevec.push_back(size);
+ //Whenever an H matrix cannot be estimated, cv::findHomography returns an empty one.
+ if(!M.empty()){
+ Mvec.push_back(M);
+ sizevec.push_back(size);
+ }
}
}
-
for (int i = 0; i < (int)Mvec.size(); i++){
cv::Mat dst;
cv::warpPerspective(src, dst, Mvec.at(i), sizevec.at(i),
@@ -845,9 +199,9 @@ class PointPoseExtractor
return 0;
}
- bool add_new_template(cv::Mat img, std::string typestr, tf::Transform relative_pose,
- double template_width, double template_height,
- double theta_step=5.0, double phi_step=5.0)
+ bool PointPoseExtractor::add_new_template(cv::Mat img, std::string typestr, tf::Transform relative_pose,
+ double template_width, double template_height,
+ double theta_step=5.0, double phi_step=5.0)
{
std::vector<cv::Mat> imgs;
std::vector<cv::Mat> Mvec;
@@ -872,36 +226,35 @@ class PointPoseExtractor
(_viewer ? type : ""), _autosize);
_templates.push_back(tmplt);
if( _viewer )
- {
- cv::namedWindow(type, _autosize ? CV_WINDOW_AUTOSIZE : 0);
- cvSetMouseCallback (type.c_str(), &cvmousecb, static_cast<void *>(_templates.back()));
- }
+ {
+ cv::namedWindow(type, _autosize ? CV_WINDOW_AUTOSIZE : 0);
+ cvSetMouseCallback (type.c_str(), &cvmousecb, this);
+ }
}
return true;
}
- bool settemplate_cb (jsk_perception::SetTemplate::Request &req,
- jsk_perception::SetTemplate::Response &res){
- cv_bridge::CvImagePtr cv_ptr;
- cv_ptr = cv_bridge::toCvCopy(req.image, enc::BGR8);
- cv::Mat img(cv_ptr->image);
+ bool PointPoseExtractor::settemplate_cb (jsk_perception::SetTemplate::Request &req,
+ jsk_perception::SetTemplate::Response &res){
+ if ( !_initialized ) {
+ ROS_WARN("SetTemplate service is available only after receiving input ImageFeature0D");
+ return false;
+ }
+ cv_bridge::CvImagePtr cv_ptr;
+ cv_ptr = cv_bridge::toCvCopy(req.image, enc::BGR8);
+ cv::Mat img(cv_ptr->image);
- tf::Transform transform(tf::Quaternion(req.relativepose.orientation.x,
- req.relativepose.orientation.y,
- req.relativepose.orientation.z,
- req.relativepose.orientation.w),
- tf::Vector3(req.relativepose.position.x,
- req.relativepose.position.y,
- req.relativepose.position.z));
+ tf::Transform transform;
+ tf::poseMsgToTF(req.relativepose, transform);
- // add the image to template list
- add_new_template(img, req.type, transform,
- req.dimx, req.dimy, 1.0, 1.0);
- return true;
+ // add the image to template list
+ add_new_template(img, req.type, transform,
+ req.dimx, req.dimy, 1.0, 1.0);
+ return true;
}
- void imagefeature_cb (const posedetection_msgs::ImageFeature0DConstPtr& msg){
+ void PointPoseExtractor::imagefeature_cb (const posedetection_msgs::ImageFeature0DConstPtr& msg){
std::vector sourceimg_keypoints;
cv::Mat sourceimg_descriptors;
std::vector vo6p;
@@ -957,31 +310,22 @@ class PointPoseExtractor
// Publish result as geometry_msgs/PoseStamped. But it can only contain one object
geometry_msgs::PoseStamped pose_msg;
pose_msg.header = od.header;
- pose_msg.pose = od.objects[0].pose;
- _pub_pose.publish(pose_msg);
+ if ((int)vo6p.size() != 0) {
+ pose_msg.pose = od.objects[0].pose;
+ _pub_pose.publish(pose_msg);
+ }
// broadcast tf
if ( this->_publish_tf ) {
- tf::Transform transform(
- tf::Quaternion(
- pose_msg.pose.orientation.x,
- pose_msg.pose.orientation.y,
- pose_msg.pose.orientation.z,
- pose_msg.pose.orientation.w
- ),
- tf::Vector3(
- pose_msg.pose.position.x,
- pose_msg.pose.position.y,
- pose_msg.pose.position.z
- )
- );
+ tf::Transform transform;
+ tf::poseMsgToTF(pose_msg.pose, transform);
_br.sendTransform(
- tf::StampedTransform(
- transform,
- msg->image.header.stamp,
- msg->image.header.frame_id,
- _child_frame_id
- )
- );
+ tf::StampedTransform(
+ transform,
+ msg->image.header.stamp,
+ msg->image.header.frame_id,
+ _child_frame_id
+ )
+ );
}
}
}
@@ -991,28 +335,43 @@ class PointPoseExtractor
cv::waitKey( 10 );
}
- /* if there are subscribers of the output topic -> do work
- else if -> unregister all topics this node subscribing
- */
- void check_subscribers()
+ void PointPoseExtractor::onInit()
{
- if(_pub.getNumSubscribers() == 0 && _initialized) {
- if(_sub)
- _sub.shutdown();
- static int i = 0;
- if ( i++ % 100 == 0 ) {
- ROS_INFO("wait for subscriberes ... %s", _pub.getTopic().c_str());
- }
- } else {
- if(!_sub)
- _sub = _n.subscribe("ImageFeature0D", 1,
- &PointPoseExtractor::imagefeature_cb, this);
- }
+ DiagnosticNodelet::onInit();
+ dynamic_reconfigure::Server<Config> server;
+ dynamic_reconfigure::Server<Config>::CallbackType f;
+ f = boost::bind(&PointPoseExtractor::dyn_conf_callback, this, _1, _2);
+ server.setCallback(f);
+
+ it = new image_transport::ImageTransport(*pnh_);
+ // Use nh_ instead of pnh_ for backward compatibility.
+ // See https://github.com/jsk-ros-pkg/jsk_recognition/pull/2779 and
+ // https://github.com/jsk-ros-pkg/jsk_recognition/pull/2778
+ _client = nh_->serviceClient<posedetection_msgs::Feature0DDetect>("Feature0DDetect");
+ _pub = advertise<posedetection_msgs::ObjectDetection>(*nh_, "ObjectDetection", 10);
+ _pub_agg = advertise<posedetection_msgs::ObjectDetection>(*nh_, "ObjectDetection_agg", 10);
+ _pub_pose = advertise<geometry_msgs::PoseStamped>(*nh_, "object_pose", 10);
+ _debug_pub = it->advertise("debug_image", 1);
+ _server = nh_->advertiseService("SetTemplate", &PointPoseExtractor::settemplate_cb, this);
+ _initialized = false;
+
+ onInitPostProcess();
+ }
+
+ void PointPoseExtractor::subscribe()
+ {
+ _sub = nh_->subscribe("ImageFeature0D", 1,
+ &PointPoseExtractor::imagefeature_cb, this);
+ }
+
+ void PointPoseExtractor::unsubscribe()
+ {
+ _sub.shutdown();
}
/* callback for dynamic reconfigure */
- void dyn_conf_callback(jsk_perception::point_pose_extractorConfig &config,
- uint32_t level) {
+ void PointPoseExtractor::dyn_conf_callback(Config &config,
+ uint32_t level) {
std::cout << "id = " << config.template_id << std::endl;
std::cout << "lvl = " << level << std::endl;
if((int)_templates.size() <= config.template_id) {
@@ -1039,25 +398,5 @@ class PointPoseExtractor
}; // the end of definition of class PointPoseExtractor
-
-std::vector<Matching_Template *> PointPoseExtractor::_templates;
-
-int main (int argc, char **argv){
- ros::init (argc, argv, "PointPoseExtractor");
-
- PointPoseExtractor matcher;
-
- dynamic_reconfigure::Server<jsk_perception::point_pose_extractorConfig> server;
- dynamic_reconfigure::Server<jsk_perception::point_pose_extractorConfig>::CallbackType f;
- f = boost::bind(&PointPoseExtractor::dyn_conf_callback, &matcher, _1, _2);
- server.setCallback(f);
-
- ros::Rate r(10); // 10 hz
- while(ros::ok()) {
- ros::spinOnce();
- matcher.check_subscribers();
- r.sleep();
- }
-
- return 0;
-}
+#include <pluginlib/class_list_macros.h>
+PLUGINLIB_EXPORT_CLASS (jsk_perception::PointPoseExtractor, nodelet::Nodelet);
diff --git a/jsk_perception/src/split_image.cpp b/jsk_perception/src/split_image.cpp
new file mode 100644
index 0000000000..e84e7280bd
--- /dev/null
+++ b/jsk_perception/src/split_image.cpp
@@ -0,0 +1,101 @@
+// -*- mode: c++ -*-
+/*********************************************************************
+ * Software License Agreement (BSD License)
+ *
+ * Copyright (c) 2023, JSK Lab
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ * * Neither the name of the JSK Lab nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *********************************************************************/
+/*
+ * split_image.cpp
+ * Author: Yoshiki Obinata
+ */
+
+
+#include "jsk_perception/split_image.h"
+
+namespace jsk_perception{
+
+ void SplitImage::onInit(){
+ DiagnosticNodelet::always_subscribe_ = true;
+ DiagnosticNodelet::onInit();
+ pnh_->param("vertical_parts", vertical_parts_, 1);
+ pnh_->param("horizontal_parts", horizontal_parts_, 1);
+ for(int i=0; i<vertical_parts_; i++){
+ for(int j=0; j<horizontal_parts_; j++){
+ std::string pub_name = "output/" + std::to_string(i) + "_" + std::to_string(j);
+ ros::Publisher pub_ = advertise<sensor_msgs::Image>(*pnh_, pub_name, 1);
+ pubs_.push_back(pub_);
+ }
+ }
+ onInitPostProcess();
+ }
+
+ SplitImage::~SplitImage(){}
+
+ void SplitImage::subscribe(){
+ sub_ = pnh_->subscribe("input", 1, &SplitImage::splitImage, this);
+ }
+
+ void SplitImage::unsubscribe(){
+ sub_.shutdown();
+ }
+
+ void SplitImage::splitImage(const sensor_msgs::Image::ConstPtr& msg){
+ cv_bridge::CvImagePtr cv_ptr;
+ try{
+ cv_ptr = cv_bridge::toCvCopy(msg, sensor_msgs::image_encodings::BGR8);
+ }catch(cv_bridge::Exception& e){
+ NODELET_ERROR("cv_bridge exception: %s", e.what());
+ return;
+ }
+ cv::Mat image = cv_ptr->image;
+ int height = image.rows;
+ int width = image.cols;
+ int vertical_size = height / vertical_parts_;
+ int horizontal_size = width / horizontal_parts_;
+ for(int i=0; i<vertical_parts_; i++){
+ for(int j=0; j<horizontal_parts_; j++){
+ cv::Mat part_image = image(cv::Rect(j*horizontal_size, i*vertical_size, horizontal_size, vertical_size));
+ cv_bridge::CvImagePtr part_cv_ptr = boost::make_shared<cv_bridge::CvImage>();
+ part_cv_ptr->header = msg->header;
+ part_cv_ptr->encoding = sensor_msgs::image_encodings::BGR8;
+ part_cv_ptr->image = part_image;
+ sensor_msgs::ImagePtr part_msg = part_cv_ptr->toImageMsg();
+ pubs_.at(i*horizontal_parts_+j).publish(part_msg);
+ }
+ }
+ }
+}
+
+#include <pluginlib/class_list_macros.h>
+PLUGINLIB_EXPORT_CLASS(jsk_perception::SplitImage, nodelet::Nodelet);
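For reference, a minimal Python sketch (not part of this diff) of how the `vertical_parts` / `horizontal_parts` parameters map each sub-image ROI to the row-major publisher index used above in `pubs_.at(i*horizontal_parts_+j)`; the 640x480 input size and the 2x3 split are assumed example values.

```python
# Assumed example values; the real nodelet reads these from ROS parameters.
vertical_parts, horizontal_parts = 2, 3
width, height = 640, 480

horizontal_size = width // horizontal_parts   # same integer division as the C++ code
vertical_size = height // vertical_parts

for i in range(vertical_parts):               # row (vertical position)
    for j in range(horizontal_parts):         # column (horizontal position)
        x, y = j * horizontal_size, i * vertical_size
        index = i * horizontal_parts + j      # matches pubs_.at(i*horizontal_parts_+j)
        print("publisher %d <- ROI x=%d y=%d w=%d h=%d"
              % (index, x, y, horizontal_size, vertical_size))
```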
diff --git a/jsk_perception/src/video_to_scene.cpp b/jsk_perception/src/video_to_scene.cpp
new file mode 100644
index 0000000000..6d323d9cb0
--- /dev/null
+++ b/jsk_perception/src/video_to_scene.cpp
@@ -0,0 +1,112 @@
+// -*- mode: C++ -*-
+/*********************************************************************
+ * Software License Agreement (BSD License)
+ *
+ * Copyright (c) 2022, JSK Lab
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ * * Neither the name of the JSK Lab nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *********************************************************************/
+/*
+ * video_to_scene.cpp
+ * Author: Kei Okada
+ */
+
+#include "jsk_perception/video_to_scene.h"
+#include
+
+namespace jsk_perception{
+ void VideoToScene::onInit(){
+ DiagnosticNodelet::onInit();
+
+ bgsubtractor = cv::bgsegm::createBackgroundSubtractorGMG();
+ pnh_->param("min_percent", min_percent_, 5);
+ pnh_->param("max_percent", max_percent_, 20);
+ captured_ = false;
+
+ srv_ = boost::make_shared <dynamic_reconfigure::Server<Config> > (*pnh_);
+ dynamic_reconfigure::Server<Config>::CallbackType f =
+ boost::bind(&VideoToScene::configCallback, this, _1, _2);
+ srv_->setCallback (f);
+
+ //pub_ = advertise<sensor_msgs::Image>(*pnh_, "output", 1);
+ pub_ = advertiseImage(*pnh_, "output", 1);
+
+ onInitPostProcess();
+ }
+
+ void VideoToScene::subscribe(){
+ std::string transport;
+ pnh_->param("image_transport", transport, std::string("raw"));
+ NODELET_INFO_STREAM("Using transport \"" << transport << "\"");
+ image_transport::TransportHints hints(transport, ros::TransportHints(), *pnh_);
+ //
+ it_.reset(new image_transport::ImageTransport(*pnh_));
+ sub_ = it_->subscribe(pnh_->resolveName("input"), 1, &VideoToScene::work, this, hints);
+ ros::V_string names = boost::assign::list_of("input");
+ jsk_topic_tools::warnNoRemap(names);
+ }
+
+ void VideoToScene::unsubscribe(){
+ sub_.shutdown();
+ }
+
+ void VideoToScene::work(const sensor_msgs::Image::ConstPtr& image_msg){
+ cv::Mat image;
+
+ vital_checker_ -> poke();
+ boost::mutex::scoped_lock lock(mutex_);
+
+ image = cv_bridge::toCvShare(image_msg, sensor_msgs::image_encodings::RGB8) -> image;
+ cv::resize(image, image, cv::Size(), 300.0/image.cols, 300.0/image.cols);
+
+ cv::Mat bgmask;
+ bgsubtractor->apply(image, bgmask);
+ cv::erode(bgmask, bgmask, cv::Mat(), cv::Point(-1, -1), 2);
+ cv::dilate(bgmask, bgmask, cv::Mat(), cv::Point(-1, -1), 2);
+
+ int p = cv::countNonZero(bgmask) / float(bgmask.cols * bgmask.rows) * 100;
+ NODELET_DEBUG_STREAM("p = " << p << ", min_percent = " << min_percent_ << ", max_percent = " << max_percent_ << ", captured = " << captured_);
+
+ if ( p < min_percent_ && !captured_ ) {
+ captured_ = true;
+ pub_.publish(image_msg);
+ } else if ( captured_ && p >= max_percent_ ) {
+ captured_ = false;
+ }
+ }
+
+ void VideoToScene::configCallback(Config &config, uint32_t level){
+ boost::mutex::scoped_lock lock(mutex_);
+ min_percent_ = config.min_percent;
+ max_percent_ = config.max_percent;
+ }
+}
+
+#include <pluginlib/class_list_macros.h>
+PLUGINLIB_EXPORT_CLASS (jsk_perception::VideoToScene, nodelet::Nodelet);
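For reference, a minimal Python sketch (not part of this diff) of the capture hysteresis implemented above: a frame is kept once the foreground ratio drops below `min_percent`, and the trigger re-arms only after it rises above `max_percent`. `cv2.createBackgroundSubtractorMOG2` and a local camera are stand-ins (assumptions) for the GMG subtractor and the ROS image topic.

```python
import cv2

min_percent, max_percent = 5, 20   # same defaults as the nodelet parameters
captured = False
subtractor = cv2.createBackgroundSubtractorMOG2()

cap = cv2.VideoCapture(0)          # assumed local camera instead of a ROS topic
while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    mask = subtractor.apply(frame)
    mask = cv2.erode(mask, None, iterations=2)
    mask = cv2.dilate(mask, None, iterations=2)
    p = 100.0 * cv2.countNonZero(mask) / (mask.shape[0] * mask.shape[1])
    if p < min_percent and not captured:
        captured = True            # scene settled -> keep this frame
        cv2.imwrite('scene.png', frame)
    elif captured and p >= max_percent:
        captured = False           # motion resumed -> wait for the next settled scene
cap.release()
```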
diff --git a/jsk_perception/test/apply_context_to_label_probability.test b/jsk_perception/test/apply_context_to_label_probability.test
index 301f2c8348..8629c18214 100644
--- a/jsk_perception/test/apply_context_to_label_probability.test
+++ b/jsk_perception/test/apply_context_to_label_probability.test
@@ -8,12 +8,12 @@
+ retry="3" time-limit="120">
topic_0: /apply_context_to_label_probability/output
- timeout_0: 60
+ timeout_0: 120
topic_1: /apply_context_to_label_probability/output/label
- timeout_1: 60
+ timeout_1: 120
diff --git a/jsk_perception/test/deep_sort_tracker.test b/jsk_perception/test/deep_sort_tracker.test
index a421e64707..a12d814fd8 100644
--- a/jsk_perception/test/deep_sort_tracker.test
+++ b/jsk_perception/test/deep_sort_tracker.test
@@ -8,12 +8,12 @@
+ retry="3" time-limit="120">
topic_0: /deep_sort_tracker/output/viz
- timeout_0: 60
+ timeout_0: 120
topic_1: /deep_sort_tracker/output/labels
- timeout_1: 60
+ timeout_1: 120
diff --git a/jsk_perception/test/face_pose_estimation.test b/jsk_perception/test/face_pose_estimation.test
index 35d8e6d5b9..ebb24ce560 100644
--- a/jsk_perception/test/face_pose_estimation.test
+++ b/jsk_perception/test/face_pose_estimation.test
@@ -7,14 +7,14 @@
+ retry="3" time-limit="120">
topic_0: /face_pose_estimation/output/rects
- timeout_0: 60
+ timeout_0: 120
topic_1: /face_pose_estimation/output/pose
- timeout_1: 60
+ timeout_1: 120
topic_2: /face_pose_estimation/output/gender
- timeout_2: 60
+ timeout_2: 120
diff --git a/jsk_perception/test/fcn_object_segmentation.test b/jsk_perception/test/fcn_object_segmentation.test
index dd65968f1e..c7571d8e1c 100644
--- a/jsk_perception/test/fcn_object_segmentation.test
+++ b/jsk_perception/test/fcn_object_segmentation.test
@@ -11,10 +11,10 @@
+ time-limit="120">
topic_0: /fcn_object_segmentation/output
- timeout_0: 60
+ timeout_0: 120
diff --git a/jsk_perception/test/human_mesh_recovery.test b/jsk_perception/test/human_mesh_recovery.test
index e59a9acf20..1992a43d0b 100644
--- a/jsk_perception/test/human_mesh_recovery.test
+++ b/jsk_perception/test/human_mesh_recovery.test
@@ -7,10 +7,10 @@
+ retry="3" time-limit="120">
topic_0: /human_mesh_recovery/output/pose
- timeout_0: 60
+ timeout_0: 100
diff --git a/jsk_perception/test/invert_mask_image.test b/jsk_perception/test/invert_mask_image.test
new file mode 100644
index 0000000000..73dff168cc
--- /dev/null
+++ b/jsk_perception/test/invert_mask_image.test
@@ -0,0 +1,19 @@
+
+
+
+
+
+
+
+
+ topic_0: /invert_mask_image/output
+ timeout_0: 30
+ check_after_kill_node: true
+ node_names: [invert_mask_image,]
+
+
+
+
diff --git a/jsk_perception/test/label_image_classifier.test b/jsk_perception/test/label_image_classifier.test
index ba37c915b7..c4e476bf14 100644
--- a/jsk_perception/test/label_image_classifier.test
+++ b/jsk_perception/test/label_image_classifier.test
@@ -9,10 +9,10 @@
+ retry="3" time-limit="120">
topic_0: /label_image_classifier/output
- timeout_0: 60
+ timeout_0: 120
diff --git a/jsk_perception/test/people_pose_estimation_2d.test b/jsk_perception/test/people_pose_estimation_2d.test
index 5fcd015d13..af4fc84494 100644
--- a/jsk_perception/test/people_pose_estimation_2d.test
+++ b/jsk_perception/test/people_pose_estimation_2d.test
@@ -7,16 +7,16 @@
+ retry="3" time-limit="120">
topic_0: /people_pose_estimation_2d/output
- timeout_0: 60
+ timeout_0: 120
topic_1: /people_pose_estimation_2d/pose
- timeout_1: 60
+ timeout_1: 120
topic_2: /people_pose_estimation_2d/pose_2d
- timeout_2: 60
+ timeout_2: 120
topic_3: /people_pose_estimation_2d/skeleton
- timeout_3: 60
+ timeout_3: 120
diff --git a/jsk_perception/test/point_pose_extractor.test b/jsk_perception/test/point_pose_extractor.test
new file mode 100644
index 0000000000..102747e55b
--- /dev/null
+++ b/jsk_perception/test/point_pose_extractor.test
@@ -0,0 +1,22 @@
+
+
+
+
+
+
+
+
+ topic_0: /ObjectDetection
+ timeout_0: 30
+ topic_1: /ObjectDetection_agg
+ timeout_1: 30
+ topic_2: /object_pose
+ timeout_2: 30
+ topic_3: /point_pose_extractor/debug_image
+ timeout_3: 30
+
+
+
+
diff --git a/jsk_perception/test/point_pose_extractor_group_ns.test b/jsk_perception/test/point_pose_extractor_group_ns.test
new file mode 100644
index 0000000000..0ed56ad043
--- /dev/null
+++ b/jsk_perception/test/point_pose_extractor_group_ns.test
@@ -0,0 +1,50 @@
+
+
+
+
+
+
+
+ file_name: $(find jsk_perception)/sample/image/items_in_shelf.jpg
+ encoding: bgr8
+ publish_info: true
+ fovx: 84.1
+ fovy: 53.8
+
+
+
+
+
+
+
+
+
+
+ template_filename: $(find jsk_perception)/sample/image/pai_no_mi.jpg
+ template_id: 0
+ object_width: 0.145
+ object_height: 0.128
+ viewer_window: $(arg gui)
+
+
+
+
+
+
+ topic_0: /test_template_matching/ObjectDetection
+ timeout_0: 30
+ topic_1: /test_template_matching/ObjectDetection_agg
+ timeout_1: 30
+ topic_2: /test_template_matching/object_pose
+ timeout_2: 30
+ topic_3: /test_template_matching/point_pose_extractor/debug_image
+ timeout_3: 30
+
+
+
+
diff --git a/jsk_perception/test/probability_image_classifier.test b/jsk_perception/test/probability_image_classifier.test
index 71cb98c7c5..8f25ae32db 100644
--- a/jsk_perception/test/probability_image_classifier.test
+++ b/jsk_perception/test/probability_image_classifier.test
@@ -9,10 +9,10 @@
+ retry="3" time-limit="120">
topic_0: /probability_image_classifier/output
- timeout_0: 60
+ timeout_0: 120
diff --git a/jsk_perception/test/publish_mouse_event.py b/jsk_perception/test/publish_mouse_event.py
new file mode 100755
index 0000000000..899d505fd3
--- /dev/null
+++ b/jsk_perception/test/publish_mouse_event.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# copied from image_view2
+# workaround until https://github.com/jsk-ros-pkg/jsk_common/pull/1774 is merged and released
+#
+
+import cv2
+import numpy as np
+
+import rospy
+from sensor_msgs.msg import Image
+import cv_bridge
+from image_view2.msg import MouseEvent
+
+
+def main():
+ pub_plus = rospy.Publisher('~plus_rect_event', MouseEvent, queue_size=1)
+ pub_minus = rospy.Publisher('~minus_rect_event', MouseEvent, queue_size=1)
+
+ width = int(rospy.get_param('~image_width'))
+ height = int(rospy.get_param('~image_height'))
+ plus_events = [
+ MouseEvent(type=3, x=int(width/4), y=int(height/4), width=width, height=height),
+ MouseEvent(type=4, x=int(width/2), y=int(height/2), width=width, height=height),
+ MouseEvent(type=2, x=int(3*width/4), y=int(3*height/4), width=width, height=height),
+ ]
+ minus_events = [
+ MouseEvent(type=3, x=int(3*width/4), y=int(3*height/4), width=width, height=height),
+ MouseEvent(type=4, x=int(width/2), y=int(height/2), width=width, height=height),
+ MouseEvent(type=2, x=int(width/4), y=int(height/4), width=width, height=height),
+ ]
+ rate = rospy.Rate(10)
+ while not rospy.is_shutdown():
+ for e in plus_events:
+ e.header.stamp = rospy.get_rostime()
+ pub_plus.publish(e)
+ rate.sleep()
+ for e in minus_events:
+ e.header.stamp = rospy.get_rostime()
+ pub_minus.publish(e)
+ rate.sleep()
+
+
+if __name__ == '__main__':
+ rospy.init_node('publish_mouse_event')
+ main()
diff --git a/jsk_perception/test/ssd_object_detector.test b/jsk_perception/test/ssd_object_detector.test
index 24992e14ac..a23ce97b05 100644
--- a/jsk_perception/test/ssd_object_detector.test
+++ b/jsk_perception/test/ssd_object_detector.test
@@ -5,7 +5,7 @@
+ retry="3" time-limit="120" >
topic_0: /ssd_object_detector/output/image
timeout_0: 120
diff --git a/jsk_perception/test/tile_image.test b/jsk_perception/test/tile_image.test
index 73a211d0d1..6996c73781 100644
--- a/jsk_perception/test/tile_image.test
+++ b/jsk_perception/test/tile_image.test
@@ -1,7 +1,7 @@
-
+
+ pkg="jsk_tools" type="test_topic_published.py"
+ retry="3" time-limit="120" >
topic_0: /vgg16_object_recognition/output
- timeout_0: 30
+ timeout_0: 120
topic_1: /vgg16_object_recognition/debug/net_input
- timeout_1: 30
+ timeout_1: 120
diff --git a/jsk_perception/test/video_to_scene.test b/jsk_perception/test/video_to_scene.test
new file mode 100644
index 0000000000..b9b9fb9ce1
--- /dev/null
+++ b/jsk_perception/test/video_to_scene.test
@@ -0,0 +1,21 @@
+
+
+
+
+
+
+
+
+ topic_0: /kinect_head/rgb/image_rect_color/output
+ timeout_0: 20
+ topic_1: /kinect_head/rgb/image_rect_color/output/compressed
+ timeout_1: 20
+ check_after_kill_node: true
+ node_names: [video_to_scene,]
+
+
+
+
diff --git a/jsk_recognition_msgs/CMakeLists.txt b/jsk_recognition_msgs/CMakeLists.txt
index 116018c0b2..86e6164943 100644
--- a/jsk_recognition_msgs/CMakeLists.txt
+++ b/jsk_recognition_msgs/CMakeLists.txt
@@ -15,6 +15,7 @@ add_message_files(
Circle2DArray.msg
Circle2D.msg
ClassificationResult.msg
+ ClipResult.msg
ClusterPointIndices.msg
ColorHistogramArray.msg
ColorHistogram.msg
@@ -46,6 +47,8 @@ add_message_files(
PointsArray.msg
PolygonArray.msg
PosedCameraInfo.msg
+ QueryAndProbability.msg
+ QuestionAndAnswerText.msg
RectArray.msg
Rect.msg
RotatedRect.msg
@@ -68,6 +71,7 @@ add_message_files(
TrackingStatus.msg
BoolStamped.msg
VectorArray.msg
+ VQAResult.msg
WeightedPoseArray.msg
ContactSensor.msg
ContactSensorArray.msg
@@ -110,9 +114,14 @@ add_service_files(FILES
WhiteBalance.srv
)
+add_action_files(FILES
+ VQATask.action
+ ClassificationTask.action
+ )
+
generate_messages(
DEPENDENCIES
- std_msgs sensor_msgs geometry_msgs pcl_msgs jsk_footstep_msgs
+ std_msgs sensor_msgs geometry_msgs pcl_msgs jsk_footstep_msgs actionlib_msgs
)
catkin_package(
CATKIN_DEPENDS std_msgs sensor_msgs geometry_msgs pcl_msgs jsk_footstep_msgs
@@ -133,7 +142,7 @@ install(DIRECTORY sample
USE_SOURCE_PERMISSIONS
)
-if(NOT $ENV{ROS_DISTRO} STREQUAL "indigo") # on noetic it needs catkin_install_python to support Python3 and it does not work on indigo for some reason...
+if($ENV{ROS_DISTRO} STREQUAL "indigo") # on noetic it needs catkin_install_python to support Python3 and it does not work on indigo for some reason...
install(DIRECTORY scripts
DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
USE_SOURCE_PERMISSIONS
diff --git a/jsk_recognition_msgs/action/ClassificationTask.action b/jsk_recognition_msgs/action/ClassificationTask.action
new file mode 100644
index 0000000000..417235cc2c
--- /dev/null
+++ b/jsk_recognition_msgs/action/ClassificationTask.action
@@ -0,0 +1,8 @@
+sensor_msgs/Image image
+sensor_msgs/CompressedImage compressed_image
+string[] queries
+---
+jsk_recognition_msgs/ClassificationResult result
+bool done
+---
+string status
diff --git a/jsk_recognition_msgs/action/VQATask.action b/jsk_recognition_msgs/action/VQATask.action
new file mode 100644
index 0000000000..efd3a0e066
--- /dev/null
+++ b/jsk_recognition_msgs/action/VQATask.action
@@ -0,0 +1,8 @@
+sensor_msgs/Image image
+sensor_msgs/CompressedImage compressed_image
+string[] questions
+---
+jsk_recognition_msgs/VQAResult result
+bool done
+---
+string status
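For reference, a minimal rospy/actionlib sketch (not part of this diff) showing how a client could send the new `VQATask` goal and read back the `QuestionAndAnswerText` pairs; the action server name `vqa_server` and the `image` topic are assumptions, not defined here.

```python
import actionlib
import rospy
from jsk_recognition_msgs.msg import VQATaskAction, VQATaskGoal
from sensor_msgs.msg import Image

rospy.init_node('vqa_task_client')
client = actionlib.SimpleActionClient('vqa_server', VQATaskAction)  # assumed server name
client.wait_for_server()

goal = VQATaskGoal()
goal.image = rospy.wait_for_message('image', Image)   # or fill compressed_image instead
goal.questions = ['What is in this image?']
client.send_goal(goal)
client.wait_for_result()

result = client.get_result()       # VQATaskResult: .result (VQAResult) and .done (bool)
for qa in result.result.result:    # QuestionAndAnswerText entries
    print('%s -> %s' % (qa.question, qa.answer))
```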
diff --git a/jsk_recognition_msgs/msg/ClipResult.msg b/jsk_recognition_msgs/msg/ClipResult.msg
new file mode 100644
index 0000000000..f20e76c27d
--- /dev/null
+++ b/jsk_recognition_msgs/msg/ClipResult.msg
@@ -0,0 +1,2 @@
+sensor_msgs/Image image
+jsk_recognition_msgs/QueryAndProbability[] result
diff --git a/jsk_recognition_msgs/msg/PanoramaInfo.msg b/jsk_recognition_msgs/msg/PanoramaInfo.msg
index 6dd1be12b1..602d7791bd 100644
--- a/jsk_recognition_msgs/msg/PanoramaInfo.msg
+++ b/jsk_recognition_msgs/msg/PanoramaInfo.msg
@@ -11,7 +11,13 @@ std_msgs/Header header
string projection_model
#######################################################################
-# Field of View Parameters #
+# Image shape info #
+#######################################################################
+uint32 image_height
+uint32 image_width
+
+#######################################################################
+# Field of View Parameters #
#######################################################################
float64 theta_min
float64 theta_max
diff --git a/jsk_recognition_msgs/msg/QueryAndProbability.msg b/jsk_recognition_msgs/msg/QueryAndProbability.msg
new file mode 100644
index 0000000000..3495b19f3d
--- /dev/null
+++ b/jsk_recognition_msgs/msg/QueryAndProbability.msg
@@ -0,0 +1,2 @@
+string query
+float32 probability
diff --git a/jsk_recognition_msgs/msg/QuestionAndAnswerText.msg b/jsk_recognition_msgs/msg/QuestionAndAnswerText.msg
new file mode 100644
index 0000000000..1070abd928
--- /dev/null
+++ b/jsk_recognition_msgs/msg/QuestionAndAnswerText.msg
@@ -0,0 +1,2 @@
+string question
+string answer
diff --git a/jsk_recognition_msgs/msg/VQAResult.msg b/jsk_recognition_msgs/msg/VQAResult.msg
new file mode 100644
index 0000000000..f2cc09c093
--- /dev/null
+++ b/jsk_recognition_msgs/msg/VQAResult.msg
@@ -0,0 +1 @@
+jsk_recognition_msgs/QuestionAndAnswerText[] result
diff --git a/jsk_recognition_utils/package.xml b/jsk_recognition_utils/package.xml
index 237e47bf78..cf7d9e8ce7 100644
--- a/jsk_recognition_utils/package.xml
+++ b/jsk_recognition_utils/package.xml
@@ -15,7 +15,8 @@
python3-setuptools
catkin
- cython
+ cython
+ cython3
eigen_conversions
dynamic_reconfigure
geometry_msgs
@@ -46,7 +47,8 @@
python-skimage
python3-skimage
python-fcn-pip
- python-shapely
+ python-shapely
+ python3-shapely
sensor_msgs
std_msgs
tf2_ros
diff --git a/sound_classification/launch/audio_to_spectrogram.launch b/sound_classification/launch/audio_to_spectrogram.launch
index 7e7511b1d0..afbd67093e 100644
--- a/sound_classification/launch/audio_to_spectrogram.launch
+++ b/sound_classification/launch/audio_to_spectrogram.launch
@@ -59,7 +59,7 @@
-
+
@@ -69,12 +69,12 @@
-
+
-
+
do_dynamic_scaling: true
diff --git a/sound_classification/launch/classify_sound.launch b/sound_classification/launch/classify_sound.launch
index 06325ed57d..276ea93c73 100644
--- a/sound_classification/launch/classify_sound.launch
+++ b/sound_classification/launch/classify_sound.launch
@@ -36,7 +36,7 @@
-
+
gpu: $(arg gpu)
model_name: nin
@@ -47,11 +47,11 @@
-
-
+
+
-
+
diff --git a/sound_classification/launch/save_noise.launch b/sound_classification/launch/save_noise.launch
index d94aaf9c19..2db861520c 100644
--- a/sound_classification/launch/save_noise.launch
+++ b/sound_classification/launch/save_noise.launch
@@ -34,7 +34,7 @@
-
+
save_data_rate: $(arg save_data_rate)
diff --git a/sound_classification/launch/save_sound.launch b/sound_classification/launch/save_sound.launch
index 56799dd28c..e32cff10e4 100644
--- a/sound_classification/launch/save_sound.launch
+++ b/sound_classification/launch/save_sound.launch
@@ -39,9 +39,9 @@
-
-
-
+
+
+
save_data_rate: $(arg save_data_rate)
@@ -54,7 +54,7 @@
-
+
power_per_pixel_threshold: $(arg threshold)
lazy: false