diff --git a/.github/workflows/config.yml b/.github/workflows/config.yml index 8be5cd9023..7d7abc3bb0 100644 --- a/.github/workflows/config.yml +++ b/.github/workflows/config.yml @@ -66,16 +66,29 @@ jobs: NOT_TEST_INSTALL : true USE_JENKINS: true DOCKER_IMAGE_JENKINS: ros-ubuntu:20.04-pcl - TEST_PKGS : 'jsk_recognition_msgs' # to skip test - BEFORE_SCRIPT : "sudo pip install numpy==1.16.6; sudo pip install fcn chainercv chainer-mask-rcnn==0.3.0 decorator==4.4.2 chainer==6.7.0 protobuf==3.18.0 cupy-cuda91 pytesseract==0.3.6 torch==1.4.0;" + BEFORE_SCRIPT : "sudo pip3 install numpy==1.17.5 scipy==1.3.3 scikit-image==0.16.2 PyWavelets==0.5.1 imageio==2.4.1 pandas==0.25.3; sudo pip3 install fcn chainercv chainer-mask-rcnn==0.3.0 decorator==4.4.2 chainer==7.8.1 protobuf==3.6.1 cupy-cuda91 pytesseract==0.3.10 Pillow==9.3.0 torch==1.4.0 torchvision==0.5.0;sudo dpkg -i /workspace/.ros/data/libpcl-features1.10_1.10.0+dfsg-5ubuntu1_amd64.deb;" + BUILD_PKGS: 'checkerboard_detector imagesift jsk_perception jsk_recognition_utils resized_image_transport' + EXTRA_DEB: ros-noetic-pr2-description experimental : false - ROS_DISTRO: noetic USE_DEB: false NOT_TEST_INSTALL : true USE_JENKINS: true DOCKER_IMAGE_JENKINS: ros-ubuntu:20.04-pcl - BEFORE_SCRIPT : "sudo pip install numpy==1.16.6; sudo pip install fcn chainercv chainer-mask-rcnn==0.3.0 decorator==4.4.2 chainer==6.7.0 protobuf==3.18.0 cupy-cuda91 pytesseract==0.3.6 torch==1.4.0;" - experimental : true + TEST_PKGS : 'jsk_pcl_ros_utils jsk_pcl_ros' + BEFORE_SCRIPT : "sudo pip3 install numpy==1.17.5 scipy==1.3.3 scikit-image==0.16.2 PyWavelets==0.5.1 imageio==2.4.1 pandas==0.25.3; sudo pip3 install fcn chainercv chainer-mask-rcnn==0.3.0 decorator==4.4.2 chainer==7.8.1 protobuf==3.6.1 cupy-cuda91 pytesseract==0.3.10 Pillow==9.3.0 torch==1.4.0 torchvision==0.5.0;sudo dpkg -i /workspace/.ros/data/libpcl-features1.10_1.10.0+dfsg-5ubuntu1_amd64.deb;" + BUILD_PKGS: 'jsk_pcl_ros_utils jsk_pcl_ros' + EXTRA_DEB: ros-noetic-pr2-description + experimental : false + - ROS_DISTRO: noetic + USE_DEB: false + NOT_TEST_INSTALL : true + USE_JENKINS: true + DOCKER_IMAGE_JENKINS: ros-ubuntu:20.04-pcl + TEST_PKGS : 'jsk_recognition_msgs' # to skip test + BEFORE_SCRIPT : "sudo pip3 install numpy==1.17.5 scipy==1.3.3 scikit-image==0.16.2 PyWavelets==0.5.1 imageio==2.4.1 pandas==0.25.3; sudo pip3 install fcn chainercv chainer-mask-rcnn==0.3.0 decorator==4.4.2 chainer==7.8.1 protobuf==3.6.1 cupy-cuda91 pytesseract==0.3.10 Pillow==9.3.0 torch==1.4.0 torchvision==0.5.0;sudo dpkg -i /workspace/.ros/data/libpcl-features1.10_1.10.0+dfsg-5ubuntu1_amd64.deb;" + EXTRA_DEB: ros-noetic-pr2-description + experimental : false steps: diff --git a/.github/workflows/python2.yml b/.github/workflows/python2.yml index 72f28a3229..3940014f6a 100644 --- a/.github/workflows/python2.yml +++ b/.github/workflows/python2.yml @@ -15,4 +15,4 @@ jobs: - name: Check Python2 run: | apt update -q && apt install -y -q python2 - python2 -m compileall . + python2 -m compileall -x `find docker -name "*.py"` . 
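For reference, the updated `python2.yml` step above only changes which files are byte-compiled: Python-3-only sources under `docker/` are excluded from the Python 2 syntax check via `compileall -x`. Below is a minimal sketch of an equivalent check using the standard-library `compileall` module; the `docker/` exclusion regex and the exit-code handling are illustrative assumptions, not part of the workflow itself.

```python
# Hedged sketch: byte-compile the tree with Python 2 while skipping docker/,
# similar in spirit to `python2 -m compileall -x ... .` in the workflow above.
import compileall
import re
import sys

# rx marks paths to exclude; docker/ is assumed to hold Python-3-only code.
ok = compileall.compile_dir('.', rx=re.compile(r'(^|/)docker/'), quiet=1)
sys.exit(0 if ok else 1)
```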
diff --git a/.travis.rosinstall.noetic b/.travis.rosinstall.noetic new file mode 100644 index 0000000000..62d2cb6127 --- /dev/null +++ b/.travis.rosinstall.noetic @@ -0,0 +1,4 @@ +- git: # https://github.com/ros-perception/image_transport_plugins/pull/64 + uri: https://github.com/ros-perception/image_transport_plugins.git + local-name: image_transport_plugins + version: noetic-devel diff --git a/audio_to_spectrogram/CMakeLists.txt b/audio_to_spectrogram/CMakeLists.txt index e2909950b0..f3828e9d70 100644 --- a/audio_to_spectrogram/CMakeLists.txt +++ b/audio_to_spectrogram/CMakeLists.txt @@ -8,7 +8,7 @@ find_package(catkin REQUIRED COMPONENTS catkin_python_setup() generate_dynamic_reconfigure_options( - cfg/AudioAmplitudePlot.cfg + cfg/DataAmplitudePlot.cfg ) catkin_package() @@ -41,4 +41,10 @@ if(CATKIN_ENABLE_TESTING) find_package(catkin REQUIRED COMPONENTS rostest roslaunch) add_rostest(test/audio_to_spectrogram.test) roslaunch_add_file_check(launch/audio_to_spectrogram.launch) + if(NOT $ENV{ROS_DISTRO} STRLESS "kinetic") + # Under kinetic, eval cannot be used in launch files + # http://wiki.ros.org/roslaunch/XML#substitution_args + add_rostest(test/wrench_to_spectrogram.test) + roslaunch_add_file_check(launch/wrench_to_spectrogram.launch) + endif() endif() diff --git a/audio_to_spectrogram/README.md b/audio_to_spectrogram/README.md index 46070a99f8..321aac4cd7 100644 --- a/audio_to_spectrogram/README.md +++ b/audio_to_spectrogram/README.md @@ -1,6 +1,6 @@ # audio_to_spectrogram -This package converts audio data to spectrum and spectrogram data. +This package converts audio data (or other time-series data) to spectrum and spectrogram data. # Usage By following command, you can publish audio, spectrum and spectrogram topics. Please set correct args for your microphone configuration, such as mic\_sampling\_rate or bitdepth. @@ -9,6 +9,13 @@ By following command, you can publish audio, spectrum and spectrogram topics. Pl roslaunch audio_to_spectrogram audio_to_spectrogram.launch ``` +Its data conversion pipeline is as follows: +``` +audio_to_spectrum.py -> spectrum + -> normalized_half_spectrum + -> log_spectrum -> preprocess node(s) -> preprocessed spectrum -> spectrum_to_spectrogram.py -> spectrogram +``` + Here is an example using rosbag with 300Hz audio. ```bash roslaunch audio_to_spectrogram sample_audio_to_spectrogram.launch @@ -18,19 +25,48 @@ roslaunch audio_to_spectrogram sample_audio_to_spectrogram.launch |---|---|---| ||![](https://user-images.githubusercontent.com/19769486/82075694-9a7ac300-9717-11ea-899c-db6119a76d52.png)|![](https://user-images.githubusercontent.com/19769486/82075685-96e73c00-9717-11ea-9abc-e6e74104d666.png)| +You can also convert data other than audio to spectrum and spectrogram data using this package. +Here is an example using rosbag of a force torque sensor sensing drill vibration. +```bash +roslaunch audio_to_spectrogram sample_wrench_to_spectrogram.launch +``` + +|Z-axis Force Amplitude|Normalized Half Spectrum|Spectrogram Source Spectrum|Spectrogram| +|---|---|---|---| +||||| + # Scripts ## audio_to_spectrum.py + A script to convert audio to spectrum. - ### Publishing topics - - `~spectrum` (`jsk_recognition_msgs/Spectrum`) - Spectrum data calculated from audio by FFT. + Spectrum data calculated from audio by FFT. + It is usual "amplitude spectrum". + See https://ryo-iijima.com/fftresult/ for details. 
+ + - `~normalized_half_spectrum` (`jsk_recognition_msgs/Spectrum`) + + Spectrum data which is "half" (having non-negative frequencies (0Hz-Nyquist frequency)) and is "normalized" (consistent with the amplitude of the original signal). + See the following for details. + - https://ryo-iijima.com/fftresult/ + - https://stackoverflow.com/questions/63211851/why-divide-the-output-of-numpy-fft-by-n + - https://github.com/jsk-ros-pkg/jsk_recognition/issues/2761#issue-1550715400 + + - `~log_spectrum` (`jsk_recognition_msgs/Spectrum`) + + Log-scaled spectrum data. + It is calculated by applying log to the absolute value of the FFT result. + Usually, log is applied to "power spectrum", but we don't use it for simplicity. + See the following for details. + - https://github.com/jsk-ros-pkg/jsk_recognition/issues/2761#issuecomment-1445810380 + - http://makotomurakami.com/blog/2020/05/23/5266/ - ### Subscribing topics - - `audio` (`audio_common_msgs/AudioData`) + - `~audio` (`audio_common_msgs/AudioData`) Audio stream data from microphone. The audio format must be `wave`. @@ -55,15 +91,94 @@ roslaunch audio_to_spectrogram sample_audio_to_spectrogram.launch Number of bits per audio data. - - `~high_cut_freq` (`Int`, default: `800`) + - `~fft_exec_rate` (`Double`, default: `50`) + + Rate [Hz] to execute FFT and publish its results. + +## data_to_spectrum.py + + Generalized version of `audio_to_spectrum.py`. + This script can convert multiple message types to spectrum. + + - ### Publishing topics + + Same as `audio_to_spectrum.py`. + + - ### Subscribing topics + - `~input` (`AnyMsg`) + + Topic to which message including data you want to convert to spectrum is published. + + - ### Parameters + - `~expression_to_get_data` (`String`, default: `m.data`) + + Python expression to get data from the input message `m`. For example, if your input is `std_msgs/Float64`, it is `m.data`. + Just accessing a field of `m` is recommended. + If you want to do a complex calculation (e.g., using `numpy`), use `transform` of `topic_tools` before this node. + + - `~data_sampling_rate` (`Int`, default: `500`) + + Sampling rate [Hz] of input data. + + - `~fft_sampling_period` (`Double`, default: `0.3`) + + Period [s] to sample input data for one FFT. + + - `~fft_exec_rate` (`Double`, default: `50`) + + Rate [Hz] to execute FFT and publish its results. + + - `~is_integer` (`Bool`, default: `false`) + + Whether input data is integer or not. For example, if your input is `std_msgs/Float64`, it is `false`. + + - `~is_signed` (`Bool`, default: `true`) + + Whether input data is signed or not. For example, if your input is `std_msgs/Float64`, it is `true`. + + - `~bitdepth` (`Int`, default: `64`) + + Number of bits per input data. For example, if your input is `std_msgs/Float64`, it is `64`. + + - `~n_channel` (`Int`, default: `1`) + + If your input is scalar, it is `1`. + If your input is flattened 2D matrix, it is number of channel of original matrix. + + - `~target_channel` (`Int`, default: `0`) + + If your input is scalar, it is `0`. + If your input is flattened 2D matrix, it is target channel. + +## spectrum_filter.py + + A script to filter spectrum. + + - ### Publishing topics + - `~output` (`jsk_recognition_msgs/Spectrum`) + + Filtered spectrum data (`low_cut_freq`-`high_cut_freq`). + + - ### Subscribing topics + - `~input` (`jsk_recognition_msgs/Spectrum`) + + Original spectrum data. + + - ### Parameters + - `~data_sampling_rate` (`Int`, default: `500`) + + Sampling rate [Hz] of data used in generation of original spectrum data. 
+ + - `~high_cut_freq` (`Int`, default: `250`) Threshold to limit the maximum frequency of the output spectrum. - - `~low_cut_freq` (`Int`, default: `1`) + - `~low_cut_freq` (`Int`, default: `0`) Threshold to limit the minimum frequency of the output spectrum. ## spectrum_to_spectrogram.py + A script to convert spectrum to spectrogram. - ### Publishing topics @@ -128,7 +243,7 @@ roslaunch audio_to_spectrogram sample_audio_to_spectrogram.launch Number of bits per audio data. - - `~maximum_amplitude` (`Int`, default: `10000`) + - `~maximum_amplitude` (`Double`, default: `10000.0`) Maximum range of amplitude to plot. @@ -140,6 +255,66 @@ roslaunch audio_to_spectrogram sample_audio_to_spectrogram.launch Publish rate [Hz] of audio amplitude image topic. +## data_amplitude_plot.py + + Generalized version of `audio_amplitude_plot.py`. + + - ### Publishing topics + + - `~output/viz` (`sensor_msgs/Image`) + + Data amplitude plot image. + + - ### Subscribing topics + - `~input` (`AnyMsg`) + + Topic to which message including data whose amplitude you want to plot is published. + + - ### Parameters + - `~expression_to_get_data` (`String`, default: `m.data`) + + Python expression to get data from the input message `m`. For example, if your input is `std_msgs/Float64`, it is `m.data`. + Just accessing a field of `m` is recommended. + If you want to do a complex calculation (e.g., using `numpy`), use `transform` of `topic_tools` before this node. + + - `~data_sampling_rate` (`Int`, default: `500`) + + Sampling rate [Hz] of input data. + + - `~is_integer` (`Bool`, default: `false`) + + Whether input data is integer or not. For example, if your input is `std_msgs/Float64`, it is `false`. + + - `~is_signed` (`Bool`, default: `true`) + + Whether input data is signed or not. For example, if your input is `std_msgs/Float64`, it is `true`. + + - `~bitdepth` (`Int`, default: `64`) + + Number of bits per input data. For example, if your input is `std_msgs/Float64`, it is `64`. + + - `~n_channel` (`Int`, default: `1`) + + If your input is scalar, it is `1`. + If your input is flattened 2D matrix, it is number of channel of original matrix. + + - `~target_channel` (`Int`, default: `0`) + + If your input is scalar, it is `0`. + If your input is flattened 2D matrix, it is target channel. + + - `~maximum_amplitude` (`Double`, default: `10.0`) + + Maximum range of amplitude to plot. + + - `~window_size` (`Double`, default: `10.0`) + + Window size of input data to plot. + + - `~rate` (`Double`, default: `10.0`) + + Publish rate [Hz] of data amplitude image topic. + ## spectrum_plot.py A script to publish frequency vs amplitude plot image. @@ -157,3 +332,20 @@ roslaunch audio_to_spectrogram sample_audio_to_spectrogram.launch - `~spectrum` (`jsk_recognition_msgs/Spectrum`) Spectrum data calculated from audio by FFT. + + - ### Parameters + - `~min_amp` (`Double`, default: `0.0`) + + Minimum value of amplitude in plot. + + - `~max_amp` (`Double`, default: `20.0`) + + Maximum value of amplitude in plot. + + - `~queue_size` (`Int`, default: `1`) + + Queue size of spectrum subscriber. + + - `~max_rate` (`Double`, default: `-1`) + + Maximum publish rate [Hz] of frequency vs amplitude plot image. Setting this value low reduces CPU load. `-1` means no maximum limit. diff --git a/audio_to_spectrogram/cfg/AudioAmplitudePlot.cfg b/audio_to_spectrogram/cfg/AudioAmplitudePlot.cfg deleted file mode 100755 index a626a14603..0000000000 --- a/audio_to_spectrogram/cfg/AudioAmplitudePlot.cfg +++ /dev/null @@ -1,13 +0,0 @@ -#! 
/usr/bin/env python - -PACKAGE = 'audio_to_spectrogram' - -from dynamic_reconfigure.parameter_generator_catkin import * - - -gen = ParameterGenerator() - -gen.add("maximum_amplitude", int_t, 0, "Maximum range of amplitude to plot", 10000, 1, 100000) -gen.add("window_size", double_t, 0, "Window size of sound input to plot", 10.0, 0.1, 10000) - -exit(gen.generate(PACKAGE, PACKAGE, "AudioAmplitudePlot")) diff --git a/audio_to_spectrogram/cfg/DataAmplitudePlot.cfg b/audio_to_spectrogram/cfg/DataAmplitudePlot.cfg new file mode 100755 index 0000000000..9c2c962160 --- /dev/null +++ b/audio_to_spectrogram/cfg/DataAmplitudePlot.cfg @@ -0,0 +1,13 @@ +#! /usr/bin/env python + +PACKAGE = 'audio_to_spectrogram' + +from dynamic_reconfigure.parameter_generator_catkin import * + + +gen = ParameterGenerator() + +gen.add("maximum_amplitude", double_t, 0, "Maximum range of amplitude to plot", 10.0) +gen.add("window_size", double_t, 0, "Window size of data input to plot", 10.0) + +exit(gen.generate(PACKAGE, PACKAGE, "DataAmplitudePlot")) diff --git a/audio_to_spectrogram/docs/images/wrench_amplitude.jpg b/audio_to_spectrogram/docs/images/wrench_amplitude.jpg new file mode 100644 index 0000000000..fd1ae4dc0a Binary files /dev/null and b/audio_to_spectrogram/docs/images/wrench_amplitude.jpg differ diff --git a/audio_to_spectrogram/docs/images/wrench_normalized_half_spectrum.jpg b/audio_to_spectrogram/docs/images/wrench_normalized_half_spectrum.jpg new file mode 100644 index 0000000000..11065960ee Binary files /dev/null and b/audio_to_spectrogram/docs/images/wrench_normalized_half_spectrum.jpg differ diff --git a/audio_to_spectrogram/docs/images/wrench_spectrogram.jpg b/audio_to_spectrogram/docs/images/wrench_spectrogram.jpg new file mode 100644 index 0000000000..0e7b7679eb Binary files /dev/null and b/audio_to_spectrogram/docs/images/wrench_spectrogram.jpg differ diff --git a/audio_to_spectrogram/docs/images/wrench_spectrogram_source.jpg b/audio_to_spectrogram/docs/images/wrench_spectrogram_source.jpg new file mode 100644 index 0000000000..27e427484e Binary files /dev/null and b/audio_to_spectrogram/docs/images/wrench_spectrogram_source.jpg differ diff --git a/audio_to_spectrogram/launch/audio_to_spectrogram.launch b/audio_to_spectrogram/launch/audio_to_spectrogram.launch index 56f1c139aa..ae0eaf8551 100644 --- a/audio_to_spectrogram/launch/audio_to_spectrogram.launch +++ b/audio_to_spectrogram/launch/audio_to_spectrogram.launch @@ -2,20 +2,34 @@ + + + + + + + + + - + + + + + - @@ -28,66 +42,32 @@ - - - - n_channel: $(arg n_channel) - mic_sampling_rate: $(arg mic_sampling_rate) - fft_sampling_period: 0.3 - bitdepth: $(arg bitdepth) - high_cut_freq: $(arg high_cut_freq) - low_cut_freq: $(arg low_cut_freq) - fft_exec_rate: 50 - - + + + + + + + + + + + + + - - - - image_height: 300 - image_width: 300 - spectrogram_period: $(arg spectrogram_period) - - - - - - - - n_channel: $(arg n_channel) - mic_sampling_rate: $(arg mic_sampling_rate) - bitdepth: $(arg bitdepth) - - - - - - - - - - - - - - - - - - - - - - - - do_dynamic_scaling: true - - colormap: 2 - - - + + + + + + + + + + + + diff --git a/audio_to_spectrogram/launch/audio_to_spectrum.launch b/audio_to_spectrogram/launch/audio_to_spectrum.launch new file mode 100644 index 0000000000..fd2650fdee --- /dev/null +++ b/audio_to_spectrogram/launch/audio_to_spectrum.launch @@ -0,0 +1,52 @@ + + + + + + + + + + + + + + + + + + + n_channel: $(arg n_channel) + target_channel: $(arg target_channel) + mic_sampling_rate: $(arg mic_sampling_rate) + fft_sampling_period: $(arg 
fft_sampling_period) + bitdepth: $(arg bitdepth) + fft_exec_rate: $(arg fft_exec_rate) + + + + + + + + n_channel: $(arg n_channel) + target_channel: $(arg target_channel) + mic_sampling_rate: $(arg mic_sampling_rate) + bitdepth: $(arg bitdepth) + maximum_amplitude: $(arg audio_amp_plot_max_amp) + window_size: $(arg audio_amp_plot_window_size) + rate: $(arg audio_amp_plot_rate) + + + + + + + + + + diff --git a/audio_to_spectrogram/launch/data_to_spectrum.launch b/audio_to_spectrogram/launch/data_to_spectrum.launch new file mode 100644 index 0000000000..3acf38845f --- /dev/null +++ b/audio_to_spectrogram/launch/data_to_spectrum.launch @@ -0,0 +1,61 @@ + + + + + + + + + + + + + + + + + + + + + + expression_to_get_data: $(arg expression_to_get_data) + data_sampling_rate: $(arg data_sampling_rate) + fft_sampling_period: $(arg fft_sampling_period) + is_integer: $(arg is_integer) + is_signed: $(arg is_signed) + bitdepth: $(arg bitdepth) + n_channel: $(arg n_channel) + target_channel: $(arg target_channel) + fft_exec_rate: $(arg fft_exec_rate) + + + + + + + + expression_to_get_data: $(arg expression_to_get_data) + data_sampling_rate: $(arg data_sampling_rate) + is_integer: $(arg is_integer) + is_signed: $(arg is_signed) + bitdepth: $(arg bitdepth) + n_channel: $(arg n_channel) + target_channel: $(arg target_channel) + maximum_amplitude: $(arg data_amp_plot_max_amp) + window_size: $(arg data_amp_plot_window_size) + rate: $(arg data_amp_plot_rate) + + + + + + + + + + diff --git a/audio_to_spectrogram/launch/spectrum_plot.launch b/audio_to_spectrogram/launch/spectrum_plot.launch new file mode 100644 index 0000000000..1ec8c2cf14 --- /dev/null +++ b/audio_to_spectrogram/launch/spectrum_plot.launch @@ -0,0 +1,29 @@ + + + + + + + + + + + + + + + min_amp: $(arg min_amp) + max_amp: $(arg max_amp) + queue_size: $(arg queue_size) + max_rate: $(arg max_rate) + + + + + + + + + + diff --git a/audio_to_spectrogram/launch/spectrum_to_spectrogram.launch b/audio_to_spectrogram/launch/spectrum_to_spectrogram.launch new file mode 100644 index 0000000000..11cae6f7c2 --- /dev/null +++ b/audio_to_spectrogram/launch/spectrum_to_spectrogram.launch @@ -0,0 +1,65 @@ + + + + + + + + + + + + + + + + + + + + + + data_sampling_rate: $(arg data_sampling_rate) + high_cut_freq: $(arg high_cut_freq) + low_cut_freq: $(arg low_cut_freq) + + + + + + + + image_height: $(arg spectrogram_height) + image_width: $(arg spectrogram_width) + spectrogram_period: $(arg spectrogram_period) + + + + + + + + + + + + + + + + + + + + + do_dynamic_scaling: $(arg do_dynamic_scaling) + colormap: $(arg colormap) + + + + + diff --git a/audio_to_spectrogram/launch/wrench_to_spectrogram.launch b/audio_to_spectrogram/launch/wrench_to_spectrogram.launch new file mode 100644 index 0000000000..9c1ca7e144 --- /dev/null +++ b/audio_to_spectrogram/launch/wrench_to_spectrogram.launch @@ -0,0 +1,87 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/audio_to_spectrogram/package.xml b/audio_to_spectrogram/package.xml index 3d72033ad7..a1f5101094 100644 --- a/audio_to_spectrogram/package.xml +++ b/audio_to_spectrogram/package.xml @@ -15,13 +15,18 @@ audio_common_msgs cv_bridge dynamic_reconfigure + geometry_msgs image_view jsk_recognition_msgs jsk_topic_tools python-matplotlib python3-matplotlib + rostopic sensor_msgs + std_msgs + topic_tools + jsk_tools roslaunch rostest diff --git 
a/audio_to_spectrogram/sample/data/wrench_sensing_drill_vibration.bag b/audio_to_spectrogram/sample/data/wrench_sensing_drill_vibration.bag new file mode 100644 index 0000000000..cdf0cb60e8 Binary files /dev/null and b/audio_to_spectrogram/sample/data/wrench_sensing_drill_vibration.bag differ diff --git a/audio_to_spectrogram/sample/sample_wrench_to_spectrogram.launch b/audio_to_spectrogram/sample/sample_wrench_to_spectrogram.launch new file mode 100644 index 0000000000..702025ceee --- /dev/null +++ b/audio_to_spectrogram/sample/sample_wrench_to_spectrogram.launch @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + + + diff --git a/audio_to_spectrogram/scripts/audio_amplitude_plot.py b/audio_to_spectrogram/scripts/audio_amplitude_plot.py index 856b149fff..54dc338d3b 100755 --- a/audio_to_spectrogram/scripts/audio_amplitude_plot.py +++ b/audio_to_spectrogram/scripts/audio_amplitude_plot.py @@ -1,85 +1,22 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- -from __future__ import division - -import cv_bridge -from dynamic_reconfigure.server import Server -from jsk_topic_tools import ConnectionBasedTransport -import matplotlib.pyplot as plt -import numpy as np import rospy -import sensor_msgs.msg from audio_to_spectrogram import AudioBuffer -from audio_to_spectrogram.cfg import AudioAmplitudePlotConfig as Config -from audio_to_spectrogram import convert_matplotlib_to_img +from audio_to_spectrogram import DataAmplitudePlot -class AudioAmplitudePlot(ConnectionBasedTransport): +class AudioAmplitudePlot(DataAmplitudePlot): def __init__(self): - super(AudioAmplitudePlot, self).__init__() - - self.audio_buffer = AudioBuffer.from_rosparam() - - # Set matplotlib config - self.fig = plt.figure(figsize=(8, 5)) - self.ax = self.fig.add_subplot(1, 1, 1) - self.ax.grid(True) - self.ax.set_xlabel('Time [s]', fontsize=12) - self.ax.set_ylabel('Amplitude', fontsize=12) - self.line, = self.ax.plot([0, 0], label='Amplitude of Audio') - - # ROS dynamic reconfigure - self.srv = Server(Config, self.config_callback) - - self.pub_img = self.advertise( - '~output/viz', sensor_msgs.msg.Image, queue_size=1) - - def start_timer(self): - rate = rospy.get_param('~rate', 10) - if rate == 0: - rospy.logwarn('You cannot set 0 as the rate; change it to 10.') - rate = 10 - self.timer = rospy.Timer(rospy.Duration(1.0 / rate), self.timer_cb) - - def stop_timer(self): - self.timer.shutdown() - - def subscribe(self): - self.audio_buffer.subscribe() - self.start_timer() - - def unsubscribe(self): - self.audio_buffer.unsubscribe() - self.stop_timer() - - def config_callback(self, config, level): - self.maximum_amplitude = config.maximum_amplitude - self.window_size = config.window_size - self.audio_buffer.window_size = self.window_size - return config - - def timer_cb(self, timer): - window_size = self.window_size - amp = self.audio_buffer.read(window_size) - if len(amp) == 0: - return - times = np.linspace(-window_size, 0.0, len(amp)) - - # Plot audio amplitude. 
- self.line.set_data(times, amp) - self.ax.set_xlim((times.min(), times.max())) - self.ax.set_ylim((-self.maximum_amplitude, self.maximum_amplitude)) - - self.ax.legend(loc='upper right') - if self.pub_img.get_num_connections() > 0: - bridge = cv_bridge.CvBridge() - img = convert_matplotlib_to_img(self.fig) - img_msg = bridge.cv2_to_imgmsg(img, encoding='rgb8') - img_msg.header.stamp = rospy.Time.now() - self.pub_img.publish(img_msg) + # Overwrite dynamic_reconfigure's default values + param_name = '~maximum_amplitude' + if not rospy.has_param(param_name): + rospy.set_param(param_name, 10000) + + super(AudioAmplitudePlot, self).__init__( + data_buffer=AudioBuffer.from_rosparam(), + ) if __name__ == '__main__': diff --git a/audio_to_spectrogram/scripts/audio_to_spectrum.py b/audio_to_spectrogram/scripts/audio_to_spectrum.py index 920bc67c58..0fd85254cc 100755 --- a/audio_to_spectrogram/scripts/audio_to_spectrum.py +++ b/audio_to_spectrogram/scripts/audio_to_spectrum.py @@ -1,67 +1,19 @@ #!/usr/bin/env python -import numpy as np import rospy -from audio_common_msgs.msg import AudioData -from jsk_recognition_msgs.msg import Spectrum - from audio_to_spectrogram import AudioBuffer +from audio_to_spectrogram import DataToSpectrum # This node execute FFT to audio (audio_common_msgs/AudioData) # The format of audio topic is assumed to be wave. -class AudioToSpectrum(object): +class AudioToSpectrum(DataToSpectrum): def __init__(self): - super(AudioToSpectrum, self).__init__() - - self.audio_buffer = AudioBuffer.from_rosparam(auto_start=True) - fft_sampling_period = rospy.get_param('~fft_sampling_period', 0.3) - self.audio_buffer.window_size = fft_sampling_period - mic_sampling_rate = self.audio_buffer.input_sample_rate - - # fft config - window_function = np.arange( - 0.0, 1.0, 1.0 / self.audio_buffer.audio_buffer_len) - self.window_function = 0.54 - 0.46 * np.cos( - 2 * np.pi * window_function) - high_cut_freq = rospy.get_param('~high_cut_freq', 800) - if high_cut_freq > mic_sampling_rate / 2: - rospy.logerr('Set high_cut_freq lower than {} Hz'.format( - mic_sampling_rate / 2)) - low_cut_freq = rospy.get_param('~low_cut_freq', 1) # remove 0 Hz - self.freq = np.fft.fftfreq( - self.audio_buffer.audio_buffer_len, d=1./mic_sampling_rate) - self.cutoff_mask = np.where( - (low_cut_freq <= self.freq) & (self.freq <= high_cut_freq), - True, False) - # How many times fft is executed in one second - # fft_exec_rate equals to output spectrogram hz - self.fft_exec_rate = rospy.get_param('~fft_exec_rate', 50) - - # Publisher and Subscriber - self.pub_spectrum = rospy.Publisher( - '~spectrum', Spectrum, queue_size=1) - self.pub_spectrum_filtered = rospy.Publisher( - '~spectrum_filtered', Spectrum, queue_size=1) - rospy.Timer(rospy.Duration(1. 
/ self.fft_exec_rate), self.timer_cb) - - def timer_cb(self, timer): - if len(self.audio_buffer) != self.audio_buffer.audio_buffer_len: - return - audio_data = self.audio_buffer.read() - # Calc spectrum by fft - amplitude = np.fft.fft(audio_data * self.window_function) - amplitude = np.log(np.abs(amplitude)) - spectrum_msg = Spectrum() - spectrum_msg.header.stamp = rospy.Time.now() - spectrum_msg.amplitude = amplitude - spectrum_msg.frequency = self.freq - self.pub_spectrum.publish(spectrum_msg) - spectrum_msg.amplitude = amplitude[self.cutoff_mask] - spectrum_msg.frequency = self.freq[self.cutoff_mask] - self.pub_spectrum_filtered.publish(spectrum_msg) + super(AudioToSpectrum, self).__init__( + data_buffer=AudioBuffer.from_rosparam(auto_start=True), + ) if __name__ == '__main__': diff --git a/audio_to_spectrogram/scripts/data_amplitude_plot.py b/audio_to_spectrogram/scripts/data_amplitude_plot.py new file mode 100755 index 0000000000..e34ad7b97c --- /dev/null +++ b/audio_to_spectrogram/scripts/data_amplitude_plot.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python + +import rospy + +from audio_to_spectrogram import DataAmplitudePlot + + +if __name__ == '__main__': + rospy.init_node('data_amplitude_plot') + DataAmplitudePlot() + rospy.spin() diff --git a/audio_to_spectrogram/scripts/data_to_spectrum.py b/audio_to_spectrogram/scripts/data_to_spectrum.py new file mode 100755 index 0000000000..62f6e1b798 --- /dev/null +++ b/audio_to_spectrogram/scripts/data_to_spectrum.py @@ -0,0 +1,12 @@ +#!/usr/bin/env python + +import rospy + +from audio_to_spectrogram import DataToSpectrum + +# This node execute FFT to input data + +if __name__ == '__main__': + rospy.init_node('data_to_spectrum') + DataToSpectrum() + rospy.spin() diff --git a/audio_to_spectrogram/scripts/spectrum_filter.py b/audio_to_spectrogram/scripts/spectrum_filter.py new file mode 100755 index 0000000000..bfa26d7808 --- /dev/null +++ b/audio_to_spectrogram/scripts/spectrum_filter.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python + +from __future__ import division + +import numpy as np +import rospy + +from jsk_recognition_msgs.msg import Spectrum +from jsk_topic_tools import ConnectionBasedTransport + + +class SpectrumFilter(ConnectionBasedTransport): + + def __init__(self): + super(SpectrumFilter, self).__init__() + data_sampling_rate = rospy.get_param('~data_sampling_rate', 500) + self.high_cut_freq = rospy.get_param('~high_cut_freq', 250) + nyquist_freq = data_sampling_rate / 2.0 + if self.high_cut_freq > nyquist_freq: + rospy.logerr('Set high_cut_freq not more than {} Hz'.format( + nyquist_freq)) + self.low_cut_freq = rospy.get_param('~low_cut_freq', 0) + + self.pub = self.advertise('~output', Spectrum, queue_size=1) + + def subscribe(self): + self.sub = rospy.Subscriber('~input', Spectrum, self.input_cb) + + def unsubscribe(self): + self.sub.unregister() + + def input_cb(self, sub_msg): + amp = np.array(sub_msg.amplitude, dtype=np.float32) + freq = np.array(sub_msg.frequency, dtype=np.float32) + cutoff_mask = np.where( + (self.low_cut_freq <= freq) & (freq <= self.high_cut_freq), + True, False) + pub_msg = Spectrum() + pub_msg.header.stamp = sub_msg.header.stamp + pub_msg.amplitude = amp[cutoff_mask] + pub_msg.frequency = freq[cutoff_mask] + self.pub.publish(pub_msg) + + +if __name__ == '__main__': + rospy.init_node('spectrum_filter') + SpectrumFilter() + rospy.spin() diff --git a/audio_to_spectrogram/scripts/spectrum_plot.py b/audio_to_spectrogram/scripts/spectrum_plot.py index bf27a824f7..2486d192db 100755 --- 
a/audio_to_spectrogram/scripts/spectrum_plot.py +++ b/audio_to_spectrogram/scripts/spectrum_plot.py @@ -5,6 +5,9 @@ import cv_bridge from jsk_topic_tools import ConnectionBasedTransport +import matplotlib +from audio_to_spectrogram import check_matplotlib_version; check_matplotlib_version() +matplotlib.use('Agg') import matplotlib.pyplot as plt import numpy as np import rospy @@ -19,13 +22,28 @@ class SpectrumPlot(ConnectionBasedTransport): def __init__(self): super(SpectrumPlot, self).__init__() + self.min_amp = rospy.get_param('~min_amp', 0.0) + self.max_amp = rospy.get_param('~max_amp', 20.0) + self.queue_size = rospy.get_param('~queue_size', 1) + max_rate = rospy.get_param('~max_rate', -1) + if max_rate > 0: + self.min_period = 1.0 / max_rate + else: + self.min_period = -1 + self.prev_pub_tm = None # Set matplotlib config self.fig = plt.figure(figsize=(8, 5)) - self.fig.suptitle('Spectrum plot', size=12) self.fig.subplots_adjust(left=0.1, right=0.95, top=0.90, bottom=0.1, wspace=0.2, hspace=0.6) self.ax = self.fig.add_subplot(1, 1, 1) self.ax.grid(True) + # self.fig.suptitle('Spectrum plot', size=12) + self.ax.set_title('Spectrum plot', fontsize=12) + # Use self.ax.set_title() instead of + # self.fig.suptitle() to use self.fig.tight_layout() + # preventing characters from being cut off + # cf. https://tm23forest.com/contents/matplotlib-tightlayout-with-figure-suptitle + # https://matplotlib.org/2.2.4/tutorials/intermediate/tight_layout_guide.html self.ax.set_xlabel('Frequency [Hz]', fontsize=12) self.ax.set_ylabel('Amplitude', fontsize=12) self.line, = self.ax.plot([0, 0], label='Amplitude of Spectrum') @@ -35,19 +53,31 @@ def __init__(self): def subscribe(self): self.sub_spectrum = rospy.Subscriber( - '~spectrum', Spectrum, self._cb, queue_size=1000) + '~spectrum', Spectrum, self._cb, queue_size=self.queue_size) def unsubscribe(self): self.sub_spectrum.unregister() def _cb(self, msg): + # Keep max_rate + if self.min_period > 0: + curr_tm = rospy.Time.now().to_sec() + if self.prev_pub_tm is not None: + elapsed_tm = curr_tm - self.prev_pub_tm + if elapsed_tm < 0: + # Time moved backwards (e.g., rosbag play --loop) + pass + elif elapsed_tm < self.min_period: + return + self.prev_pub_tm = curr_tm # Plot spectrum self.amp = np.array(msg.amplitude) self.freq = np.array(msg.frequency) self.line.set_data(self.freq, self.amp) self.ax.set_xlim((self.freq.min(), self.freq.max())) - self.ax.set_ylim((0.0, 20)) + self.ax.set_ylim((self.min_amp, self.max_amp)) self.ax.legend(loc='upper right') + self.fig.tight_layout() if self.pub_img.get_num_connections() > 0: bridge = cv_bridge.CvBridge() img = convert_matplotlib_to_img(self.fig) diff --git a/audio_to_spectrogram/scripts/spectrum_to_spectrogram.py b/audio_to_spectrogram/scripts/spectrum_to_spectrogram.py index f5be653334..86d7c21451 100755 --- a/audio_to_spectrogram/scripts/spectrum_to_spectrogram.py +++ b/audio_to_spectrogram/scripts/spectrum_to_spectrogram.py @@ -1,9 +1,13 @@ #!/usr/bin/env python +from __future__ import division + import cv2 from cv_bridge import CvBridge import numpy as np import rospy +from distutils.version import LooseVersion +import pkg_resources from jsk_recognition_msgs.msg import Spectrum from sensor_msgs.msg import Image @@ -34,10 +38,18 @@ def __init__(self): '~spectrogram', Image, queue_size=1) publish_rate = rospy.get_param( '~publish_rate', float(self.image_width / self.spectrogram_period)) - rospy.Timer( - rospy.Duration( - 1.0 / publish_rate), - self.timer_cb) + + timer_kwargs = dict( + period=rospy.Duration(1.0 
/ publish_rate), + callback=self.timer_cb, + oneshot=False, + ) + if (LooseVersion(pkg_resources.get_distribution('rospy').version) >= + LooseVersion('1.12.0')) and rospy.get_param('/use_sim_time', None): + # on >=kinetic, it raises ROSTimeMovedBackwardsException + # when we use rosbag play --loop. + timer_kwargs['reset'] = True + self.timer = rospy.Timer(**timer_kwargs) self.bridge = CvBridge() def audio_cb(self, msg): diff --git a/audio_to_spectrogram/src/audio_to_spectrogram/__init__.py b/audio_to_spectrogram/src/audio_to_spectrogram/__init__.py index d8d91d0b47..657ce0c11c 100644 --- a/audio_to_spectrogram/src/audio_to_spectrogram/__init__.py +++ b/audio_to_spectrogram/src/audio_to_spectrogram/__init__.py @@ -1,2 +1,6 @@ -from .audio_buffer import AudioBuffer -from .convert_matplotlib import convert_matplotlib_to_img +from audio_to_spectrogram.audio_buffer import AudioBuffer +from audio_to_spectrogram.compat import check_matplotlib_version +from audio_to_spectrogram.convert_matplotlib import convert_matplotlib_to_img +from audio_to_spectrogram.data_amplitude_plot import DataAmplitudePlot +from audio_to_spectrogram.data_buffer import DataBuffer +from audio_to_spectrogram.data_to_spectrum import DataToSpectrum diff --git a/audio_to_spectrogram/src/audio_to_spectrogram/audio_buffer.py b/audio_to_spectrogram/src/audio_to_spectrogram/audio_buffer.py index 6c73c39c15..41f54ed618 100644 --- a/audio_to_spectrogram/src/audio_to_spectrogram/audio_buffer.py +++ b/audio_to_spectrogram/src/audio_to_spectrogram/audio_buffer.py @@ -1,14 +1,9 @@ -from __future__ import division - -import array -from threading import Lock - -from audio_common_msgs.msg import AudioData -import numpy as np import rospy +from audio_to_spectrogram.data_buffer import DataBuffer -class AudioBuffer(object): + +class AudioBuffer(DataBuffer): def __init__(self, topic_name='~audio', input_sample_rate=16000, @@ -18,44 +13,21 @@ def __init__(self, topic_name='~audio', get_latest_data=False, discard_data=False, auto_start=False): - self.is_subscribing = True - self.get_latest_data = get_latest_data - self.discard_data = discard_data - self._window_size = window_size - self.audio_buffer_len = int(self._window_size * input_sample_rate) - self.lock = Lock() - self.bitdepth = bitdepth - self.n_channel = n_channel - self.target_channel = min(self.n_channel - 1, max(0, target_channel)) - self.input_sample_rate = input_sample_rate - self.type_code = {} - for code in ['b', 'h', 'i', 'l']: - self.type_code[array.array(code).itemsize] = code - - self.dtype = self.type_code[self.bitdepth / 8] - self.audio_buffer = np.array([], dtype=self.dtype) - - self.max_value = 2 ** (self.bitdepth - 1) - 1 - - self.topic_name = topic_name - - if auto_start: - self.subscribe() - - def __len__(self): - return len(self.audio_buffer) - @property - def window_size(self): - return self._window_size - - @window_size.setter - def window_size(self, size): - with self.lock: - self._window_size = size - self.audio_buffer_len = int(self._window_size - * self.input_sample_rate) - self.audio_buffer = np.array([], dtype=self.dtype) + super(AudioBuffer, self).__init__( + topic_name=topic_name, + expr_to_get_data='m.data', # field of audio_common_msgs/AudioData + input_sample_rate=input_sample_rate, + window_size=window_size, + is_integer=True, + is_signed=True, + bitdepth=bitdepth, + n_channel=n_channel, + target_channel=target_channel, + get_latest_data=get_latest_data, + discard_data=discard_data, + auto_start=auto_start, + ) @staticmethod def from_rosparam(**kwargs): 
@@ -68,51 +40,3 @@ def from_rosparam(**kwargs): n_channel=n_channel, target_channel=target_channel, **kwargs) - - def subscribe(self): - self.audio_buffer = np.array([], dtype=self.dtype) - self.sub_audio = rospy.Subscriber( - self.topic_name, AudioData, self.audio_cb) - - def unsubscribe(self): - self.sub_audio.unregister() - - def _read(self, size, normalize=False): - with self.lock: - if self.get_latest_data: - audio_buffer = self.audio_buffer[-size:] - else: - audio_buffer = self.audio_buffer[:size] - if self.discard_data: - self.audio_buffer = self.audio_buffer[size:] - if normalize is True: - audio_buffer = audio_buffer / self.max_value - return audio_buffer - - def sufficient_data(self, size): - return len(self.audio_buffer) < size - - def read(self, size=None, wait=False, normalize=False): - if size is None: - size = self.audio_buffer_len - size = int(size * self.input_sample_rate) - while wait is True \ - and not rospy.is_shutdown() and len(self.audio_buffer) < size: - rospy.sleep(0.001) - return self._read(size, normalize=normalize) - - def close(self): - try: - self.sub_audio.unregister() - except Exception: - pass - self.audio_buffer = np.array([], dtype=self.dtype) - - def audio_cb(self, msg): - audio_buffer = np.frombuffer(msg.data, dtype=self.dtype) - audio_buffer = audio_buffer[self.target_channel::self.n_channel] - with self.lock: - self.audio_buffer = np.append( - self.audio_buffer, audio_buffer) - self.audio_buffer = self.audio_buffer[ - -self.audio_buffer_len:] diff --git a/audio_to_spectrogram/src/audio_to_spectrogram/compat.py b/audio_to_spectrogram/src/audio_to_spectrogram/compat.py new file mode 100644 index 0000000000..2a3cce1c82 --- /dev/null +++ b/audio_to_spectrogram/src/audio_to_spectrogram/compat.py @@ -0,0 +1,25 @@ +import os +import subprocess +import warnings + +import matplotlib + + +def check_matplotlib_version( + python_version=os.getenv('ROS_PYTHON_VERSION')): + if python_version == '2': + python = 'python' + else: + python = 'python{}'.format(python_version) + out = subprocess.check_output( + ['dpkg', '-L', '{}-matplotlib'.format(python)]) + if isinstance(out, bytes): + out = out.decode('utf-8') + matplotlib_filelists = out.split() + if os.path.dirname(matplotlib.__file__) in matplotlib_filelists: + return True + else: + warnings.warn( + "It looks like you are using pip's matplotlib. 
" + "Apt's matplotlib is recommended.") + return False diff --git a/audio_to_spectrogram/src/audio_to_spectrogram/convert_matplotlib.py b/audio_to_spectrogram/src/audio_to_spectrogram/convert_matplotlib.py index 0b11d6b801..4324cda94e 100644 --- a/audio_to_spectrogram/src/audio_to_spectrogram/convert_matplotlib.py +++ b/audio_to_spectrogram/src/audio_to_spectrogram/convert_matplotlib.py @@ -7,6 +7,7 @@ import numpy as np import PIL +import PIL.Image def convert_matplotlib_to_img(fig): diff --git a/audio_to_spectrogram/src/audio_to_spectrogram/data_amplitude_plot.py b/audio_to_spectrogram/src/audio_to_spectrogram/data_amplitude_plot.py new file mode 100644 index 0000000000..dd7311fc99 --- /dev/null +++ b/audio_to_spectrogram/src/audio_to_spectrogram/data_amplitude_plot.py @@ -0,0 +1,100 @@ +from __future__ import division + +from distutils.version import LooseVersion +import pkg_resources +import cv_bridge +from dynamic_reconfigure.server import Server +from jsk_topic_tools import ConnectionBasedTransport +import matplotlib +from audio_to_spectrogram.compat import check_matplotlib_version; check_matplotlib_version() +matplotlib.use('Agg') +import matplotlib.pyplot as plt +import numpy as np +import rospy +import sensor_msgs.msg + +from audio_to_spectrogram.cfg import DataAmplitudePlotConfig as Config +from audio_to_spectrogram.convert_matplotlib import convert_matplotlib_to_img +from audio_to_spectrogram.data_buffer import DataBuffer + + +class DataAmplitudePlot(ConnectionBasedTransport): + + def __init__(self, data_buffer=None): + super(DataAmplitudePlot, self).__init__() + + if data_buffer is None: + self.data_buffer = DataBuffer.from_rosparam() + else: + self.data_buffer = data_buffer + + # Set matplotlib config + self.fig = plt.figure(figsize=(8, 5)) + self.ax = self.fig.add_subplot(1, 1, 1) + self.ax.grid(True) + self.ax.set_xlabel('Time [s]', fontsize=12) + self.ax.set_ylabel('Amplitude', fontsize=12) + self.line, = self.ax.plot([0, 0], label='Amplitude of Input Data') + + # ROS dynamic reconfigure + self.srv = Server(Config, self.config_callback) + + self.pub_img = self.advertise( + '~output/viz', sensor_msgs.msg.Image, queue_size=1) + + def start_timer(self): + rate = rospy.get_param('~rate', 10) + if rate == 0: + rospy.logwarn('You cannot set 0 as the rate; change it to 10.') + rate = 10 + + timer_kwargs = dict( + period=rospy.Duration(1.0 / rate), + callback=self.timer_cb, + oneshot=False, + ) + if (LooseVersion(pkg_resources.get_distribution('rospy').version) >= + LooseVersion('1.12.0')) and rospy.get_param('/use_sim_time', None): + # on >=kinetic, it raises ROSTimeMovedBackwardsException + # when we use rosbag play --loop. + timer_kwargs['reset'] = True + self.timer = rospy.Timer(**timer_kwargs) + + def stop_timer(self): + self.timer.shutdown() + self.timer = None + + def subscribe(self): + self.data_buffer.subscribe() + self.start_timer() + + def unsubscribe(self): + self.data_buffer.unsubscribe() + self.stop_timer() + + def config_callback(self, config, level): + self.maximum_amplitude = config.maximum_amplitude + self.window_size = config.window_size + self.data_buffer.window_size = self.window_size + return config + + def timer_cb(self, timer): + window_size = self.window_size + amp = self.data_buffer.read(window_size) + if len(amp) == 0: + return + times = np.linspace(-window_size, 0.0, len(amp)) + + # Plot data amplitude. 
+ self.line.set_data(times, amp) + self.ax.set_xlim((times.min(), times.max())) + self.ax.set_ylim((-self.maximum_amplitude, self.maximum_amplitude)) + + self.ax.legend(loc='upper right') + self.fig.tight_layout() + if self.pub_img.get_num_connections() > 0: + bridge = cv_bridge.CvBridge() + img = convert_matplotlib_to_img(self.fig) + img_msg = bridge.cv2_to_imgmsg(img, encoding='rgb8') + img_msg.header.stamp = rospy.Time.now() + self.pub_img.publish(img_msg) diff --git a/audio_to_spectrogram/src/audio_to_spectrogram/data_buffer.py b/audio_to_spectrogram/src/audio_to_spectrogram/data_buffer.py new file mode 100644 index 0000000000..aef9609edf --- /dev/null +++ b/audio_to_spectrogram/src/audio_to_spectrogram/data_buffer.py @@ -0,0 +1,170 @@ +from __future__ import division + +import array +from threading import Lock + +import numpy as np +import rospy +import rostopic + + +class DataBuffer(object): + + def __init__(self, topic_name='~input', + expr_to_get_data='m.data', + input_sample_rate=500, + window_size=10.0, + is_integer=False, + is_signed=True, + bitdepth=64, + n_channel=1, target_channel=0, + get_latest_data=False, + discard_data=False, + auto_start=False): + self.is_subscribing = True + self.get_latest_data = get_latest_data + self.discard_data = discard_data + self._window_size = window_size + self.data_buffer_len = int(self._window_size * input_sample_rate) + self.lock = Lock() + self.bitdepth = bitdepth + self.n_channel = n_channel + self.target_channel = min(self.n_channel - 1, max(0, target_channel)) + self.input_sample_rate = input_sample_rate + self.type_code = {} + if is_integer: + if is_signed: + codes = ['b', 'h', 'i', 'l'] + else: + codes = ['B', 'H', 'I', 'L'] + else: + codes = ['f', 'd'] + for code in codes: + self.type_code[array.array(code).itemsize] = code + + self.dtype = self.type_code[self.bitdepth / 8] + self.data_buffer = np.array([], dtype=self.dtype) + + if is_integer: + self.max_value = 2 ** (self.bitdepth - 1) - 1 + else: + self.max_value = None + + self.topic_name = topic_name + self.expr_to_get_data = expr_to_get_data + + if auto_start: + self.subscribe() + + def __len__(self): + return len(self.data_buffer) + + @property + def window_size(self): + return self._window_size + + @window_size.setter + def window_size(self, size): + with self.lock: + self._window_size = size + self.data_buffer_len = int(self._window_size + * self.input_sample_rate) + self.data_buffer = np.array([], dtype=self.dtype) + + @staticmethod + def from_rosparam(**kwargs): + expr_to_get_data = rospy.get_param( + '~expression_to_get_data', 'm.data') + n_channel = rospy.get_param('~n_channel', 1) + target_channel = rospy.get_param('~target_channel', 0) + data_sampling_rate = rospy.get_param('~data_sampling_rate', 500) + is_integer = rospy.get_param('~is_integer', False) + is_signed = rospy.get_param('~is_signed', True) + bitdepth = rospy.get_param('~bitdepth', 64) + return DataBuffer(expr_to_get_data=expr_to_get_data, + input_sample_rate=data_sampling_rate, + is_integer=is_integer, + is_signed=is_signed, + bitdepth=bitdepth, + n_channel=n_channel, + target_channel=target_channel, + **kwargs) + + def subscribe(self): + self.data_buffer = np.array([], dtype=self.dtype) + # https://github.com/ros/ros_comm/blob/1.14.13/tools/topic_tools/scripts/transform#L86-L87 + input_class, input_topic, self.input_fn = rostopic.get_topic_class( + rospy.resolve_name(self.topic_name), blocking=True) + # https://github.com/ros/ros_comm/blob/1.14.13/tools/topic_tools/scripts/transform#L95 + self.sub = 
rospy.Subscriber( + input_topic, input_class, self.input_cb) + + def unsubscribe(self): + self.sub.unregister() + + def _read(self, size, normalize=False): + with self.lock: + if self.get_latest_data: + data_buffer = self.data_buffer[-size:] + else: + data_buffer = self.data_buffer[:size] + if self.discard_data: + self.data_buffer = self.data_buffer[size:] + if normalize is True: + if self.max_value is not None: + data_buffer = data_buffer / self.max_value + else: + rospy.logerr('normalize is disabled with current settings') + return data_buffer + + def sufficient_data(self, size): + return len(self.data_buffer) < size + + def read(self, size=None, wait=False, normalize=False): + if size is None: + size = self.data_buffer_len + size = int(size * self.input_sample_rate) + while wait is True \ + and not rospy.is_shutdown() and len(self.data_buffer) < size: + rospy.sleep(0.001) + return self._read(size, normalize=normalize) + + def close(self): + try: + self.sub.unregister() + except Exception: + pass + self.data_buffer = np.array([], dtype=self.dtype) + + def input_cb(self, m): + # https://github.com/ros/ros_comm/blob/1.14.13/tools/topic_tools/scripts/transform#L98-L99 + if self.input_fn is not None: + m = self.input_fn(m) + data = eval(self.expr_to_get_data) + data_type = type(data) + if data_type is bytes: + data_buffer = np.frombuffer(data, dtype=self.dtype) + elif data_type in [list, tuple]: + data_buffer = np.fromiter(data, dtype=self.dtype) + elif data_type in [int, float]: + data_buffer = np.array([data], dtype=self.dtype) + else: + rospy.logerr('Unsupported data type: {}'.format(data_type)) + return + # This division into cases assumes + # expr_to_get_data returns a Python built-in numeric or sequence type + # deserialized from a ROS msg by scripts generated by genpy. + # Other than uint8[], this deserialization is executed with + # struct.Struct.unpack() and it returns int or float (or its tuple). + # Against uint8[], this deserialization returns bytes. 
+ # /opt/ros/melodic/lib/python2.7/dist-packages/std_msgs/msg/_Float64.py + # /opt/ros/melodic/lib/python2.7/dist-packages/audio_common_msgs/msg/_AudioData.py + # https://docs.python.org/2.7/library/struct.html#format-characters + # https://github.com/ros/genpy/blob/0.6.16/src/genpy/generate_struct.py#L165 + + data_buffer = data_buffer[self.target_channel::self.n_channel] + with self.lock: + self.data_buffer = np.append( + self.data_buffer, data_buffer) + self.data_buffer = self.data_buffer[ + -self.data_buffer_len:] diff --git a/audio_to_spectrogram/src/audio_to_spectrogram/data_to_spectrum.py b/audio_to_spectrogram/src/audio_to_spectrogram/data_to_spectrum.py new file mode 100644 index 0000000000..adcd01f98d --- /dev/null +++ b/audio_to_spectrogram/src/audio_to_spectrogram/data_to_spectrum.py @@ -0,0 +1,103 @@ +from __future__ import division + +import numpy as np +import rospy +from distutils.version import LooseVersion +import pkg_resources + +from jsk_recognition_msgs.msg import Spectrum + +from audio_to_spectrogram.data_buffer import DataBuffer + + +class DataToSpectrum(object): + + def __init__(self, data_buffer=None): + super(DataToSpectrum, self).__init__() + + if data_buffer is None: + self.data_buffer = DataBuffer.from_rosparam(auto_start=True) + else: + self.data_buffer = data_buffer + fft_sampling_period = rospy.get_param('~fft_sampling_period', 0.3) + self.data_buffer.window_size = fft_sampling_period + data_sampling_rate = self.data_buffer.input_sample_rate + + # fft config + window_function = np.arange( + 0.0, 1.0, 1.0 / self.data_buffer.data_buffer_len) + self.window_function = 0.54 - 0.46 * np.cos( + 2 * np.pi * window_function) + self.freq = np.fft.fftfreq( + self.data_buffer.data_buffer_len, d=1./data_sampling_rate) + # How many times fft is executed in one second + # fft_exec_rate equals to output spectrogram hz + self.fft_exec_rate = rospy.get_param('~fft_exec_rate', 50) + + # Publisher and Subscriber + self.pub_spectrum = rospy.Publisher( + '~spectrum', Spectrum, queue_size=1) + self.pub_norm_half_spectrum = rospy.Publisher( + '~normalized_half_spectrum', Spectrum, queue_size=1) + self.pub_log_spectrum = rospy.Publisher( + '~log_spectrum', Spectrum, queue_size=1) + + timer_kwargs = dict( + period=rospy.Duration(1.0 / self.fft_exec_rate), + callback=self.timer_cb, + oneshot=False, + ) + if (LooseVersion(pkg_resources.get_distribution('rospy').version) >= + LooseVersion('1.12.0')) and rospy.get_param('/use_sim_time', None): + # on >=kinetic, it raises ROSTimeMovedBackwardsException + # when we use rosbag play --loop. + timer_kwargs['reset'] = True + self.timer = rospy.Timer(**timer_kwargs) + + def publish_spectrum(self, pub, stamp, amp, freq): + spectrum_msg = Spectrum() + spectrum_msg.header.stamp = stamp + spectrum_msg.amplitude = amp + spectrum_msg.frequency = freq + pub.publish(spectrum_msg) + + def timer_cb(self, timer): + if len(self.data_buffer) != self.data_buffer.data_buffer_len: + return + data = self.data_buffer.read() + stamp = rospy.Time.now() + # Calc spectrum by fft + fft = np.fft.fft(data * self.window_function) + self.publish_spectrum( + self.pub_spectrum, + stamp, + np.abs(fft), + self.freq, + # Usual "amplitude spectrum". 
+ # https://ryo-iijima.com/fftresult/ + ) + self.publish_spectrum( + self.pub_norm_half_spectrum, + stamp, + np.abs(fft[self.freq >= 0] / + (self.data_buffer.data_buffer_len / 2.0)), + self.freq[self.freq >= 0], + # Spectrum which is "half" + # (having non-negative frequencies (0Hz-Nyquist frequency)) + # and is "normalized" + # (consistent with the amplitude of the original signal) + # https://ryo-iijima.com/fftresult/ + # https://stackoverflow.com/questions/63211851/why-divide-the-output-of-numpy-fft-by-n + # https://github.com/jsk-ros-pkg/jsk_recognition/issues/2761#issue-1550715400 + ) + self.publish_spectrum( + self.pub_log_spectrum, + stamp, + np.log(np.abs(fft)), + self.freq, + # Usually, log is applied to "power spectrum" (np.abs(fft)**2): + # np.log(np.abs(fft)**2), 20*np.log10(np.abs(fft)), ... + # But we use the above equation for simplicity. + # https://github.com/jsk-ros-pkg/jsk_recognition/issues/2761#issuecomment-1445810380 + # http://makotomurakami.com/blog/2020/05/23/5266/ + ) diff --git a/audio_to_spectrogram/test/audio_to_spectrogram.test b/audio_to_spectrogram/test/audio_to_spectrogram.test index 671975f661..41a687e295 100644 --- a/audio_to_spectrogram/test/audio_to_spectrogram.test +++ b/audio_to_spectrogram/test/audio_to_spectrogram.test @@ -11,6 +11,10 @@ topic_0: /spectrum_to_spectrogram/spectrogram timeout_0: 30 + topic_1: /audio_amplitude_plot/output/viz + timeout_1: 30 + topic_2: /preprocessed_spectrogram_source_plot/output/viz + timeout_2: 30 diff --git a/audio_to_spectrogram/test/wrench_to_spectrogram.test b/audio_to_spectrogram/test/wrench_to_spectrogram.test new file mode 100644 index 0000000000..78281615c9 --- /dev/null +++ b/audio_to_spectrogram/test/wrench_to_spectrogram.test @@ -0,0 +1,21 @@ + + + + + + + + + topic_0: /spectrum_to_spectrogram/spectrogram + timeout_0: 30 + topic_1: /data_amplitude_plot/output/viz + timeout_1: 30 + topic_2: /preprocessed_spectrogram_source_plot/output/viz + timeout_2: 30 + + + + diff --git a/checkerboard_detector/src/objectdetection_tf_publisher.py b/checkerboard_detector/src/objectdetection_tf_publisher.py index f38782074f..bdcb339a28 100755 --- a/checkerboard_detector/src/objectdetection_tf_publisher.py +++ b/checkerboard_detector/src/objectdetection_tf_publisher.py @@ -16,9 +16,9 @@ def __init__(self): self.br = tf.TransformBroadcaster() self.subscriber = rospy.Subscriber("ObjectDetection", ObjectDetection, self.simple_callback); else: - self.init_object_messages() self.frame_id = rospy.get_param("~frame_id", "object") self.subscriber = rospy.Subscriber("ObjectDetection", ObjectDetection, self.callback); + self.init_object_messages() def init_object_messages(self): if rospy.has_param('~checker_board_params/position_x'): diff --git a/doc/install_chainer_gpu.rst b/doc/install_chainer_gpu.md similarity index 67% rename from doc/install_chainer_gpu.rst rename to doc/install_chainer_gpu.md index ce5fc43147..cd4debf0b4 100644 --- a/doc/install_chainer_gpu.rst +++ b/doc/install_chainer_gpu.md @@ -15,37 +15,37 @@ Requirements Version Compatibilities for 18.04 --------------------------------- -(Recommended) Use CUDA 9.1 from Official ubuntu repository (https://packages.ubuntu.com/bionic/nvidia-cuda-dev) +(Recommended) Use CUDA 9.1 from Official ubuntu repository [https://launchpad.net/ubuntu/bionic/+package/nvidia-cuda-dev](https://launchpad.net/ubuntu/bionic/+package/nvidia-cuda-dev) - Chainer - - chainer == 6.7.0 (last version supoprting python2. 
See https://github.com/chainer/chainer/releases/tag/v6.7.0) - - cupy-cuda91 == 6.7.0 (chainer v6.7.0 requires cupy/cudnn for hardware acceleration support https://docs.chainer.org/en/v6.7.0/install.html) + - chainer == 6.7.0 (last version supporting python2. See [https://github.com/chainer/chainer/releases/tag/v6.7.0](https://github.com/chainer/chainer/releases/tag/v6.7.0)) + - cupy-cuda91 == 6.7.0 (chainer v6.7.0 requires cupy/cudnn for hardware acceleration support [https://docs.chainer.org/en/v6.7.0/install.html](https://docs.chainer.org/en/v6.7.0/install.html)) - PyTorch - - pytorch == 1.1.0 (Latest pytorch version supporting CUDA 9.1 https://download.pytorch.org/whl/cu90/torch_stable.html) - - CUDA >= 9.0 (Minimum required version for PyTorch 1.1.0 https://pytorch.org/get-started/previous-versions/#v110) + - pytorch == 1.1.0 (Latest pytorch version supporting CUDA 9.1 [https://download.pytorch.org/whl/cu90/torch_stable.html](https://download.pytorch.org/whl/cu90/torch_stable.html)) + - CUDA >= 9.0 (Minimum required version for PyTorch 1.1.0 [https://pytorch.org/get-started/previous-versions/#v110](https://pytorch.org/get-started/previous-versions/#v110)) -(Experimental) Use CUDA 10.2 from Nvidia Developer's site (https://developer.nvidia.com/cuda-10.2-download-archive) +(Experimental) Use CUDA 10.2 from Nvidia Developer's site [https://developer.nvidia.com/cuda-10.2-download-archive](https://developer.nvidia.com/cuda-10.2-download-archive) - Chainer - - chainer == 6.7.0 (last version supoprting python2. See https://github.com/chainer/chainer/releases/tag/v6.7.0) - - cupy >=6.7.0,<7.0.0 (chainer v6.7.0 requires cupy/cudnn for hardware acceleration support https://docs.chainer.org/en/v6.7.0/install.html) + - chainer == 6.7.0 (last version supporting python2. See [https://github.com/chainer/chainer/releases/tag/v6.7.0](https://github.com/chainer/chainer/releases/tag/v6.7.0)) + - cupy >=6.7.0,<7.0.0 (chainer v6.7.0 requires cupy/cudnn for hardware acceleration support [https://docs.chainer.org/en/v6.7.0/install.html](https://docs.chainer.org/en/v6.7.0/install.html)) - cuDNN < 8 (cupy 6.7.0 requires cuDNN v5000= and <=v7999) - - CUDA 10.2 (cuDNN v7.6.5 requires CUDA 10.2 https://developer.nvidia.com/rdp/cudnn-archive) + - CUDA 10.2 (cuDNN v7.6.5 requires CUDA 10.2 [https://developer.nvidia.com/rdp/cudnn-archive](https://developer.nvidia.com/rdp/cudnn-archive)) - PyTorch - pytorch >= 1.4.0 - - CUDA >= 9.2 (Minimum required version for PyTorch https://pytorch.org/get-started/previous-versions/#v140) - - Driver Version >= 396.26 (From CUDA Toolkit and Corresponding Driver Versions in https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html) + - CUDA >= 9.2 (Minimum required version for PyTorch [https://pytorch.org/get-started/previous-versions/#v140](https://pytorch.org/get-started/previous-versions/#v140)) + - Driver Version >= 396.26 (From CUDA Toolkit and Corresponding Driver Versions in [https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html](https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html)) Install CUDA ------------ -- Ubuntu 14.04 : Download deb file from https://developer.nvidia.com/cuda-downloads?target_os=Linux:: +- Ubuntu 14.04 : Download deb file from [https://developer.nvidia.com/cuda-downloads?target_os=Linux](https://developer.nvidia.com/cuda-downloads?target_os=Linux): ```bash # If you'd like to use CUDA8.0 on Ubuntu 14.04.
@@ -56,7 +56,7 @@ Install CUDA sudo apt-get install cuda ``` - - Add below to your `~/.bashrc`:: + - Add below to your `~/.bashrc`: ```bash # setup cuda & cudnn @@ -76,7 +76,7 @@ Install CUDA ``` -- Ubuntu 16.04 : Download deb file from https://developer.nvidia.com/cuda-downloads?target_os=Linux:: +- Ubuntu 16.04 : Download deb file from [https://developer.nvidia.com/cuda-downloads?target_os=Linux](https://developer.nvidia.com/cuda-downloads?target_os=Linux): ```bash # If you'd like to use CUDA9.2 on Ubuntu 16.04. @@ -91,7 +91,7 @@ Install CUDA sudo apt install nvidia-cuda-toolkit sudo apt install nvidia-cuda-dev -- (Experimental) Ubuntu 18.04 : CUDA 10.2 is the latest version which supports `jsk_perception`. Download deb file from https://developer.nvidia.com/cuda-downloads?target_os=Linux:: +- (Experimental) Ubuntu 18.04 : CUDA 10.2 is the latest version which supports `jsk_perception`. Download deb file from https://developer.nvidia.com/cuda-downloads?target_os=Linux: ```bash # If you'd like to use CUDA10.2 on Ubuntu 18.04. @@ -124,11 +124,11 @@ Install CUDA Install CUDNN ------------- -- If you install `pip install cupy-cuda91`, you do not need to install CUDNN manually. (c.f. https://github.com/jsk-ros-pkg/jsk_visualization/issues/809). Thus, default 18.04 user can use CUDA 9.1 and `cupy-cuda91==6.7.0` for `chainer==6.7.0` and you can SKIP this section. +- If you install `pip install cupy-cuda91`, you do not need to install CUDNN manually. (c.f. [https://github.com/jsk-ros-pkg/jsk_visualization/issues/809](https://github.com/jsk-ros-pkg/jsk_visualization/issues/809)). Thus, default 18.04 user can use CUDA 9.1 and `cupy-cuda91==6.7.0` for `chainer==6.7.0` and you can SKIP this section. Installing CUDNN manually only requires for experimental user who install CUDA 10.2 manually. -- You need to login at https://developer.nvidia.com/cudnn +- You need to login at [https://developer.nvidia.com/cudnn](https://developer.nvidia.com/cudnn) - Go to cuDNN Download and choose version - Download deb files of cuDNN Runtime Library and cuDNN Developer Library @@ -156,7 +156,7 @@ Install Cupy - (Default) Chainer 6.7.0 requires CuPy 6.7.0 and if you have CUDA 9.1, you can use CuPy pre-compiled binary package. - - Pre-compiled Install Cupy for CUDA 9.1 :: + - Pre-compiled Install Cupy for CUDA 9.1 : ```bash sudo pip install cupy-cuda91==6.7.0 @@ -164,7 +164,7 @@ Install Cupy - (Experimental) If you have newer CUDA version. You need to install CuPy with source distribution. This requires CUDNN before you run `pip install cupy` . - - Source Install Cupy for CUDA 10.2 :: + - Source Install Cupy for CUDA 10.2 : ```bash sudo pip install -vvv cupy --no-cache-dir @@ -174,7 +174,7 @@ Install Cupy Install PyTorch --------------- -- 18.04 provides CUDA 9.1 by defualt. To install PyTorch compatible with this version, download following wheel from https://download.pytorch.org/whl/cu90/torch_stable.html, and install manually. +- 18.04 provides CUDA 9.1 by defualt. To install PyTorch compatible with this version, download following wheel from [https://download.pytorch.org/whl/cu90/torch_stable.html](https://download.pytorch.org/whl/cu90/torch_stable.html), and install manually. ```bash sudo pip install torch-1.1.0-cp27-cp27mu-linux_x86_64.whl @@ -187,12 +187,12 @@ Install PyTorch sudo pip install torch==1.4.0 ``` -- See https://github.com/jsk-ros-pkg/jsk_recognition/pull/2601#issuecomment-876948260 for more info. 
+- See [https://github.com/jsk-ros-pkg/jsk_recognition/pull/2601#issuecomment-876948260](https://github.com/jsk-ros-pkg/jsk_recognition/pull/2601#issuecomment-876948260) for more info. Try Chainer Samples ----------- -You can try to run samples to check if the installation succeeded:: +You can try to run samples to check if the installation succeeded: roslaunch jsk_perception sample_fcn_object_segmentation.launch gpu:=0 roslaunch jsk_perception sample_people_pose_estimation_2d.launch GPU:=0 @@ -201,7 +201,7 @@ You can try to run samples to check if the installation succeeded:: Try PyTorch Samples ----------- -You can try to run samples to check if the installation succeeded:: +You can try to run samples to check if the installation succeeded: roslaunch jsk_perception sample_hand_pose_estimation_2d.launch gpu:=0 diff --git a/doc/jsk_pcl_ros_utils/nodes/pointcloud_to_pcd.rst b/doc/jsk_pcl_ros_utils/nodes/pointcloud_to_pcd.rst index 40debd2693..77a78990b8 100644 --- a/doc/jsk_pcl_ros_utils/nodes/pointcloud_to_pcd.rst +++ b/doc/jsk_pcl_ros_utils/nodes/pointcloud_to_pcd.rst @@ -12,6 +12,12 @@ Subscribing Topic * ``~input`` (``sensor_msgs/PoinCloud2``) +Advertising Services +----------------- +* ``~save_pcd`` (``std_srvs/Empty``) + + Convert PointCloud2 to pcd, saved under `{prefix}{stamp}.pcd`. + Parameters ---------- @@ -37,3 +43,5 @@ Parameters * ``duration`` (Double, default: ``1.0``) Saving duration. You can change saving frequency with this parameter. + If the duration is greater than 0.0, the pcd data is stored continuously under the set duration. + When you want to use this as ROS service, set the duration to 0.0. diff --git a/doc/jsk_perception/nodes/aws_detect_faces.rst b/doc/jsk_perception/nodes/aws_detect_faces.rst index 1c23d9fc94..6bbd29abf4 100644 --- a/doc/jsk_perception/nodes/aws_detect_faces.rst +++ b/doc/jsk_perception/nodes/aws_detect_faces.rst @@ -22,7 +22,7 @@ Subscribing Topic Publishing Topic ---------------- -* ``~faces`` (``opencv_msgs/FaceArrayStamped``) +* ``~faces`` (``opencv_apps/FaceArrayStamped``) Detected face positions, facial landmarks, emotions, presence of beard, sunglasses, and so on. diff --git a/doc/jsk_perception/nodes/classification_node.md b/doc/jsk_perception/nodes/classification_node.md new file mode 100644 index 0000000000..3262618c48 --- /dev/null +++ b/doc/jsk_perception/nodes/classification_node.md @@ -0,0 +1,83 @@ +# classification_node.py + +![](images/clip.png) + +The ROS node for Classification with CLIP. + +## System Configuration +![](images/large_scale_vil_system.png) + +This node requires to work with the Docker Container for inference. Please build the container at first following Setup instruction. + +### Prerequisite +This node requires NVIDIA GPU and more than 4GB GRAM to work properly. +You have to install nvidia-container-toolkit for using GPU with docker. Please follow [official instruction](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html). 
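+
+(Optional) For reference, a quick way to check that a CUDA-capable GPU with enough memory is visible is shown below. This is only an illustrative snippet, assuming PyTorch is installed on the machine that will run the container; it is not part of this node.
+
+```python
+# Illustrative check only: verify that a CUDA GPU and its total memory are visible to PyTorch.
+import torch
+
+if not torch.cuda.is_available():
+    raise SystemExit("No CUDA-capable GPU is visible to PyTorch")
+total_gb = torch.cuda.get_device_properties(0).total_memory / (1024 ** 3)
+print("GPU 0: {}, {:.1f} GB total memory".format(torch.cuda.get_device_name(0), total_gb))
+```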
+ +### Build the docker image +You have to build the docker image of OFA + +```shell +roscd jsk_perception/docker +make +``` + +## Subscribing topic +* `~image` (`sensor_msgs/Image`) + + Input image + +## Publishing topic +* `~result` (`jsk_recognition_msgs/ClassificationResult`) + + Classification result + +* `~result/image` (`sensor_msgs/Image`) + + Images used for inference + +* `~visualize` (`std_msgs/String`) + + Classification result to visualize + +## Action topic +* `~inference_server/goal` (`jsk_recognition_msgs/ClassificationTaskActionGoal`) + + Classification request with custom categories and image + +* `~inference_server/result` (`jsk_recognition_msgs/ClassificationTaskActionResult`) + + Clasification result of `~inference_server/goal` + +## Parameters +* `~host` (String, default: `localhost`) + + The host name or IP of inference container + +* `~port` (Integer, default: `8080`) + + The HTTP port of inference container + +## Dynamic Reconfigure Parameters +* `~queries` (string, default: `human;kettle;cup;glass`) + + Default categories used for subscribing image topic. + + You can send multiple queries with separating semicolon. + +### Run inference container on another host or another terminal +In the remote GPU machine, +```shell +cd jsk_recognition/jsk_perception/docker +./run_jsk_vil_api clip --port (Your vacant port) +``` + +In the ROS machine, +```shell +roslaunch jsk_perception classification.launch port:=(Your inference container port) host:=(Your inference container host) CLASSIFICATION_INPUT_IMAGE:=(Your image topic name) gui:=true +``` + + +### Run both inference container and ros node in single host +``` +roslaunch jsk_perception classification.launch run_api:=true CLASSIFICATION_INPUT_IMAGE:=(Your image topic name) gui:=true +``` diff --git a/doc/jsk_perception/nodes/images/clip.png b/doc/jsk_perception/nodes/images/clip.png new file mode 100644 index 0000000000..059b31e32b Binary files /dev/null and b/doc/jsk_perception/nodes/images/clip.png differ diff --git a/doc/jsk_perception/nodes/images/invert_mask_image.jpg b/doc/jsk_perception/nodes/images/invert_mask_image.jpg new file mode 100644 index 0000000000..041079262a Binary files /dev/null and b/doc/jsk_perception/nodes/images/invert_mask_image.jpg differ diff --git a/doc/jsk_perception/nodes/images/large_scale_vil_system.png b/doc/jsk_perception/nodes/images/large_scale_vil_system.png new file mode 100644 index 0000000000..4856b85d09 Binary files /dev/null and b/doc/jsk_perception/nodes/images/large_scale_vil_system.png differ diff --git a/doc/jsk_perception/nodes/images/point_pose_extractor_mouse.png b/doc/jsk_perception/nodes/images/point_pose_extractor_mouse.png new file mode 100644 index 0000000000..fd1cde2b7b Binary files /dev/null and b/doc/jsk_perception/nodes/images/point_pose_extractor_mouse.png differ diff --git a/doc/jsk_perception/nodes/images/vqa.png b/doc/jsk_perception/nodes/images/vqa.png new file mode 100644 index 0000000000..132e32784d Binary files /dev/null and b/doc/jsk_perception/nodes/images/vqa.png differ diff --git a/doc/jsk_perception/nodes/invert_mask_image.md b/doc/jsk_perception/nodes/invert_mask_image.md new file mode 100644 index 0000000000..30be57a1a0 --- /dev/null +++ b/doc/jsk_perception/nodes/invert_mask_image.md @@ -0,0 +1,29 @@ +# InvertMaskImage + +![](images/invert_mask_image.jpg) + +Inverts the input mask image. + +## Subscribing Topic + +* `~input` (`sensor_msgs/Image`) + + Input mask image. 
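+
+The inversion is conceptually equivalent to flipping every pixel of a `mono8` mask (0 becomes 255 and vice versa). The following is a minimal sketch of that operation with `cv_bridge` and OpenCV, for illustration only; in practice, use the nodelet itself.
+
+```python
+# Illustrative sketch: what "inverting" a mono8 mask image means.
+import cv2
+from cv_bridge import CvBridge
+
+bridge = CvBridge()
+
+def invert(mask_msg):
+    mask = bridge.imgmsg_to_cv2(mask_msg, desired_encoding='mono8')
+    inverted = cv2.bitwise_not(mask)  # 0 <-> 255
+    return bridge.cv2_to_imgmsg(inverted, encoding='mono8')
+```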
+ +## Publishing Topic + +* `~output` (`sensor_msgs/Image`) + + Inverted mask image. + +## Parameters + +* `~queue_size` (Int, default: `1`) + + How many messages you allow about the subscriber to keep in the queue. + +## Sample + +```bash +roslaunch jsk_perception sample_invert_mask_image.launch +``` diff --git a/doc/jsk_perception/nodes/point_pose_extractor.md b/doc/jsk_perception/nodes/point_pose_extractor.md index b7a7996917..bb28a99962 100644 --- a/doc/jsk_perception/nodes/point_pose_extractor.md +++ b/doc/jsk_perception/nodes/point_pose_extractor.md @@ -8,7 +8,8 @@ - `/ImageFeature0D` (`posedetection_msgs::ImageFeature0D`) - Image, camera and tempalte feature information. + Image, camera and template feature information. + You can use [ImageShift](https://jsk-docs.readthedocs.io/projects/jsk_recognition/en/latest/imagesift/nodes/imagesift.html) to get it. ## Publishing Topics @@ -30,62 +31,75 @@ - `/tf` (`tf2_msgs/TFMessage`) - Detected Object Frame. + Detected Object Frame when `~publish_tf` is set to true. ## Parameters -- `~template_filename` (str default: "/sample/opencv-logo2.png") - + path to template image -- `~object_width` (float) - + Width of template image -- `~object_height` (float) - + Height of template image -- `~reprojection_threshold` (float default: 3.0) -- `~distanceratio_threshold` (float default: 0.49) -- `~error_threshold` (float default: 50.0) -- `~theta_step` (float default: 5.0) -- `~phi_step` (float default: 5.0) -- `~viewer_window` (bool default: true) -- `~window_name` (str default: "sample1") -- `~autosize` (bool default: false) -- `~publish_null_object_detection` (bool default: false) -- `~publish_tf` (bool defaut: false) - + If set to true, detected object pose is also broadcasted as tf frame. -- `~child_frame_id` (string default: "matching") - + frame_id of detected object when `~broadcast_tf` is set to true. +- `~template_filename` (str default: `"/sample/opencv-logo2.png"`) + + path to template image + +- `~object_width` (float default: `0.06`) + + Width of template image + +- `~object_height` (float default: `0.0739`) + + Height of template image + +- `~relative_pose` (str default: `"0 0 0 0 0 0 1"`) + + Coordinate of the object relative to the texture + +- `~reprojection_threshold` (float default: `3.0`) +- `~distanceratio_threshold` (float default: `0.49`) +- `~error_threshold` (float default: `50.0`) +- `~theta_step` (float default: `5.0`) +- `~phi_step` (float default: `5.0`) +- `~viewer_window` (bool default: `true`) +- `~window_name` (str default: `"sample1"`) +- `~autosize` (bool default: `false`) + + The window size is automatically adjusted to fit the displayed image, and you cannot change the window size manually. + +- `~publish_null_object_detection` (bool default: `false`) +- `~publish_tf` (bool defaut: `false`) + + If set to true, detected object pose is also broadcasted as tf frame. + +- `~child_frame_id` (string default: `"matching"`) + + frame_id of detected object when `~publish_tf` is set to true. ## Service -- `/SetTemplate` +- `/SetTemplate` (`SetTemplate`) Used to add another template. +## Mouse Event + +![](images/point_pose_extractor_mouse.png) + + Set template from viewer window. + + To specify the range of template, left-click four corners clockwise from upper left. Selected points are reset by right-clicking. + + After all four points are selected, you can input template's width, height and filename. The filename should have an extention. 
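+
+When `~publish_tf` is set to true, the detected object pose described above can also be looked up from tf. Below is a minimal sketch, assuming the default `~child_frame_id` of `matching`; `camera_rgb_optical_frame` is a hypothetical parent frame, so replace it with the frame_id of your camera image.
+
+```python
+# Illustrative sketch: look up the object frame broadcast when ~publish_tf is true.
+# "camera_rgb_optical_frame" is a hypothetical frame name; use your camera's frame_id.
+import rospy
+import tf
+
+rospy.init_node('lookup_matching_frame')
+listener = tf.TransformListener()
+rate = rospy.Rate(1.0)
+while not rospy.is_shutdown():
+    try:
+        trans, rot = listener.lookupTransform(
+            'camera_rgb_optical_frame', 'matching', rospy.Time(0))
+        rospy.loginfo('object at %s (quaternion %s)', trans, rot)
+    except (tf.LookupException, tf.ConnectivityException, tf.ExtrapolationException):
+        pass
+    rate.sleep()
+```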
+ ## Sample `sample_point_pose_extractor.launch` -``` - - - - - - - - -``` ``` -roslaunch sample_point_pose_extractor.launch +roslaunch jsk_perception sample_point_pose_extractor.launch rostopic echo /ObjectDetection ``` ### Example of how to run set_template service + `client.call` is available only after the node receives `ImageFeature0D`. + ``` #!/usr/bin/env python # -*- coding: utf-8 -*- diff --git a/doc/jsk_perception/nodes/rect_array_in_panorama_to_bounding_box_array.md b/doc/jsk_perception/nodes/rect_array_in_panorama_to_bounding_box_array.md index e32930dfac..adfa13e358 100644 --- a/doc/jsk_perception/nodes/rect_array_in_panorama_to_bounding_box_array.md +++ b/doc/jsk_perception/nodes/rect_array_in_panorama_to_bounding_box_array.md @@ -7,10 +7,6 @@ Sample is `sample_rect_array_in_panorama_to_bounding_box_array.launch` ## Subscribing Topics -* `~panorama_image` (`sensor_msgs/Image`) - -Panorama image topic. - * `~panorama_info` (`jsk_recognition_msgs/PanoramaInfo`) Panorama info topic. diff --git a/doc/jsk_perception/nodes/vqa_node.md b/doc/jsk_perception/nodes/vqa_node.md new file mode 100644 index 0000000000..f31f0c0498 --- /dev/null +++ b/doc/jsk_perception/nodes/vqa_node.md @@ -0,0 +1,96 @@ +# vqa_node.py +![](images/vqa.png) + +The ROS node for Visual Question Answering(VQA). + +## System Configuration +![](images/large_scale_vil_system.png) + +This node requires to work with the Docker Container for inference. Please build the container at first following Setup instruction. + +## Setup + +### Prerequisite +This node requires NVIDIA GPU and more than 4GB GRAM to work properly. +You have to install nvidia-container-toolkit for using GPU with docker. Please follow [official instruction](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html). + +### Build the docker image +You have to build the docker image of OFA + +```shell +roscd jsk_perception/docker +make +``` + +## Subscribing topic +* `~image` (`sensor_msgs/Image`) + + Input image + +## Publishing topic +* `~result` (`jsk_recognition_msgs/VQAResult`) + + VQA result + +* `~result/image` (`sensor_msgs/Image`) + + Images used for inference + +* `~visualize` (`std_msgs/String`) + + VQA question and answer to visualize + +## Action topic +* `~inference_server/goal` (`jsk_recognition_msgs/VQATaskActionGoal`) + + VQA request with custom questions and image + +* `~inference_server/result` (`jsk_recognition_msgs/VQATaskActionResult`) + + VQA result of `~inference_server/goal` + +## Parameters +* `~host` (String, default: `localhost`) + + The host name or IP of inference container + +* `~port` (Integer, default: `8080`) + + The HTTP port of inference container + +## Dynamic Reconfigure Parameters +* `~questions` (string, default: `what does this image describe?`) + + Default questions used for subscribing image topic. + + You can send multiple questions with separating semicolon like the below. + ``` + What does this image describe?;What kinds of objects exists? + ``` + +## Sample + +### Run inference container on another host or another terminal +In the remote GPU machine, +```shell +cd jsk_recognition/jsk_perception/docker +./run_jsk_vil_api --port (Your vacant port) --ofa_task caption --ofa_model_scale huge +``` + + +`--ofa_task` should be `caption` or `vqa`. Empirically, the output results are more natural for VQA tasks with the Caption model than with the VQA model in OFA. + + +If you encounter GPU ram shortage, please make `ofa_model_scale` `large` . 
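+
+(Optional) Once the inference container is up, you can check it directly over HTTP before starting the ROS node. The following is a minimal sketch, assuming the container maps its API to port 8888 on localhost and was started with `--ofa_task caption`; `test.jpg` is a placeholder image file, and the endpoint should be `/vqa_gen` instead of `/caption` if you started the container with the VQA task. The JSON format follows the container's `server.py`.
+
+```python
+# Illustrative client only: POST an image and questions directly to the OFA container.
+# Adjust host, port and the endpoint ("/caption" or "/vqa_gen") to your own setup.
+import base64
+import json
+import requests
+
+with open('test.jpg', 'rb') as f:  # placeholder image file
+    image_b64 = base64.b64encode(f.read()).decode('utf-8')
+
+payload = {'image': image_b64,
+           'queries': ['what does this image describe?']}
+res = requests.post('http://localhost:8888/caption', data=json.dumps(payload))
+print(res.json()['results'])  # e.g. [{"question": ..., "answer": ...}]
+```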
+ + +In the ROS machine, +```shell +roslaunch jsk_perception vqa.launch port:=(Your inference container port) host:=(Your inference container host) VQA_INPUT_IMAGE:=(Your image topic name) gui:=true +``` + + +### Run both inference container and ros node in single host +``` +roslaunch jsk_perception vqa.launch run_api:=true VQA_INPUT_IMAGE:=(Your image topic name) gui:=true +``` diff --git a/imagesift/test/test_imagesift.test b/imagesift/test/test_imagesift.test index 2ca00099cf..929c19e6a5 100644 --- a/imagesift/test/test_imagesift.test +++ b/imagesift/test/test_imagesift.test @@ -6,7 +6,8 @@ + pkg="jsk_tools" type="test_topic_published.py" + retry="3"> topic_0: /Feature0D timeout_0: 10 diff --git a/jsk_pcl_ros/CMakeLists.txt b/jsk_pcl_ros/CMakeLists.txt index 49b1b37a93..34cd2d65d9 100644 --- a/jsk_pcl_ros/CMakeLists.txt +++ b/jsk_pcl_ros/CMakeLists.txt @@ -142,7 +142,7 @@ generate_dynamic_reconfigure_options( find_package(OpenCV REQUIRED core imgproc) -include_directories(include ${catkin_INCLUDE_DIRS} ${OpenCV_INCLUDE_DIRS} ${PCL_INCLUDE_DIRS}) +include_directories(include ${catkin_INCLUDE_DIRS} ${OpenCV_INCLUDE_DIRS} ${PCL_INCLUDE_DIRS} ${ml_classifiers_INCLUDE_DIRS}) link_directories(${catkin_LIBRARY_DIRS}) if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -z defs") @@ -507,10 +507,34 @@ install(TARGETS ${jsk_pcl_ros_install_libraries} LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} ) -install(DIRECTORY config euslisp launch plugins sample scripts test +install(DIRECTORY config euslisp launch plugins test DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} USE_SOURCE_PERMISSIONS ) +if($ENV{ROS_DISTRO} STREQUAL "indigo") # on noetic it needs catkin_install_python to support Python3 and it does not work on indigo for some reason... 
+install(DIRECTORY sample scripts + DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} + USE_SOURCE_PERMISSIONS + ) +else() +install(DIRECTORY sample scripts + DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} + USE_SOURCE_PERMISSIONS + PATTERN "*" + PATTERN "*/*.py" EXCLUDE +) + +file(GLOB SCRIPT_PROGRAMS scripts/*.py) +catkin_install_python( + PROGRAMS ${SCRIPT_PROGRAMS} + DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}/scripts +) +file(GLOB SAMPLE_SCRIPT_PROGRAMS sample/*.py) +catkin_install_python( + PROGRAMS ${SAMPLE_SCRIPT_PROGRAMS} + DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}/sample +) +endif() if (CATKIN_ENABLE_TESTING) find_package(jsk_perception REQUIRED) diff --git a/jsk_pcl_ros/package.xml b/jsk_pcl_ros/package.xml index 51635bba71..b4ef3165d4 100644 --- a/jsk_pcl_ros/package.xml +++ b/jsk_pcl_ros/package.xml @@ -91,6 +91,8 @@ visualization_msgs yaml-cpp + compressed_depth_image_transport + compressed_image_transport jsk_perception jsk_tools roslaunch diff --git a/jsk_pcl_ros/sample/include/play_rosbag_baxter_realsense_l515.xml b/jsk_pcl_ros/sample/include/play_rosbag_baxter_realsense_l515.xml index b77e1cb643..f88073eacd 100644 --- a/jsk_pcl_ros/sample/include/play_rosbag_baxter_realsense_l515.xml +++ b/jsk_pcl_ros/sample/include/play_rosbag_baxter_realsense_l515.xml @@ -6,11 +6,12 @@ + + args="--rate $(arg RATE) $(find jsk_pcl_ros)/sample/data/baxter_realsense_l515.bag --clock --loop"/> diff --git a/jsk_pcl_ros/sample/sample_incremental_model_registration.launch b/jsk_pcl_ros/sample/sample_incremental_model_registration.launch index a5d35e1ea8..4e43474db5 100644 --- a/jsk_pcl_ros/sample/sample_incremental_model_registration.launch +++ b/jsk_pcl_ros/sample/sample_incremental_model_registration.launch @@ -3,7 +3,7 @@ - + - + + + diff --git a/jsk_pcl_ros/scripts/check_depth_error/depth_error_calibration.py b/jsk_pcl_ros/scripts/check_depth_error/depth_error_calibration.py index 5e464e0745..f18e6d01f0 100755 --- a/jsk_pcl_ros/scripts/check_depth_error/depth_error_calibration.py +++ b/jsk_pcl_ros/scripts/check_depth_error/depth_error_calibration.py @@ -348,7 +348,7 @@ def updatePlot(): def generateFrequencyMap(): global width, height # bgr - img = np.tile(np.uint8([0,0,0]), (height / 10, width / 10, 1)) + img = np.tile(np.uint8([0,0,0]), (height // 10, width // 10, 1)) frequency = dict() for (u, v) in value_cache.keys(): min_color = np.uint8([255, 0, 0]) diff --git a/jsk_pcl_ros/src/incremental_model_registration_nodelet.cpp b/jsk_pcl_ros/src/incremental_model_registration_nodelet.cpp index 8999bbd05b..abb16d9b84 100644 --- a/jsk_pcl_ros/src/incremental_model_registration_nodelet.cpp +++ b/jsk_pcl_ros/src/incremental_model_registration_nodelet.cpp @@ -207,6 +207,7 @@ namespace jsk_pcl_ros std_srvs::Empty::Request& req, std_srvs::Empty::Response& res) { + boost::mutex::scoped_lock lock(mutex_); if (samples_.size() <= 1) { ROS_ERROR("no enough samples"); return false; @@ -251,6 +252,7 @@ namespace jsk_pcl_ros nonregistered_ros_cloud.header.stamp = ros::Time::now(); nonregistered_ros_cloud.header.frame_id = frame_id_; pub_cloud_non_registered_.publish(nonregistered_ros_cloud); + return true; } } diff --git a/jsk_pcl_ros/test/test_cluster_point_indices_decomposer_bbox.test b/jsk_pcl_ros/test/test_cluster_point_indices_decomposer_bbox.test index eb0e57e0b7..378be8ad4e 100644 --- a/jsk_pcl_ros/test/test_cluster_point_indices_decomposer_bbox.test +++ b/jsk_pcl_ros/test/test_cluster_point_indices_decomposer_bbox.test @@ -92,7 +92,7 @@ + time-limit="30" retry="3"> check_times: 10 @@ -102,7 
+102,7 @@ + time-limit="30" retry="3"> check_times: 10 @@ -112,7 +112,7 @@ + time-limit="30" retry="3"> check_times: 10 diff --git a/jsk_pcl_ros/test/test_convex_connected_voxels.test b/jsk_pcl_ros/test/test_convex_connected_voxels.test index 6f632d3796..e22143f7d6 100644 --- a/jsk_pcl_ros/test/test_convex_connected_voxels.test +++ b/jsk_pcl_ros/test/test_convex_connected_voxels.test @@ -12,7 +12,8 @@ + pkg="jsk_tools" type="test_topic_published.py" + retry="3" time-limit="120"> topic_0: /sample_convex_connected_voxels/output/indices timeout_0: 100 diff --git a/jsk_pcl_ros/test/test_organized_statistical_outlier_removal.test b/jsk_pcl_ros/test/test_organized_statistical_outlier_removal.test index bd3a095e4d..56519753ae 100644 --- a/jsk_pcl_ros/test/test_organized_statistical_outlier_removal.test +++ b/jsk_pcl_ros/test/test_organized_statistical_outlier_removal.test @@ -2,15 +2,16 @@ + + retry="3" time-limit="120"> topic_0: /statistical_outlier_removal/output - timeout_0: 30 + timeout_0: 100 diff --git a/jsk_pcl_ros_utils/include/jsk_pcl_ros_utils/pointcloud_to_pcd.h b/jsk_pcl_ros_utils/include/jsk_pcl_ros_utils/pointcloud_to_pcd.h index 5720025844..78ae3d4172 100644 --- a/jsk_pcl_ros_utils/include/jsk_pcl_ros_utils/pointcloud_to_pcd.h +++ b/jsk_pcl_ros_utils/include/jsk_pcl_ros_utils/pointcloud_to_pcd.h @@ -41,6 +41,7 @@ #include #include +#include #include #include @@ -55,6 +56,8 @@ namespace jsk_pcl_ros_utils virtual void onInit(); virtual void timerCallback (const ros::TimerEvent& event); virtual void configCallback(Config &config, uint32_t level); + void savePCD(); + bool savePCDCallback(std_srvs::Empty::Request& req, std_srvs::Empty::Response& res); boost::mutex mutex_; boost::shared_ptr > srv_; ros::Timer timer_; @@ -66,6 +69,7 @@ namespace jsk_pcl_ros_utils bool compressed_; std::string fixed_frame_; tf::TransformListener* tf_listener_; + ros::ServiceServer srv_save_pcd_server_; private: }; } diff --git a/jsk_pcl_ros_utils/scripts/cloud_on_plane_info.py b/jsk_pcl_ros_utils/scripts/cloud_on_plane_info.py index 818d38d66d..ce05f59536 100755 --- a/jsk_pcl_ros_utils/scripts/cloud_on_plane_info.py +++ b/jsk_pcl_ros_utils/scripts/cloud_on_plane_info.py @@ -6,6 +6,27 @@ from jsk_rviz_plugins.overlay_text_interface import OverlayTextInterface from jsk_recognition_msgs.msg import BoolStamped +# workaround until https://github.com/jsk-ros-pkg/jsk_visualization/pull/864 is merged and released. 
+class OverlayTextInterface_fix(OverlayTextInterface): + def publish(self, text): + msg = OverlayText() + msg.text = text + msg.width = int(self.config.width) + msg.height = int(self.config.height) + msg.top = int(self.config.top) + msg.left = int(self.config.left) + msg.fg_color.a = self.config.fg_alpha + msg.fg_color.r = self.config.fg_red + msg.fg_color.g = self.config.fg_green + msg.fg_color.b = self.config.fg_blue + msg.bg_color.a = self.config.bg_alpha + msg.bg_color.r = self.config.bg_red + msg.bg_color.g = self.config.bg_green + msg.bg_color.b = self.config.bg_blue + msg.text_size = self.config.text_size + self.pub.publish(msg) + + g_lock = Lock() g_msg = None def callback(msg): @@ -23,7 +44,7 @@ def publish_text(event): if __name__ == "__main__": rospy.init_node("cloud_on_plane_info") - text_interface = OverlayTextInterface("~text") + text_interface = OverlayTextInterface_fix("~text") sub = rospy.Subscriber("~input", BoolStamped, callback) rospy.Timer(rospy.Duration(0.1), publish_text) rospy.spin() diff --git a/jsk_pcl_ros_utils/src/pointcloud_to_pcd_nodelet.cpp b/jsk_pcl_ros_utils/src/pointcloud_to_pcd_nodelet.cpp index 294a7d4bbc..fe9be68586 100644 --- a/jsk_pcl_ros_utils/src/pointcloud_to_pcd_nodelet.cpp +++ b/jsk_pcl_ros_utils/src/pointcloud_to_pcd_nodelet.cpp @@ -49,7 +49,16 @@ namespace jsk_pcl_ros_utils timer_.stop(); } - void PointCloudToPCD::timerCallback (const ros::TimerEvent& event) + void PointCloudToPCD::timerCallback (const ros::TimerEvent& event) { + savePCD(); + } + + bool PointCloudToPCD::savePCDCallback(std_srvs::Empty::Request& req, std_srvs::Empty::Response& res) { + savePCD(); + return true; + } + + void PointCloudToPCD::savePCD() { pcl::PCLPointCloud2::ConstPtr cloud; cloud = ros::topic::waitForMessage("input", *pnh_); @@ -58,7 +67,7 @@ namespace jsk_pcl_ros_utils return; } - ROS_INFO ("Received %d data points in frame %s with the following fields: %s", + NODELET_INFO ("Received %d data points in frame %s with the following fields: %s", (int)cloud->width * cloud->height, cloud->header.frame_id.c_str (), pcl::getFieldsList (*cloud).c_str ()); @@ -67,7 +76,7 @@ namespace jsk_pcl_ros_utils Eigen::Quaternionf q = Eigen::Quaternionf::Identity (); if (!fixed_frame_.empty ()) { if (!tf_listener_->waitForTransform (fixed_frame_, cloud->header.frame_id, pcl_conversions::fromPCL (cloud->header).stamp, ros::Duration (duration_))) { - ROS_WARN("Could not get transform!"); + NODELET_WARN("Could not get transform!"); return; } tf::StampedTransform transform_stamped; @@ -81,7 +90,7 @@ namespace jsk_pcl_ros_utils std::stringstream ss; ss << prefix_ << cloud->header.stamp << ".pcd"; - ROS_INFO ("Data saved to %s", ss.str ().c_str ()); + NODELET_INFO ("Data saved to %s", ss.str ().c_str ()); pcl::PCDWriter writer; if(binary_) @@ -107,21 +116,22 @@ namespace jsk_pcl_ros_utils srv_ = boost::make_shared > (*pnh_); dynamic_reconfigure::Server::CallbackType f = boost::bind(&PointCloudToPCD::configCallback, this, _1, _2); srv_->setCallback (f); + srv_save_pcd_server_ = pnh_->advertiseService("save_pcd", &PointCloudToPCD::savePCDCallback, this); tf_listener_ = jsk_recognition_utils::TfListenerSingleton::getInstance(); if(binary_) { if(compressed_) { - ROS_INFO_STREAM ("Saving as binary compressed PCD"); + NODELET_INFO("Saving as binary compressed PCD") ; } else { - ROS_INFO_STREAM ("Saving as binary PCD"); + NODELET_INFO("Saving as binary PCD"); } } else { - ROS_INFO_STREAM ("Saving as binary PCD"); + NODELET_INFO("Saving as ASCII PCD"); } } @@ -133,7 +143,10 @@ namespace 
jsk_pcl_ros_utils compressed_ = config.compressed; fixed_frame_ = config.fixed_frame; duration_ = config.duration; - timer_ = pnh_->createTimer(ros::Duration(duration_), boost::bind(&PointCloudToPCD::timerCallback, this, _1)); + timer_.stop(); + if (duration_ != 0) { + timer_ = pnh_->createTimer(ros::Duration(duration_), boost::bind(&PointCloudToPCD::timerCallback, this, _1)); + } } } diff --git a/jsk_perception/CMakeLists.txt b/jsk_perception/CMakeLists.txt index 6b568589ae..428e6c8a98 100644 --- a/jsk_perception/CMakeLists.txt +++ b/jsk_perception/CMakeLists.txt @@ -43,9 +43,7 @@ find_package(catkin REQUIRED COMPONENTS find_package(roseus) -if(NOT $ENV{ROS_DISTRO} STREQUAL "indigo") # on noetic it needs catkin_install_python to support Python3 and it does not work on indigo for some reason... - catkin_python_setup() -endif() +catkin_python_setup() find_package(Boost REQUIRED COMPONENTS filesystem system) @@ -155,6 +153,9 @@ generate_dynamic_reconfigure_options( cfg/VirtualCameraMono.cfg cfg/AWSDetectFaces.cfg cfg/RemoveBlurredFrames.cfg + cfg/VQA.cfg + cfg/Classification.cfg + cfg/VideoToScene.cfg ) message(WARNING "DEPRECATION WARNING: srv files in jsk_pcl_ros are deprecated." @@ -238,6 +239,7 @@ jsk_add_nodelet(src/sparse_image_decoder.cpp "jsk_perception/SparseImageDecoder" jsk_add_nodelet(src/color_histogram.cpp "jsk_perception/ColorHistogram" "color_histogram") jsk_add_nodelet(src/background_substraction_nodelet.cpp "jsk_perception/BackgroundSubstraction" "background_substraction") jsk_add_nodelet(src/fisheye_to_panorama.cpp "jsk_perception/FisheyeToPanorama" "fisheye_to_panorama") +jsk_add_nodelet(src/point_pose_extractor.cpp "jsk_perception/PointPoseExtractor" "point_pose_extractor") # See https://github.com/drNoob13/fisheyeStitcher#pre-requisite if("${CMAKE_CXX_COMPILER_VERSION}" VERSION_GREATER "7.0" AND # >= 7.1 "${OpenCV_VERSION}" VERSION_GREATER "2.9.9" AND # >= 3.0.0 @@ -260,6 +262,7 @@ jsk_add_nodelet(src/overlay_image_color_on_mono.cpp "jsk_perception/OverlayImage jsk_add_nodelet(src/grid_label.cpp "jsk_perception/GridLabel" "grid_label") jsk_add_nodelet(src/color_histogram_label_match.cpp "jsk_perception/ColorHistogramLabelMatch" "color_histogram_label_match") jsk_add_nodelet(src/apply_mask_image.cpp "jsk_perception/ApplyMaskImage" "apply_mask_image") +jsk_add_nodelet(src/invert_mask_image.cpp "jsk_perception/InvertMaskImage" "invert_mask_image") jsk_add_nodelet(src/unapply_mask_image.cpp "jsk_perception/UnapplyMaskImage" "unapply_mask_image") jsk_add_nodelet(src/single_channel_histogram.cpp "jsk_perception/SingleChannelHistogram" "single_channel_histogram") jsk_add_nodelet(src/blob_detector.cpp "jsk_perception/BlobDetector" "blob_detector") @@ -294,7 +297,9 @@ jsk_add_nodelet(src/gaussian_blur.cpp "jsk_perception/GaussianBlur" "gaussian_bl jsk_add_nodelet(src/kmeans.cpp "jsk_perception/KMeans" "kmeans") jsk_add_nodelet(src/draw_rects.cpp "jsk_perception/DrawRects" "draw_rects") jsk_add_nodelet(src/remove_blurred_frames.cpp "jsk_perception/RemoveBlurredFrames" "remove_blurred_frames") +jsk_add_nodelet(src/split_image.cpp "jsk_perception/SplitImage" "split_image") if("${OpenCV_VERSION}" VERSION_GREATER "2.9.9") # >= 3.0.0 + jsk_add_nodelet(src/video_to_scene.cpp "jsk_perception/VideoToScene" "video_to_scene") jsk_add_nodelet(src/bing.cpp "jsk_perception/Bing" "bing") endif() jsk_add_nodelet(src/tabletop_color_difference_likelihood.cpp "jsk_perception/TabletopColorDifferenceLikelihood" "tabletop_color_difference_likelihood") @@ -352,7 +357,6 @@ 
target_link_libraries(oriented_gradient ${catkin_LIBRARIES} ${Boost_LIBRARIES}) jsk_add_node(src/camshiftdemo.cpp camshiftdemo) jsk_add_node(src/linemod.cpp linemod) -jsk_add_node(src/point_pose_extractor.cpp point_pose_extractor) jsk_add_node(src/white_balance_converter.cpp white_balance_converter) jsk_add_node(src/rectangle_detector.cpp rectangle_detector) jsk_add_node(src/calc_flow.cpp calc_flow) @@ -449,9 +453,17 @@ catkin_install_python( PROGRAMS ${NODE_SCRIPT_PROGRAMS} DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}/node_scripts ) +if(CATKIN_DEVEL_PREFIX) + add_custom_target(link_dir_mkdir + COMMAND ${CMAKE_COMMAND} -E make_directory ${CATKIN_DEVEL_PREFIX}/${CATKIN_PACKAGE_SHARE_DESTINATION}/node_scripts) + foreach(SUB_DIR craft deep_sort hmr openpose) + add_custom_target(link_dir_${SUB_DIR} ALL + COMMAND ${CMAKE_COMMAND} -E create_symlink ${PROJECT_SOURCE_DIR}/node_scripts/${SUB_DIR} ${CATKIN_DEVEL_PREFIX}/${CATKIN_PACKAGE_SHARE_DESTINATION}/node_scripts/${SUB_DIR}) + add_dependencies(link_dir_${SUB_DIR} link_dir_mkdir) + endforeach() +endif(CATKIN_DEVEL_PREFIX) endif() - # ------------------------------------------------------------------------------------ # Test # ------------------------------------------------------------------------------------ @@ -547,6 +559,8 @@ if(CATKIN_ENABLE_TESTING) jsk_add_rostest(test/overlay_image_color_on_mono.test) jsk_add_rostest(test/people_pose_estimation_2d.test) jsk_add_rostest(test/human_mesh_recovery.test) + jsk_add_rostest(test/point_pose_extractor.test) + jsk_add_rostest(test/point_pose_extractor_group_ns.test) jsk_add_rostest(test/pointit.test) jsk_add_rostest(test/polygon_array_color_histogram.test) jsk_add_rostest(test/polygon_array_color_likelihood.test) @@ -586,6 +600,9 @@ if(CATKIN_ENABLE_TESTING) jsk_add_rostest(test/ycc_decomposer.test) jsk_add_rostest(test/depth_image_filter.test) jsk_add_rostest(test/remove_blurred_frames.test) + if(TARGET JSK_NODELET_${PROJECT_NAME}_video_to_scene) + jsk_add_rostest(test/video_to_scene.test) + endif() if(TARGET JSK_NODELET_${PROJECT_NAME}_bing) jsk_add_rostest(test/bing.test) endif() diff --git a/jsk_perception/cfg/Classification.cfg b/jsk_perception/cfg/Classification.cfg new file mode 100755 index 0000000000..83a569377c --- /dev/null +++ b/jsk_perception/cfg/Classification.cfg @@ -0,0 +1,10 @@ +#!/usr/bin/env python +PACKAGE = "jsk_perception" + +from dynamic_reconfigure.parameter_generator_catkin import * + +gen = ParameterGenerator() + +gen.add("queries", str_t, 0, "A queries of Caption Task. The sentences are separated with ;", "human;kettle;cup;glass") + +exit(gen.generate(PACKAGE, "jsk_perception", "Classification")) diff --git a/jsk_perception/cfg/VQA.cfg b/jsk_perception/cfg/VQA.cfg new file mode 100755 index 0000000000..2f97b15c61 --- /dev/null +++ b/jsk_perception/cfg/VQA.cfg @@ -0,0 +1,12 @@ +#!/usr/bin/env python +PACKAGE = "jsk_perception" + +from dynamic_reconfigure.parameter_generator_catkin import * + +gen = ParameterGenerator() + +gen.add("questions", str_t, 0, + "Default questions of VQA. 
You can split questions by ;", + "what does this image describe?") + +exit(gen.generate(PACKAGE, "jsk_perception", "VQA")) diff --git a/jsk_perception/cfg/VideoToScene.cfg b/jsk_perception/cfg/VideoToScene.cfg new file mode 100755 index 0000000000..617e99ffca --- /dev/null +++ b/jsk_perception/cfg/VideoToScene.cfg @@ -0,0 +1,12 @@ +#!/usr/bin/env python + +PACKAGE='jsk_perception' + +from dynamic_reconfigure.parameter_generator_catkin import * + +gen = ParameterGenerator() + +gen.add("min_percent", int_t, 0, "The percentage of forgrand when the motion has stopped and publish image", 5, 0, 100) +gen.add("max_percent", int_t, 0, "The percentage of forgrand when the scene is not settled", 20, 0, 100) + +exit(gen.generate(PACKAGE, "jsk_perception", "VideoToScene")) diff --git a/jsk_perception/docker/Makefile b/jsk_perception/docker/Makefile new file mode 100644 index 0000000000..83eb2800ab --- /dev/null +++ b/jsk_perception/docker/Makefile @@ -0,0 +1,51 @@ +# +# Author: Yoshiki Obinata +# + +# api directories +OFAPROJECT = ofa +CLIPPROJECT = clip +# image names +OFAIMAGE = jsk-ofa-server +CLIPIMAGE = jsk-clip-server +# commands +BUILDIMAGE = docker build +REMOVEIMAGE = docker rmi +# configuration directories +ifdef ROS_HOME +CONFIGDIR = $(ROS_HOME)/data/jsk_perception +else +CONFIGDIR = $(HOME)/.ros/data/jsk_perception +endif +PARAMDIR = $(CONFIGDIR)/vil_params +# Download files +PARAMURLS = parameter_urls.txt +# OFA parameters +OFAPARAMFILES = $(foreach param, $(OFAPARAMS), $(PARAMDIR)/$(param)) + +all: ofa clip + +# TODO check command wget exists, nvidia-driver version + +# TODO add clip +$(PARAMDIR)/.download: + @if [ ! -d $(PARAMDIR) ]; then\ + mkdir -p $(PARAMDIR); \ + fi + wget -c -i $(PARAMURLS) -P $(PARAMDIR) + touch $(PARAMDIR)/.download + +ofa: $(PARAMDIR)/.download + $(BUILDIMAGE) $(OFAPROJECT) -t $(OFAIMAGE) -f $(OFAPROJECT)/Dockerfile + +clip: $(PARAMDIR)/.download + $(BUILDIMAGE) $(CLIPPROJECT) -t $(CLIPIMAGE) -f $(CLIPPROJECT)/Dockerfile + +# TODO add clip, glip +clean: + @$(REMOVEIMAGE) $(OFAIMAGE) + +wipe: clean + rm -fr $(PARAMDIR) + +.PHONY: clean wipe ofa clip diff --git a/jsk_perception/docker/clip/Dockerfile b/jsk_perception/docker/clip/Dockerfile new file mode 100644 index 0000000000..f824e1c377 --- /dev/null +++ b/jsk_perception/docker/clip/Dockerfile @@ -0,0 +1,16 @@ +FROM pytorch/pytorch:1.7.1-cuda11.0-cudnn8-devel +ARG DEBIAN_FRONTEND=noninteractive +RUN apt -o Acquire::AllowInsecureRepositories=true update \ + && apt-get install -y \ + curl \ + git \ + libopencv-dev \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* +# use requirements.txt generetaed by `pip freeze | grep ==` in the container created on 2023-03-31 +COPY requirements.txt /tmp/requirements.txt +RUN pip install -r /tmp/requirements.txt +RUN pip install git+https://github.com/openai/CLIP.git +RUN mkdir -p /workspace/Clip +COPY server.py /workspace/Clip +ENTRYPOINT cd /workspace/Clip && python server.py \ No newline at end of file diff --git a/jsk_perception/docker/clip/requirements.txt b/jsk_perception/docker/clip/requirements.txt new file mode 100644 index 0000000000..74c63fe7c0 --- /dev/null +++ b/jsk_perception/docker/clip/requirements.txt @@ -0,0 +1,31 @@ +backcall==0.2.0 +brotlipy==0.7.0 +certifi==2020.12.5 +click==8.1.3 +conda==4.9.2 +conda-build==3.21.4 +decorator==4.4.2 +dnspython==2.1.0 +Flask==2.2.3 +ftfy==6.1.1 +importlib-metadata==6.1.0 +itsdangerous==2.1.2 +Jinja2==3.1.2 +MarkupSafe==2.1.2 +mkl-fft==1.2.0 +mkl-random==1.1.1 +mkl-service==2.3.0 +olefile==0.46 +opencv-python==4.7.0.72 
+parso==0.7.0 +pkginfo==1.7.0 +pycosat==0.6.3 +python-etcd==0.4.5 +PyYAML==5.3.1 +regex==2023.3.23 +ruamel-yaml==0.15.87 +torch==1.7.1 +torchelastic==0.2.1 +torchvision==0.8.2 +Werkzeug==2.2.3 +zipp==3.15.0 diff --git a/jsk_perception/docker/clip/server.py b/jsk_perception/docker/clip/server.py new file mode 100644 index 0000000000..c26d32db62 --- /dev/null +++ b/jsk_perception/docker/clip/server.py @@ -0,0 +1,67 @@ +import clip +import cv2 +import numpy as np +import os +from PIL import Image as PLImage +import torch + +# web server +from flask import Flask, request, Response +import json +import base64 + + +def apply_half(t): + if t.dtype is torch.float32: + return t.to(dtype=torch.half) + return t + +class Inference: + def __init__(self, gpu_id=None): + self.gpu_id = gpu_id + self.device = "cuda" if torch.cuda.is_available() else "cpu" + self.model, self.preprocess = clip.load('ViT-B/32', self.device) + + def infer(self, img, texts): + # get cv2 image + image = cv2.resize(img, dsize=(640, 480)) # NOTE forcely + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + image = PLImage.fromarray(image) + image_input = self.preprocess(image).unsqueeze(0).to(self.device) + text_inputs = torch.cat([clip.tokenize(c) for c in texts]).to(self.device) + with torch.no_grad(): + image_features = self.model.encode_image(image_input) + text_features = self.model.encode_text(text_inputs) + image_features /= image_features.norm(dim=-1, keepdim=True) + text_features /= text_features.norm(dim=-1, keepdim=True) + probability = (100.0 * image_features @ text_features.T).softmax(dim=-1) + similarity = (text_features.cpu().numpy() @ image_features.cpu().numpy().T).T[0] # cosine similarity + values, indices = probability[0].topk(len(texts)) + results = {} + for value, index in zip(values, indices): + results[texts[index]] = (value.item(), float(similarity[index])) + return results + +# run +if __name__ == "__main__": + app = Flask(__name__) + infer = Inference() + + @app.route("/inference", methods=['POST']) + def caption_request(): + data = request.data.decode("utf-8") + data_json = json.loads(data) + # process image + image_b = data_json['image'] + image_dec = base64.b64decode(image_b) + data_np = np.fromstring(image_dec, dtype='uint8') + img = cv2.imdecode(data_np, 1) + # get text + texts = data_json['queries'] + infer_results = infer.infer(img, texts) + results = [] + for q in infer_results: + results.append({"question": q, "probability": infer_results[q][0], "similarity": infer_results[q][1]}) + return Response(response=json.dumps({"results": results}), status=200) + + app.run("0.0.0.0", 8080, threaded=True) diff --git a/jsk_perception/docker/ofa/Dockerfile b/jsk_perception/docker/ofa/Dockerfile new file mode 100644 index 0000000000..31ff077074 --- /dev/null +++ b/jsk_perception/docker/ofa/Dockerfile @@ -0,0 +1,21 @@ +FROM pytorch/pytorch:1.8.1-cuda11.1-cudnn8-devel +ARG DEBIAN_FRONTEND=noninteractive +RUN apt -o Acquire::AllowInsecureRepositories=true update \ + && apt-get install -y \ + curl \ + git \ + libopencv-dev \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* +# use requirements.txt generetaed by `pip freeze | grep ==` in the container created on 2023-03-31 +COPY requirements.txt /tmp/requirements.txt +RUN sed -i 's/pycocotools==2\.0\.4/pycocotools==2.0.6/g' /tmp/requirements.txt && pip install -r /tmp/requirements.txt +RUN git clone https://github.com/OFA-Sys/OFA \ + && mkdir -p /var/mount +RUN cd OFA \ + && git checkout 1809b55 \ + && sed -i 's/pycocotools==2\.0\.4/pycocotools==2.0.6/g' 
requirements.txt \ + && pip install -r requirements.txt \ + && pip install "protobuf<3.20.0" "numpy>=1.20.3,<1.24" "pytorch-lightning==1.7.7" "torch==1.8.1" flask +COPY server.py /workspace/OFA +ENTRYPOINT cd /workspace/OFA && python server.py diff --git a/jsk_perception/docker/ofa/requirements.txt b/jsk_perception/docker/ofa/requirements.txt new file mode 100644 index 0000000000..25f0bd0144 --- /dev/null +++ b/jsk_perception/docker/ofa/requirements.txt @@ -0,0 +1,96 @@ +absl-py==1.4.0 +aiohttp==3.8.4 +aiosignal==1.3.1 +antlr4-python3-runtime==4.8 +async-timeout==4.0.2 +attrs==22.2.0 +bitarray==2.7.3 +brotlipy==0.7.0 +cachetools==5.3.0 +certifi==2020.12.5 +charset-normalizer==3.1.0 +click==8.1.3 +colorama==0.4.6 +conda==4.9.2 +conda-build==3.21.4 +contourpy==1.0.7 +cycler==0.11.0 +Cython==0.29.33 +datasets==2.11.0 +dill==0.3.6 +dnspython==2.1.0 +einops==0.6.0 +Flask==2.2.3 +fonttools==4.39.3 +frozenlist==1.3.3 +fsspec==2023.3.0 +ftfy==6.0.3 +google-auth==2.17.1 +google-auth-oauthlib==0.4.6 +grpcio==1.53.0 +huggingface-hub==0.13.3 +hydra-core==1.0.7 +importlib-metadata==6.1.0 +importlib-resources==5.12.0 +itsdangerous==2.1.2 +jedi==0.17.0 +Jinja2==3.1.2 +joblib==1.2.0 +kiwisolver==1.4.4 +lightning-utilities==0.8.0 +lxml==4.9.2 +Markdown==3.4.3 +MarkupSafe==2.1.2 +matplotlib==3.7.1 +mkl-fft==1.3.0 +mkl-random==1.1.1 +mkl-service==2.3.0 +multidict==6.0.4 +multiprocess==0.70.14 +nltk==3.8.1 +numpy==1.23.5 +oauthlib==3.2.2 +olefile==0.46 +omegaconf==2.0.6 +opencv-python==4.7.0.72 +packaging==23.0 +pandas==1.5.3 +pkginfo==1.7.0 +portalocker==2.7.0 +protobuf==3.19.6 +pyarrow==11.0.0 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pycocoevalcap==1.2 +pycocotools==2.0.4 +pycosat==0.6.3 +pyDeprecate==0.3.2 +pyparsing==3.0.9 +python-dateutil==2.8.2 +python-etcd==0.4.5 +pytorch-lightning==1.7.7 +PyYAML==5.4.1 +regex==2023.3.23 +requests-oauthlib==1.3.1 +responses==0.18.0 +rouge-score==0.1.2 +rsa==4.9 +ruamel-yaml==0.15.87 +sacrebleu==2.3.1 +tabulate==0.9.0 +tensorboard==2.12.0 +tensorboard-data-server==0.7.0 +tensorboard-plugin-wit==1.8.1 +tensorboardX==2.4.1 +timm==0.6.13 +torch==1.8.1 +torchelastic==0.2.2 +torchmetrics==0.11.4 +torchtext==0.9.1 +torchvision==0.9.1 +tqdm==4.65.0 +typing-extensions==4.5.0 +Werkzeug==2.2.3 +xxhash==3.2.0 +yarl==1.8.2 +zipp==3.15.0 diff --git a/jsk_perception/docker/ofa/server.py b/jsk_perception/docker/ofa/server.py new file mode 100644 index 0000000000..6c667e3328 --- /dev/null +++ b/jsk_perception/docker/ofa/server.py @@ -0,0 +1,278 @@ +# sys +import os +# inference +import torch +import numpy as np +import cv2 +from fairseq import utils, tasks +from fairseq import checkpoint_utils +from fairseq import options +from fairseq.dataclass.utils import convert_namespace_to_omegaconf +from utils.eval_utils import eval_step +from utils.zero_shot_utils import zero_shot_step +from tasks.mm_tasks.caption import CaptionTask +from tasks.mm_tasks.refcoco import RefcocoTask +from tasks.mm_tasks.vqa_gen import VqaGenTask +from models.ofa import OFAModel +from torchvision import transforms +from PIL import Image +# web server +from flask import Flask, request, Response +import json +import base64 + +PARAM_DIR = "/var/mount/params" +# OFA_PARAM[TASK][SCALE] +OFA_PARAM = { + "caption":{ + "large":"caption_large_best_clean.pt", + "huge":"caption_huge_best.pt" + }, + "refcoco":{ + "large":"refcocog_large_best.pt", + "huge":"refcocog_large_best.pt" + }, + "vqa_gen":{ + "large":"vqa_large_best.pt", + "huge":"vqa_large_best.pt" + } +} + +def apply_half(t): + if t.dtype is torch.float32: + return 
t.to(dtype=torch.half) + return t + +class Inference: + def __init__(self, task, model_scale): + self.use_cuda = torch.cuda.is_available() + self.use_fp16 = False + + # set params + param = OFA_PARAM[task][model_scale] + param_path = os.path.join(PARAM_DIR, param) + overrides={"bpe_dir":"utils/BPE", "eval_cider":False, "beam":5, + "max_len_b":16, "no_repeat_ngram_size":3, "seed":7} + + self.task_name = task + if task == "caption": + tasks.register_task(task, CaptionTask) + self.models, self.cfg, self.task = checkpoint_utils.load_model_ensemble_and_task( + utils.split_paths(param_path), + arg_overrides=overrides) + elif task == "refcoco": + tasks.register_task(self.task, RefcocoTask) + self.models, self.cfg, self.task = checkpoint_utils.load_model_ensemble_and_task( + utils.split_paths(param_path), + arg_overrides=overrides) + self.cfg.common.seed = 7 + self.cfg.generation.beam = 5 + self.cfg.generation.min_len = 4 + self.cfg.generation.max_len_a = 0 + self.cfg.generation.max_len_b = 4 + self.cfg.generation.no_repeat_ngram_size = 3 + if self.cfg.common.seed is not None and not self.cfg.generation.no_seed_provided: + np.random.seed(self.cfg.common.seed) + utils.set_torch_seed(self.cfg.common.seed) + elif task == "vqa_gen": + tasks.register_task('vqa_gen', VqaGenTask) + parser = options.get_generation_parser() + input_args = ["", "--task=vqa_gen", "--beam=100", "--unnormalized", "--path={}".format(param_path), "--bpe-dir=utils/BPE"] + args = options.parse_args_and_arch(parser, input_args) + cfg = convert_namespace_to_omegaconf(args) + self.task = tasks.setup_task(cfg.task) + self.models, self.cfg = checkpoint_utils.load_model_ensemble( + utils.split_paths(cfg.common_eval.path), + task=self.task) + else: + raise RuntimeError("Please select models from caption, refcoco, vqa_gen") + return + + # Move models to GPU + for model in self.models: + model.eval() + if self.use_fp16: + model.half() + if self.use_cuda and not self.cfg.distributed_training.pipeline_model_parallel: + model.cuda() + model.prepare_for_inference_(self.cfg) + + # Image transform + self.generator = self.task.build_generator(self.models, self.cfg.generation) + mean = [0.5, 0.5, 0.5] + std = [0.5, 0.5, 0.5] + self.patch_resize_transform = transforms.Compose([ + lambda image: image.convert("RGB"), + transforms.Resize((self.cfg.task.patch_image_size, self.cfg.task.patch_image_size), interpolation=Image.BICUBIC), + transforms.ToTensor(), + transforms.Normalize(mean=mean, std=std), + ]) + + self.pad_idx = self.task.src_dict.pad() + + def visual_grounding(self, Image, Text): + sample = self.construct_sample(Image, Text.lower()) + sample = utils.move_to_cuda(sample) if self.use_cuda else sample + sample = utils.apply_to_sample(apply_half, sample) if self.use_fp16 else sample + with torch.no_grad(): + result, scores = eval_step(self.task, self.generator, self.models, sample) + img = np.asarray(Image) + cv2.rectangle( + img, + (int(result[0]["box"][0]), int(result[0]["box"][1])), + (int(result[0]["box"][2]), int(result[0]["box"][3])), + (0, 255, 0), 3) + return img + + def encode_text(self, text, length=None, append_bos=False, append_eos=False): + bos_item = torch.LongTensor([self.task.src_dict.bos()]) + eos_item = torch.LongTensor([self.task.src_dict.eos()]) + # pad_idx = self.task.src_dict.pad() + s = self.task.tgt_dict.encode_line( + line=self.task.bpe.encode(text), + add_if_not_exist=False, + append_eos=False).long() + if length is not None: + s = s[:length] + if append_bos: + s = torch.cat([bos_item, s]) + if append_eos: + s = 
torch.cat([s, eos_item]) + return s + + def construct_sample(self, image, text): + if self.task_name == "caption" or self.task_name == "vqa_gen": + patch_image = self.patch_resize_transform(image).unsqueeze(0) + patch_mask = torch.tensor([True]) + src_text = self.encode_text(" " + text, append_bos=True, append_eos=True).unsqueeze(0) + src_length = torch.LongTensor([s.ne(self.pad_idx).long().sum() for s in src_text]) + if self.task_name == "caption": + sample = { + "id":np.array(['42']), + "net_input": { + "src_tokens": src_text, + "src_lengths": src_length, + "patch_images": patch_image, + "patch_masks": patch_mask + } + } + elif self.task_name == "vqa_gen": + ref_dict = np.array([{'yes': 1.0}]) # just placeholder + sample = { + "id":np.array(['42']), + "net_input": { + "src_tokens": src_text, + "src_lengths": src_length, + "patch_images": patch_image, + "patch_masks": patch_mask + }, + "ref_dict": ref_dict, + } + return sample + elif self.task_name == "refcoco": + patch_image_size = self.cfg.task.patch_image_size + w, h = image.size + w_resize_ratio = torch.tensor(patch_image_size / w).unsqueeze(0) + h_resize_ratio = torch.tensor(patch_image_size / h).unsqueeze(0) + patch_image = self.patch_resize_transform(image).unsqueeze(0) + patch_mask = torch.tensor([True]) + src_text = self.encode_text(' which region does the text " {} " describe?'.format(text), append_bos=True, + append_eos=True).unsqueeze(0) + src_length = torch.LongTensor([s.ne(self.pad_idx).long().sum() for s in src_text]) + sample = { + "id": np.array(['42']), + "net_input": { + "src_tokens": src_text, + "src_lengths": src_length, + "patch_images": patch_image, + "patch_masks": patch_mask, + }, + "w_resize_ratios": w_resize_ratio, + "h_resize_ratios": h_resize_ratio, + "region_coords": torch.randn(1, 4) + } + return sample + + def infer(self, img, text): + # get cv2 image + if self.task_name == "caption" or self.task_name == "vqa_gen": + image = cv2.resize(img, dsize=(640, 480)) # NOTE forcely + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + image = Image.fromarray(image) + # Construct input sample & preprocess for GPU if cuda available + sample = self.construct_sample(image, text) + sample = utils.move_to_cuda(sample) if self.use_cuda else sample + sample = utils.apply_to_sample(apply_half, sample) if self.use_fp16 else sample + if self.task_name == "caption": + with torch.no_grad(): + result, scores = eval_step(self.task, self.generator, self.models, sample) + text = result[0]['caption'] + return text + elif self.task_name == "vqa_gen": + with torch.no_grad(): + result, scores = zero_shot_step(self.task, self.generator, self.models, sample) + text = result[0]['answer'] + return text + elif self.task_name == "refcoco": + pass + +# run +if __name__ == "__main__": + app = Flask(__name__) + ofa_task = os.environ["OFA_TASK"] + ofa_model_scale = os.environ["OFA_MODEL_SCALE"] + # TODO add refcoco + if ofa_task == "all": + caption_infer = Inference("caption", ofa_model_scale) + vqa_infer = Inference("vqa_gen", ofa_model_scale) + + elif ofa_task == "caption": + caption_infer = Inference("caption", ofa_model_scale) + + elif ofa_task == "vqa_gen": + vqa_infer = Inference("vqa_gen", ofa_model_scale) + + else: + raise RuntimeError("No application is available") + + try: + @app.route("/caption", methods=['POST']) + def caption_request(): + data = request.data.decode("utf-8") + data_json = json.loads(data) + # process image + image_b = data_json['image'] + image_dec = base64.b64decode(image_b) + data_np = np.fromstring(image_dec, 
dtype='uint8') + img = cv2.imdecode(data_np, 1) + # get text + texts = data_json['queries'] + results = [] + for text in texts: + answer = caption_infer.infer(img, text) + results.append({"question": text, "answer": answer}) + return Response(response=json.dumps({"results": results}), status=200) + except NameError: + print("Skipping create caption app") + + try: + @app.route("/vqa_gen", methods=['POST']) + def vqa_request(): + data = request.data.decode("utf-8") + data_json = json.loads(data) + # process image + image_b = data_json['image'] + image_dec = base64.b64decode(image_b) + data_np = np.fromstring(image_dec, dtype='uint8') + img = cv2.imdecode(data_np, 1) + # get text + texts = data_json['queries'] + results = [] + for text in texts: + answer = vqa_infer.infer(img, text) + results.append({"question": text, "answer": answer}) + return Response(response=json.dumps({"results": results}), status=200) + except NameError: + print("Skipping create vqa_gen app") + + app.run("0.0.0.0", 8080, threaded=True) diff --git a/jsk_perception/docker/parameter_urls.txt b/jsk_perception/docker/parameter_urls.txt new file mode 100644 index 0000000000..361481ee8c --- /dev/null +++ b/jsk_perception/docker/parameter_urls.txt @@ -0,0 +1,4 @@ +https://ofa-beijing.oss-cn-beijing.aliyuncs.com/checkpoints/caption_huge_best.pt +https://ofa-beijing.oss-cn-beijing.aliyuncs.com/checkpoints/caption_large_best_clean.pt +https://ofa-beijing.oss-cn-beijing.aliyuncs.com/checkpoints/refcocog_large_best.pt +https://ofa-beijing.oss-cn-beijing.aliyuncs.com/checkpoints/vqa_large_best.pt diff --git a/jsk_perception/docker/run_jsk_vil_api b/jsk_perception/docker/run_jsk_vil_api new file mode 100755 index 0000000000..acef636280 --- /dev/null +++ b/jsk_perception/docker/run_jsk_vil_api @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 +import argparse +import os +try: + import rospy + import_rospy = True +except ImportError: + import_rospy = False +import subprocess +import sys + +CONTAINERS = {"ofa": "jsk-ofa-server", + "clip": "jsk-clip-server"} +OFA_MODEL_SCALES = ["base", "large", "huge"] + +parser = argparse.ArgumentParser(description="JSK Vision and Language API runner") +parser.add_argument("model", choices=CONTAINERS.keys(), + help="Vision and Language model you want to run", type=str) +parser.add_argument("-p", "--port", default=8888, help="Port of the API server", type=int) +parser.add_argument("--ofa_task", default="caption", + help="Tasks to be loaded in OFA. OFA loads all tasks by default and requires large amount of GPU RAM") +parser.add_argument("--ofa_model_scale", default="huge", choices=OFA_MODEL_SCALES, + help="Scale of parameters to be read in OFA. 
Use huge by default") + +args, unknown = parser.parse_known_args() + +if __name__ == "__main__": + if import_rospy and len(sys.argv) != len(rospy.myargv()): # roslaunch + rospy.init_node("jsk_vil_docker_manager") + port_option = "{}:8080".format(str(args.port)) + _ros_home = os.getenv("ROS_HOME", os.path.join(os.environ["HOME"],".ros")) + mount_option = "{}:/var/mount/params".format(os.path.join(_ros_home, "data", "jsk_perception", "vil_params")) + docker_image = "{}:latest".format(CONTAINERS[args.model]) + + cmd = ["docker", "run", "--rm", "-it", "--gpus", "all", + "-p", port_option, + "-v", mount_option] + + # OFA + if args.model == "ofa": + ofa_option = ["-e", "OFA_TASK={}".format(args.ofa_task), + "-e", "OFA_MODEL_SCALE={}".format(args.ofa_model_scale)] + cmd += ofa_option + + # Execute + print("Run {} container on port {}".format(docker_image, args.port)) + cmd.append(docker_image) + subprocess.run(cmd) diff --git a/jsk_perception/include/jsk_perception/invert_mask_image.h b/jsk_perception/include/jsk_perception/invert_mask_image.h new file mode 100644 index 0000000000..6d6ce4b03f --- /dev/null +++ b/jsk_perception/include/jsk_perception/invert_mask_image.h @@ -0,0 +1,66 @@ +// -*- mode: c++ -*- +/********************************************************************* + * Software License Agreement (BSD License) + * + * Copyright (c) 2022, JSK Lab + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of the JSK Lab nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ *********************************************************************/ + + +#ifndef JSK_PERCEPTION_INVERT_MASK_IMAGE_H_ +#define JSK_PERCEPTION_INVERT_MASK_IMAGE_H_ + +#include +#include +#include + + +namespace jsk_perception +{ + class InvertMaskImage: public jsk_topic_tools::DiagnosticNodelet + { + public: + InvertMaskImage(): DiagnosticNodelet("InvertMaskImage") {} + virtual ~InvertMaskImage(); + protected: + + virtual void onInit(); + virtual void subscribe(); + virtual void unsubscribe(); + virtual void invert(const sensor_msgs::Image::ConstPtr& mask_msg); + + boost::mutex mutex_; + ros::Subscriber sub_mask_; + ros::Publisher pub_mask_; + private: + }; +} + +#endif diff --git a/jsk_perception/include/jsk_perception/point_pose_extractor.h b/jsk_perception/include/jsk_perception/point_pose_extractor.h new file mode 100644 index 0000000000..ddcdfd00ed --- /dev/null +++ b/jsk_perception/include/jsk_perception/point_pose_extractor.h @@ -0,0 +1,706 @@ +// -*- mode: c++ -*- +/********************************************************************* + * Software License Agreement (BSD License) + * + * Copyright (c) 2015, JSK Lab + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of the JSK Lab nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
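The InvertMaskImage nodelet declared above carries no implementation in the header; functionally the invert() callback is a bitwise NOT over a mono8 mask (cv_bridge handles the sensor_msgs/Image conversion in the real nodelet). A rough Python equivalent, with a placeholder mask:

```python
import cv2
import numpy as np

mask = np.zeros((480, 640), dtype=np.uint8)   # placeholder mono8 mask
mask[100:200, 150:300] = 255                   # some foreground region

inverted = cv2.bitwise_not(mask)               # 255 -> 0, 0 -> 255
assert inverted[0, 0] == 255 and inverted[150, 200] == 0
```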
+ *********************************************************************/ +#ifndef JSK_PERCEPTION_POINT_POSE_EXTRACTOR_H_ +#define JSK_PERCEPTION_POINT_POSE_EXTRACTOR_H_ + +#include +#include +#include +#include +#if ( CV_MAJOR_VERSION >= 4) +#include +#include +#include +#else +#include +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace enc = sensor_msgs::image_encodings; + +void features2keypoint (posedetection_msgs::Feature0D features, + std::vector& keypoints, + cv::Mat& descriptors){ + keypoints.resize(features.scales.size()); + descriptors.create(features.scales.size(),features.descriptor_dim,CV_32FC1); + std::vector::iterator keypoint_it = keypoints.begin(); + for ( int i=0; keypoint_it != keypoints.end(); ++keypoint_it, ++i ) { + *keypoint_it = cv::KeyPoint(cv::Point2f(features.positions[i*2+0], + features.positions[i*2+1]), + features.descriptor_dim, // size + features.orientations[i], //angle + 0, // resonse + features.scales[i] // octave + ); + for (int j = 0; j < features.descriptor_dim; j++){ + descriptors.at(i,j) = + features.descriptors[i*features.descriptor_dim+j]; + } + } +} + + +class Matching_Template +{ +public: + std::string _matching_frame; + cv::Mat _template_img; + std::vector _template_keypoints; + cv::Mat _template_descriptors; + int _original_width_size; + int _original_height_size; + double _template_width; // width of template [m] + double _template_height; // height of template [m] + tf::Transform _relativepose; + cv::Mat _affine_matrix; + std::string _window_name; + // The maximum allowed reprojection error to treat a point pair as an inlier + double _reprojection_threshold; + // threshold on squared ratio of distances between NN and 2nd NN + double _distanceratio_threshold; + std::vector _correspondances; + cv::Mat _previous_stack_img; + + Matching_Template(){ + } + Matching_Template(cv::Mat img, + std::string matching_frame, + int original_width_size, + int original_height_size, + double template_width, + double template_height, + tf::Transform relativepose, + cv::Mat affine_matrix, + double reprojection_threshold, + double distanceratio_threshold, + std::string window_name, + bool autosize){ + + _template_img = img.clone(); + _matching_frame = matching_frame; + _original_width_size = original_width_size; + _original_height_size = original_height_size; + _template_width = template_width; + _template_height = template_height; + _relativepose = relativepose; + _affine_matrix = affine_matrix; + _window_name = window_name; + _reprojection_threshold = reprojection_threshold; + _distanceratio_threshold = distanceratio_threshold; + _template_keypoints.clear(); + _correspondances.clear(); + // cv::namedWindow(_window_name, autosize ? 
CV_WINDOW_AUTOSIZE : 0); + } + + + virtual ~Matching_Template(){ + std::cerr << "delete " << _window_name << std::endl; + } + + + std::string get_window_name(){ + return _window_name; + } + + std::vector* correspondances(){ + return &_correspondances; + } + + bool check_template (){ + if (_template_img.empty() && _template_keypoints.size() == 0 && + _template_descriptors.empty()){ + return false; + } + else { + return true; + } + } + + + int set_template(ros::ServiceClient client){ + posedetection_msgs::Feature0DDetect srv; + + if ( _template_img.empty()) { + ROS_ERROR ("template picture is empty."); + return -1; + } + ROS_INFO_STREAM("read template image and call " << client.getService() << " service"); + cv_bridge::CvImage cv_img; + cv_img.image = cv::Mat(_template_img); + cv_img.encoding = std::string("bgr8"); + srv.request.image = *(cv_img.toImageMsg()); + if (client.call(srv)) { + ROS_INFO_STREAM("get features with " << srv.response.features.scales.size() << " descriptoins"); + features2keypoint (srv.response.features, _template_keypoints, _template_descriptors); + + // inverse affine translation + cv::Mat M_inv; + cv::Mat dst; + cv::invert(_affine_matrix, M_inv); + // cv::drawKeypoints (tmp, _template_keypoints, dst, cv::Scalar::all(1), cv::DrawMatchesFlags::DRAW_RICH_KEYPOINTS); + // cv::drawKeypoints (tmp, _template_keypoints, dst); + // cv::warpPerspective(_template_img, dst, M_inv, + // cv::Size(_original_width_size, _original_height_size), + // CV_INTER_LINEAR, IPL_BORDER_CONSTANT, 0); + for (int i = 0; i < (int)_template_keypoints.size(); i++){ + cv::Point2f pt; + cv::Mat pt_mat(cv::Size(1,1), CV_32FC2, &pt); + cv::Mat pt_src_mat(cv::Size(1,1), CV_32FC2, &_template_keypoints.at(i).pt); + cv::perspectiveTransform (pt_src_mat, pt_mat, M_inv); + _template_keypoints.at(i).pt = pt_mat.at(0,0); + } + // cvSetMouseCallback (_window_name.c_str(), &PointPoseExtractor::cvmousecb, this); + // _template_img = dst; + return 0; + } else { + ROS_ERROR("Failed to call service Feature0DDetect"); + return 1; + } + } + + + double log_fac( int n ) + { + static std::vector slog_table; + int osize = slog_table.size(); + if(osize <= n){ + slog_table.resize(n+1); + if(osize == 0){ + slog_table[0] = -1; + slog_table[1] = log(1.0); + osize = 2; + } + for(int i = osize; i <= n; i++ ){ + slog_table[i] = slog_table[i-1] + log(i); + } + } + return slog_table[n]; + } + + int min_inlier( int n, int m, double p_badsupp, double p_badxform ) + { + double pi, sum; + int i, j; + double lp_badsupp = log( p_badsupp ); + double lp_badsupp1 = log( 1.0 - p_badsupp ); + for( j = m+1; j <= n; j++ ){ + sum = 0; + for( i = j; i <= n; i++ ){ + pi = (i-m) * lp_badsupp + (n-i+m) * lp_badsupp1 + + log_fac( n - m ) - log_fac( i - m ) - log_fac( n - i ); + sum += exp( pi ); + } + if( sum < p_badxform ) + break; + } + return j; + } + + + bool estimate_od (ros::ServiceClient client, cv::Mat src_img, + std::vector sourceimg_keypoints, + image_geometry::PinholeCameraModel pcam, + double err_thr, cv::Mat &stack_img, + cv::flann::Index* ft, posedetection_msgs::Object6DPose* o6p){ + + if ( _template_keypoints.size()== 0 && + _template_descriptors.empty() ){ + set_template(client); + } + if ( _template_keypoints.size()== 0 && + _template_descriptors.empty() ) { + ROS_ERROR ("Template image was not set."); + return false; + } + // stacked image + cv::Size stack_size = cv::Size(MAX(src_img.cols,_template_img.cols), + src_img.rows+_template_img.rows); + stack_img = cv::Mat(stack_size,CV_8UC3); + stack_img = cv::Scalar(0); + cv::Mat 
stack_img_tmp(stack_img,cv::Rect(0,0,_template_img.cols,_template_img.rows)); + cv::add(stack_img_tmp, _template_img, stack_img_tmp); + cv::Mat stack_img_src(stack_img,cv::Rect(0,_template_img.rows,src_img.cols,src_img.rows)); + cv::add(stack_img_src, src_img, stack_img_src); + _previous_stack_img = stack_img.clone(); + + // matching + cv::Mat m_indices(_template_descriptors.rows, 2, CV_32S); + cv::Mat m_dists(_template_descriptors.rows, 2, CV_32F); + ft->knnSearch(_template_descriptors, m_indices, m_dists, 2, cv::flann::SearchParams(-1) ); + + // matched points + std::vector pt1, pt2; + std::vector queryIdxs,trainIdxs; + for ( unsigned int j = 0; j < _template_keypoints.size(); j++ ) { + if ( m_dists.at(j,0) < m_dists.at(j,1) * _distanceratio_threshold) { + queryIdxs.push_back(j); + trainIdxs.push_back(m_indices.at(j,0)); + } + } + if ( queryIdxs.size() == 0 ) { + ROS_WARN_STREAM("could not find matched points with distanceratio(" <<_distanceratio_threshold << ")"); + } else { + cv::KeyPoint::convert(_template_keypoints,pt1,queryIdxs); + cv::KeyPoint::convert(sourceimg_keypoints,pt2,trainIdxs); + } + + ROS_INFO ("Found %d total matches among %d template keypoints", (int)pt2.size(), (int)_template_keypoints.size()); + + cv::Mat H; + std::vector mask((int)pt2.size()); + + if ( pt1.size() > 4 ) { + // ToDO for curve face + H = cv::findHomography(cv::Mat(pt1), cv::Mat(pt2), mask, CV_RANSAC, _reprojection_threshold); + } + + // draw line + for (int j = 0; j < (int)pt1.size(); j++){ + cv::Point2f pt, pt_orig; + cv::Mat pt_mat(cv::Size(1,1), CV_32FC2, &pt); + cv::Mat pt_src_mat(cv::Size(1,1), CV_32FC2, &pt1.at(j)); + cv::perspectiveTransform (pt_src_mat, pt_mat, _affine_matrix); + pt_orig = pt_mat.at(0,0); + if ( mask.at(j)){ + cv::line(stack_img, pt_orig, pt2.at(j)+cv::Point2f(0,_template_img.rows), + CV_RGB(0,255,0), 1,8,0); + } + else { + cv::line(stack_img, pt_orig, pt2.at(j)+cv::Point2f(0,_template_img.rows), + CV_RGB(255,0,255), 1,8,0); + } + } + int inlier_sum = 0; + for (int k=0; k < (int)mask.size(); k++){ + inlier_sum += (int)mask.at(k); + } + + double text_scale = 1.5; + { + int fontFace = 0, thickness = 0, baseLine; + int x, y; + cv::Size text_size; + std::string text; + + text = "inlier: " + boost::lexical_cast((int)inlier_sum) + " / " + boost::lexical_cast((int)pt2.size()); + text_size = cv::getTextSize(text, fontFace, text_scale, thickness, &baseLine); + x = stack_img.size().width - text_size.width; + y = text_size.height + thickness + 10; // 10pt pading + cv::putText (stack_img, text, cv::Point(x, y), + fontFace, text_scale, CV_RGB(0, 255, 0), + thickness, 8, false); + + text = "template: " + boost::lexical_cast((int)_template_keypoints.size()); + text_size = cv::getTextSize(text, fontFace, text_scale, thickness, &baseLine); + x = stack_img.size().width - text_size.width; + y += text_size.height + thickness + 10; // 10pt pading + cv::putText (stack_img, text, cv::Point(x, y), + fontFace, text_scale, CV_RGB(0, 255, 0), + thickness, 8, false); + } + + // draw correspondances + ROS_INFO(" _correspondances.size: %d", (int)_correspondances.size()); + for (int j = 0; j < (int)_correspondances.size(); j++){ + cv::circle(stack_img, cv::Point2f(_correspondances.at(j).x, _correspondances.at(j).y + _template_img.size().height), + 8, CV_RGB(255,0,0), -1); + } + + ROS_INFO(" inlier_sum:%d min_lier:%d", inlier_sum, min_inlier((int)pt2.size(), 4, 0.10, 0.01)); + if ((cv::countNonZero( H ) == 0) || (inlier_sum < min_inlier((int)pt2.size(), 4, 0.10, 0.01))){ + ROS_INFO(" inlier_sum < min_lier 
return-from estimate-od"); + if( _window_name != "" ) + cv::imshow(_window_name, stack_img); + return false; + } + + std::string _type; + char chr[20]; + + // number of match points + sprintf(chr, "%d", (int)pt2.size()); + _type = _matching_frame + "_" + std::string(chr); + + sprintf(chr, "%d", inlier_sum); + _type = _type + "_" + std::string(chr); + + cv::Point2f corners2d[4] = {cv::Point2f(0,0), + cv::Point2f(_original_width_size,0), + cv::Point2f(_original_width_size,_original_height_size), + cv::Point2f(0,_original_height_size)}; + cv::Mat corners2d_mat (cv::Size(4, 1), CV_32FC2, corners2d); + cv::Point3f corners3d[4] = {cv::Point3f(0,0,0), + cv::Point3f(0,_template_width,0), + cv::Point3f(_template_height,_template_width,0), + cv::Point3f(_template_height,0,0)}; + cv::Mat corners3d_mat (cv::Size(4, 1), CV_32FC3, corners3d); + + cv::Mat corners2d_mat_trans; + + cv::perspectiveTransform (corners2d_mat, corners2d_mat_trans, H); + + double fR3[3], fT3[3]; + cv::Mat rvec(3, 1, CV_64FC1, fR3); + cv::Mat tvec(3, 1, CV_64FC1, fT3); + cv::Mat zero_distortion_mat = cv::Mat::zeros(4, 1, CV_64FC1); + + cv::solvePnP (corners3d_mat, corners2d_mat_trans, + pcam.intrinsicMatrix(), + zero_distortion_mat,//if unrectified: pcam.distortionCoeffs() + rvec, tvec); + + tf::Transform checktf, resulttf; + + checktf.setOrigin( tf::Vector3(fT3[0], fT3[1], fT3[2] ) ); + + double rx = fR3[0], ry = fR3[1], rz = fR3[2]; + tf::Quaternion quat; + double angle = cv::norm(rvec); + quat.setRotation(tf::Vector3(rx/angle, ry/angle, rz/angle), angle); + checktf.setRotation( quat ); + + resulttf = checktf * _relativepose; + + ROS_INFO( " tx: (%0.2lf,%0.2lf,%0.2lf) rx: (%0.2lf,%0.2lf,%0.2lf)", + resulttf.getOrigin().getX(), + resulttf.getOrigin().getY(), + resulttf.getOrigin().getZ(), + resulttf.getRotation().getAxis().x() * resulttf.getRotation().getAngle(), + resulttf.getRotation().getAxis().y() * resulttf.getRotation().getAngle(), + resulttf.getRotation().getAxis().z() * resulttf.getRotation().getAngle()); + + o6p->pose.position.x = resulttf.getOrigin().getX(); + o6p->pose.position.y = resulttf.getOrigin().getY(); + o6p->pose.position.z = resulttf.getOrigin().getZ(); + o6p->pose.orientation.w = resulttf.getRotation().w(); + o6p->pose.orientation.x = resulttf.getRotation().x(); + o6p->pose.orientation.y = resulttf.getRotation().y(); + o6p->pose.orientation.z = resulttf.getRotation().z(); + o6p->type = _matching_frame; // _type + + // draw 3d cube model + std::vector projected_top; + { + tf::Vector3 coords[8] = {tf::Vector3(0,0,0), + tf::Vector3(0, _template_width, 0), + tf::Vector3(_template_height, _template_width,0), + tf::Vector3(_template_height, 0, 0), + tf::Vector3(0, 0, -0.03), + tf::Vector3(0, _template_width, -0.03), + tf::Vector3(_template_height, _template_width, -0.03), + tf::Vector3(_template_height, 0, -0.03)}; + + projected_top = std::vector(8); + + for(int i=0; i<8; i++) { + coords[i] = checktf * coords[i]; + cv::Point3f pt(coords[i].getX(), coords[i].getY(), coords[i].getZ()); + projected_top[i] = pcam.project3dToPixel(pt); + } + } + + { // check if the matched region does not too big or too small + float max_x, max_y, min_x, min_y; + max_x = max_y = -1e9; + min_x = min_y = 1e9; + for (int j = 0; j < 4; j++){ + cv::Point2f pt = corners2d_mat_trans.at(0,j); + max_x = std::max(max_x, pt.x), max_y = std::max(max_y, pt.y); + min_x = std::min(min_x, pt.x), min_y = std::min(min_y, pt.y); + } + if((max_x - min_x) < 30 || (max_y - min_y) < 30 || + src_img.rows < (max_x - min_x)/2 || src_img.cols < (max_y - 
min_y)/2){ + ROS_INFO(" matched region is too big or small (2< && <30) width:%f height:%f return-from estimate-od", max_x - min_x, max_y - min_y); + return false; + } + } + + double err_sum = 0; + bool err_success = true; + for (int j = 0; j < 4; j++){ + double err = sqrt(pow((corners2d_mat_trans.at(0,j).x - projected_top.at(j).x), 2) + + pow((corners2d_mat_trans.at(0,j).y - projected_top.at(j).y), 2)); + err_sum += err; + } + if (err_sum > err_thr){ + ROS_INFO(" err_sum:%f > err_thr:%f return-from estimate-od", err_sum, err_thr); + err_success = false; + } else { + o6p->reliability = 1.0 - (err_sum / err_thr); + } + // draw lines around the detected object + for (int j = 0; j < corners2d_mat_trans.cols; j++){ + cv::Point2f p1(corners2d_mat_trans.at(0,j).x, + corners2d_mat_trans.at(0,j).y+_template_img.rows); + cv::Point2f p2(corners2d_mat_trans.at(0,(j+1)%corners2d_mat_trans.cols).x, + corners2d_mat_trans.at(0,(j+1)%corners2d_mat_trans.cols).y+_template_img.rows); + cv::line (stack_img, p1, p2, CV_RGB(255, 0, 0), + (err_success?4:1), // width + CV_AA, 0); + } + + // draw 3d cube model + if(projected_top.size() == 8) { // verify, the size is 8 + int cnt = 8; + std::vector ps(cnt); + for(int i=0; i ps(4); + + for(int i=0; i<4; i++) { + coords[i] = resulttf * coords[i]; + cv::Point3f pt(coords[i].getX(), coords[i].getY(), coords[i].getZ()); + ps[i] = pcam.project3dToPixel(pt); + ps[i].y += _template_img.rows; // draw on camera image + } + + cv::line (stack_img, ps[0], ps[1], CV_RGB(255, 0, 0), 3, CV_AA, 0); + cv::line (stack_img, ps[0], ps[2], CV_RGB(0, 255, 0), 3, CV_AA, 0); + cv::line (stack_img, ps[0], ps[3], CV_RGB(0, 0, 255), 3, CV_AA, 0); + } + + // write text on image + { + std::string text; + int x, y; + text = "error: " + boost::lexical_cast(err_sum); + x = stack_img.size().width - 16*17*text_scale; // 16pt * 17 + y = _template_img.size().height - (16 + 2)*text_scale*6; + cv::putText (stack_img, text, cv::Point(x, y), + 0, text_scale, CV_RGB(0, 255, 0), + 2, 8, false); + ROS_INFO(" %s < %f (threshold)", text.c_str(), err_thr ); + } + // for debug window + if( _window_name != "" ) + cv::imshow(_window_name, stack_img); + + return err_success; + } +}; // the end of difinition of class Matching_Template + + +namespace jsk_perception +{ + + class PointPoseExtractor: public jsk_topic_tools::DiagnosticNodelet + { + public: + std::vector _templates; + typedef jsk_perception::point_pose_extractorConfig Config; + PointPoseExtractor() : DiagnosticNodelet("PointPoseExtractor") {} + virtual ~PointPoseExtractor(); + protected: + ros::NodeHandle _n; + image_transport::ImageTransport *it; + ros::Subscriber _sub; + ros::ServiceServer _server; + ros::ServiceClient _client; + ros::Publisher _pub, _pub_agg, _pub_pose; + tf::TransformBroadcaster _br; + image_transport::Publisher _debug_pub; + bool _first_sample_change; + double _reprojection_threshold; + double _distanceratio_threshold; + double _th_step; + double _phi_step; + bool _autosize; + double _err_thr; + image_geometry::PinholeCameraModel pcam; + bool pnod; + bool _initialized; + bool _viewer; + bool _publish_tf; + std::string _child_frame_id; + + virtual void initialize (); + virtual cv::Mat make_homography(cv::Mat src, cv::Mat rvec, cv::Mat tvec, + double template_width, double template_height, cv::Size &size); + virtual int make_warped_images (cv::Mat src, std::vector &imgs, + std::vector &Mvec, + double template_width, double template_height, + double th_step, double phi_step); + virtual bool add_new_template(cv::Mat img, std::string 
typestr, tf::Transform relative_pose, + double template_width, double template_height, + double theta_step, double phi_step); + virtual bool settemplate_cb (jsk_perception::SetTemplate::Request &req, + jsk_perception::SetTemplate::Response &res); + virtual void imagefeature_cb (const posedetection_msgs::ImageFeature0DConstPtr& msg); + virtual void onInit(); + virtual void subscribe(); + virtual void unsubscribe(); + virtual void dyn_conf_callback(Config &config, uint32_t level); + + static void cvmousecb (int event, int x, int y, int flags, void* param){ + PointPoseExtractor* ppe = static_cast(param); + Matching_Template *mt = ppe->_templates.back(); + // std::cerr << "mousecb_ -> " << mt << std::endl; + switch (event){ + case CV_EVENT_LBUTTONUP: { + cv::Point2d pt(x,y - (int)mt->_template_img.size().height); + ROS_INFO("add correspondence (%d, %d)", (int)pt.x, (int)pt.y); + mt->_correspondances.push_back(pt); + if ((int)mt->_correspondances.size() >= 4){ + make_template_from_mousecb(ppe); + mt->_correspondances.clear(); + ROS_INFO("reset"); + } + break; + } + case CV_EVENT_RBUTTONUP: { + mt->_correspondances.clear(); + ROS_INFO("reset"); + break; + } + } + } + static void make_template_from_mousecb(PointPoseExtractor* ppe){ + Matching_Template *mt = ppe->_templates.back(); + cv::Mat H; + cv::Mat tmp_template, tmp_warp_template; + std::vectorpt1, pt2; + double width, height; + std::string filename; + std::cout << "input template's [width]" << std::endl; + std::cin >> width; + std::cout << "input template's [height]" << std::endl; + std::cin >> height; + std::cout << "input template's [filename]" << std::endl; + std::cin >> filename; + + for (int i = 0; i < 4; i++){ + pt1.push_back(cv::Point2d((int)mt->_correspondances.at(i).x, + (int)mt->_correspondances.at(i).y + mt->_template_img.size().height)); + } + cv::Rect rect = cv::boundingRect(cv::Mat(pt1)); + double scale = std::max(width, height) / 500.0; + int iwidth = width / scale, iheight = height / scale; + pt2.push_back(cv::Point2d(0,0)); + pt2.push_back(cv::Point2d(iwidth,0)); + pt2.push_back(cv::Point2d(iwidth,iheight)); + pt2.push_back(cv::Point2d(0, iheight)); + H = cv::findHomography(cv::Mat(pt1), cv::Mat(pt2)); + + cv::getRectSubPix(mt->_previous_stack_img, rect.size(), + cv::Point2f((rect.tl().x + rect.br().x)/2.0,(rect.tl().y + rect.br().y)/2.0), + tmp_template); + cv::warpPerspective(mt->_previous_stack_img, tmp_warp_template, H, cv::Size(iwidth, iheight)); + + try { + cv::imwrite(filename,tmp_template); + boost::filesystem::path fname(filename); + std::stringstream ss; + ss << (fname.parent_path() / fname.stem()).c_str() << "_wrap" << fname.extension().c_str(); + cv::imwrite(ss.str(),tmp_warp_template); + }catch (cv::Exception e) { + std::cerr << e.what() << std::endl; + } + + for (int i = 0; i < (int)pt1.size(); i++){ + pt2.push_back(cv::Point2d((int)pt1.at(i).x - rect.x, + (int)pt1.at(i).y - rect.y - mt->_template_img.size().height)); + } + // cv::Mat mask_img = cv::Mat::zeros(tmp_template.size(), CV_8UC3); + // cv::fillConvexPoly(mask_img, pt2.begin(), (int)pt2.size(), CV_RGB(255, 255, 255)); + + // cv::namedWindow("hoge", 1); + // cv::imshow("hoge", mask_img); + + cv::Mat M = (cv::Mat_(3,3) << 1,0,0, 0,1,0, 0,0,1); + std::string window_name = "sample" + boost::lexical_cast((int)ppe->_templates.size()); + + Matching_Template* tmplt = + new Matching_Template (tmp_warp_template, "sample", + tmp_warp_template.size().width, tmp_warp_template.size().height, + width, height, + tf::Transform(tf::Quaternion(0, 0, 0, 1), 
tf::Vector3(0, 0, 0)), + M, + mt->_reprojection_threshold, + mt->_distanceratio_threshold, + ppe->_first_sample_change ? window_name : mt->_window_name, + cv::getWindowProperty(mt->_window_name, CV_WND_PROP_AUTOSIZE)); + + mt->_correspondances.clear(); + ppe->_templates.push_back(tmplt); + cv::namedWindow(ppe->_first_sample_change ? window_name : mt->_window_name, + cv::getWindowProperty(mt->_window_name, CV_WND_PROP_AUTOSIZE)); + cvSetMouseCallback (ppe->_first_sample_change ? window_name.c_str() : mt->_window_name.c_str(), + &cvmousecb, ppe); + ppe->_first_sample_change = true; + } + private: + }; +} + + +#endif diff --git a/jsk_perception/include/jsk_perception/split_image.h b/jsk_perception/include/jsk_perception/split_image.h new file mode 100644 index 0000000000..858e6cfde9 --- /dev/null +++ b/jsk_perception/include/jsk_perception/split_image.h @@ -0,0 +1,69 @@ +// -*- mode: c++ -*- +/********************************************************************* + * Software License Agreement (BSD License) + * + * Copyright (c) 2023, JSK Lab + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of the JSK Lab nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
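The pose recovery inside estimate_od() above reduces to two OpenCV calls: warp the template's four pixel corners with the homography obtained from the feature matches, then pass those image points together with the template's metric corners to solvePnP. A standalone sketch of that step follows; the camera matrix, homography and template sizes are placeholders (the 0.145 x 0.128 m values simply mirror the pai_no_mi sample used later in this patch).

```python
import cv2
import numpy as np

w_px, h_px = 320, 240          # template image size [pixel] (placeholder)
w_m, h_m = 0.145, 0.128        # template size [m] (placeholder)

H = np.eye(3)                  # homography template -> camera image (placeholder)
K = np.array([[525., 0., 319.5],
              [0., 525., 239.5],
              [0., 0., 1.]])   # camera intrinsics (placeholder)

# same corner ordering as the C++ code above
corners2d = np.float32([[0, 0], [w_px, 0], [w_px, h_px], [0, h_px]]).reshape(-1, 1, 2)
corners3d = np.float32([[0, 0, 0], [0, w_m, 0], [h_m, w_m, 0], [h_m, 0, 0]])

projected = cv2.perspectiveTransform(corners2d, H)
ok, rvec, tvec = cv2.solvePnP(corners3d, projected, K, np.zeros(4))  # image assumed rectified
print(ok, rvec.ravel(), tvec.ravel())
```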
+ *********************************************************************/ +/* + * split_image.h + * Author: Yoshiki Obinata + */ + + +#ifndef SPLIT_IMAGE_H_ +#define SPLIT_IMAGE_H_ + +#include +#include +#include + +namespace jsk_perception{ + + class SplitImage : public jsk_topic_tools::DiagnosticNodelet{ + + public: + SplitImage() : DiagnosticNodelet("SplitImage"){} + virtual ~SplitImage(); + protected: + virtual void onInit(); + virtual void subscribe(); + virtual void unsubscribe(); + virtual void splitImage(const sensor_msgs::Image::ConstPtr& image_msg); + + ros::Subscriber sub_; + std::vector pubs_; + int vertical_parts_; + int horizontal_parts_; + private: + }; +}; + +#endif // SPLIT_IMAGE_H_ diff --git a/jsk_perception/include/jsk_perception/video_to_scene.h b/jsk_perception/include/jsk_perception/video_to_scene.h new file mode 100644 index 0000000000..15a9473c23 --- /dev/null +++ b/jsk_perception/include/jsk_perception/video_to_scene.h @@ -0,0 +1,81 @@ +// -*- mode: C++ -*- +/********************************************************************* + * Software License Agreement (BSD License) + * + * Copyright (c) 2022, JSK Lab + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of the JSK Lab nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
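SplitImage above only declares the nodelet; the splitting itself is a plain grid slice. A short sketch, under the assumption that vertical_parts counts rows and horizontal_parts counts columns, matching the 2x2 sample launch later in this patch:

```python
import numpy as np

def split_image(img, vertical_parts=2, horizontal_parts=2):
    """Return the tiles in row-major order (one per publisher in pubs_)."""
    h, w = img.shape[:2]
    th, tw = h // vertical_parts, w // horizontal_parts
    return [img[r * th:(r + 1) * th, c * tw:(c + 1) * tw]
            for r in range(vertical_parts)
            for c in range(horizontal_parts)]

tiles = split_image(np.zeros((480, 640, 3), dtype=np.uint8))
assert len(tiles) == 4 and tiles[0].shape == (240, 320, 3)
```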
+ *********************************************************************/ +/* + * video_to_scene.h + * Author: Kei Okada + */ + +#ifndef VIDEO_TO_SCENE_H_ +#define VIDEO_TO_SCENE_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace jsk_perception{ + class VideoToScene: public jsk_topic_tools::DiagnosticNodelet{ + public: + typedef VideoToSceneConfig Config; + VideoToScene() : DiagnosticNodelet("VideoToScene") {} + protected: + virtual void onInit(); + virtual void subscribe(); + virtual void unsubscribe(); + virtual void work(const sensor_msgs::Image::ConstPtr& image_msg); + virtual void configCallback(Config &config, uint32_t level); + + boost::shared_ptr > srv_; + image_transport::Subscriber sub_; + boost::shared_ptr it_; + image_transport::Publisher pub_; + boost::mutex mutex_; + private: + cv::Ptr bgsubtractor; + int min_percent_; + int max_percent_; + bool captured_; + + }; +} + +#endif // VIDEO_TO_SCENE_H_ diff --git a/jsk_perception/launch/classification.launch b/jsk_perception/launch/classification.launch new file mode 100644 index 0000000000..fbf14fae0e --- /dev/null +++ b/jsk_perception/launch/classification.launch @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + host: $(arg host) + port: $(arg port) + image_transport: $(arg image_transport) + + + + + + diff --git a/jsk_perception/launch/ofa_gui.launch b/jsk_perception/launch/ofa_gui.launch new file mode 100644 index 0000000000..c0cba568db --- /dev/null +++ b/jsk_perception/launch/ofa_gui.launch @@ -0,0 +1,6 @@ + + + + + diff --git a/jsk_perception/launch/video_to_scene.launch b/jsk_perception/launch/video_to_scene.launch new file mode 100644 index 0000000000..c262c3ab2c --- /dev/null +++ b/jsk_perception/launch/video_to_scene.launch @@ -0,0 +1,25 @@ + + + + + + + + + + + + + + + + + + + + + diff --git a/jsk_perception/launch/vqa.launch b/jsk_perception/launch/vqa.launch new file mode 100644 index 0000000000..32b3988b4b --- /dev/null +++ b/jsk_perception/launch/vqa.launch @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + host: $(arg host) + port: $(arg port) + image_transport: $(arg image_transport) + + + + + + diff --git a/jsk_perception/node_scripts/alexnet_object_recognition.py b/jsk_perception/node_scripts/alexnet_object_recognition.py index 7722715d30..73fbdd88b7 100755 --- a/jsk_perception/node_scripts/alexnet_object_recognition.py +++ b/jsk_perception/node_scripts/alexnet_object_recognition.py @@ -30,7 +30,7 @@ from jsk_recognition_utils.chainermodels import AlexNetBatchNormalization import rospy from sensor_msgs.msg import Image -from vgg16_object_recognition import VGG16ObjectRecognition +from jsk_perception.vgg16_object_recognition import VGG16ObjectRecognition class AlexNetObjectRecognition(VGG16ObjectRecognition): diff --git a/jsk_perception/node_scripts/binpack_rect_array.py b/jsk_perception/node_scripts/binpack_rect_array.py index d7854b9efb..21ee0fb80b 100755 --- a/jsk_perception/node_scripts/binpack_rect_array.py +++ b/jsk_perception/node_scripts/binpack_rect_array.py @@ -24,6 +24,8 @@ def width_cmp(self, another): return self.rect.width - another.rect.width def __cmp__(self, another): # python2 return self.max_cmp(another) or self.min_cmp(another) or self.height_cmp(another) or self.width_cmp(another) or 0 + def __lt__(self, another): # python3 + return self.max_cmp(another) or self.min_cmp(another) or self.height_cmp(another) or self.width_cmp(another) or 0 class Packer(): def __init__(self): diff --git 
a/jsk_perception/node_scripts/bof_histogram_extractor.py b/jsk_perception/node_scripts/bof_histogram_extractor.py index 4b06167593..e8ccb05186 100755 --- a/jsk_perception/node_scripts/bof_histogram_extractor.py +++ b/jsk_perception/node_scripts/bof_histogram_extractor.py @@ -5,8 +5,12 @@ """ import gzip -import cPickle as pickle -from distutils.version import StrictVersion +import sys +if sys.version_info.major <= 2: + import cPickle as pickle +else: # for python3 + import _pickle as pickle +from distutils.version import LooseVersion from pkg_resources import get_distribution import numpy as np @@ -34,14 +38,20 @@ def __init__(self): if bof_data is None: quit() with gzip.open(bof_data, 'rb') as f: - self.bof = pickle.load(f) - if (StrictVersion(get_distribution('scikit-learn').version) >= - StrictVersion('0.17.0')): + if sys.version_info.major <= 2: + self.bof = pickle.load(f) + else: + self.bof = pickle.load(f, encoding='latin1') + if (LooseVersion(get_distribution('scikit-learn').version) >= + LooseVersion('0.17')): if 'n_jobs' not in self.bof.nn.__dict__ or not isinstance(self.bof.nn.n_jobs, int): # In scikit-learn>=0.17.0, # sklearn.neighbors.NearestNeighbors needs 'n_jobs' attribute. # https://github.com/jsk-ros-pkg/jsk_recognition/issues/1669 self.bof.nn.n_jobs = 1 + # noetic uses newer scikit-learn which uses n_samples_fit_ + if 'n_ssamples_fit_' not in self.bof.nn.__dict__: + self.bof.nn.n_samples_fit_ = self.bof.nn._fit_X.shape[0] self._pub = self.advertise('~output', VectorArray, queue_size=1) rospy.loginfo('Initialized BoF histogram extractor') @@ -79,7 +89,7 @@ def _apply(self, feature_msg, label_msg): decomposed = decompose_descriptors_with_label( descriptors=desc, positions=pos, label_img=label, skip_zero_label=True) - X = np.array(decomposed.values()) + X = np.array(list(decomposed.values())) if X.size == 0: return X = self.bof.transform(X) diff --git a/jsk_perception/node_scripts/classification_node.py b/jsk_perception/node_scripts/classification_node.py new file mode 100755 index 0000000000..5c9debf692 --- /dev/null +++ b/jsk_perception/node_scripts/classification_node.py @@ -0,0 +1,13 @@ +#!/usr/bin/env python + +import rospy +from jsk_perception.vil_inference_client import ClipClientNode + + +def main(): + rospy.init_node("classification") + node = ClipClientNode() + rospy.spin() + +if __name__ == "__main__": + main() diff --git a/jsk_perception/node_scripts/hand_pose_estimation_2d.py b/jsk_perception/node_scripts/hand_pose_estimation_2d.py index 4617760375..800e706259 100755 --- a/jsk_perception/node_scripts/hand_pose_estimation_2d.py +++ b/jsk_perception/node_scripts/hand_pose_estimation_2d.py @@ -253,7 +253,7 @@ def _cb_with_depth_info(self, img_msg, depth_msg, camera_info_msg): elif depth_msg.encoding != '32FC1': rospy.logerr('Unsupported depth encoding: %s' % depth_msg.encoding) - hands_points, hands_point_scores, hands_score = self.hand_pose_estimate(img) + hands_points, hands_point_scores, hands_score = self.hand_pose_estimate(img, img_msg.header) hand_pose_2d_msg = self._create_2d_hand_pose_array_msgs( hands_points, @@ -318,7 +318,7 @@ def _cb(self, img_msg): img = self.bridge.imgmsg_to_cv2( img_msg, desired_encoding='bgr8') hands_points, hands_point_scores, hands_score = \ - self.hand_pose_estimate(img) + self.hand_pose_estimate(img, img_msg.header) hand_pose_msg = self._create_2d_hand_pose_array_msgs( hands_points, @@ -358,18 +358,19 @@ def _create_2d_hand_pose_array_msgs( hand_pose_msg.poses.append(pose_msg) return hand_pose_msg - def hand_pose_estimate(self, 
bgr): + def hand_pose_estimate(self, bgr, header): if self.backend == 'torch': - return self._hand_pose_estimate_torch_backend(bgr) + return self._hand_pose_estimate_torch_backend(bgr, header) raise ValueError('Unsupported backend: {0}'.format(self.backend)) - def _hand_pose_estimate_torch_backend(self, frame): + def _hand_pose_estimate_torch_backend(self, frame, header): hands_points, hands_rect, hands_point_scores, hands_score = \ self.pyramid_inference(frame) if self.visualize: vis_img = self._draw_joints(frame, hands_points, hands_rect) vis_msg = self.bridge.cv2_to_imgmsg(vis_img, encoding='bgr8') + vis_msg.header = header self.image_pub.publish(vis_msg) return hands_points, hands_point_scores, hands_score diff --git a/jsk_perception/node_scripts/label_image_classifier.py b/jsk_perception/node_scripts/label_image_classifier.py index 7804510b22..8494773d97 100755 --- a/jsk_perception/node_scripts/label_image_classifier.py +++ b/jsk_perception/node_scripts/label_image_classifier.py @@ -7,45 +7,8 @@ import rospy from sensor_msgs.msg import Image +from jsk_perception.label_image_classifier import LabelImageClassifier -class LabelImageClassifier(ConnectionBasedTransport): - - classifier_name = 'label_image_classifier' - - def __init__(self): - super(LabelImageClassifier, self).__init__() - self.ignore_labels = rospy.get_param('~ignore_labels', []) - self.target_names = rospy.get_param('~target_names', []) - self.pub = self.advertise( - '~output', ClassificationResult, queue_size=1) - - def subscribe(self): - self.sub = rospy.Subscriber('~input', Image, self._cb) - - def unsubscribe(self): - self.sub.unregister() - - def _cb(self, imgmsg): - bridge = cv_bridge.CvBridge() - img = bridge.imgmsg_to_cv2(imgmsg) - label, proba = self._classify(img) - msg = ClassificationResult() - msg.header = imgmsg.header - msg.labels = [label] - msg.label_names = [self.target_names[label]] - msg.label_proba = [proba[label]] - msg.probabilities = proba - msg.classifier = self.classifier_name - msg.target_names = self.target_names - self.pub.publish(msg) - - def _classify(self, label_img): - counts = np.bincount(label_img.flatten(), - minlength=len(self.target_names)) - counts[self.ignore_labels] = 0 - label = np.argmax(counts) - proba = counts.astype(np.float32) / counts.sum() - return label, proba if __name__ == '__main__': diff --git a/jsk_perception/node_scripts/matchtemplate.py b/jsk_perception/node_scripts/matchtemplate.py index 0a46300e0b..432aa5dfb0 100755 --- a/jsk_perception/node_scripts/matchtemplate.py +++ b/jsk_perception/node_scripts/matchtemplate.py @@ -3,7 +3,10 @@ import roslib; roslib.load_manifest('jsk_perception') import rospy import numpy as np -import thread +try: + import thread +except: + import _thread as thread from sensor_msgs.msg import Image from geometry_msgs.msg import * from jsk_recognition_msgs.msg import Rect @@ -37,10 +40,10 @@ def _MinMaxLock2nd(arr,ex_size,is_min): if is_min: idx = 0 else: idx = 1 status = cv2.minMaxLoc(arr) - pt1 = (max(status[2+idx][0]-ex_size[0]/2,0), - max(status[2+idx][1]-ex_size[1]/2,0)) - pt2 = (min(status[2+idx][0]+ex_size[0]/2,arr.shape[1]), - min(status[2+idx][1]+ex_size[1]/2,arr.shape[0])) + pt1 = (max(status[2+idx][0]-int(ex_size[0]/2),0), + max(status[2+idx][1]-int(ex_size[1]/2),0)) + pt2 = (min(status[2+idx][0]+int(ex_size[0]/2),arr.shape[1]), + min(status[2+idx][1]+int(ex_size[1]/2),arr.shape[0])) mask = np.ones((arr.shape[0], arr.shape[1]), dtype=np.uint8) * 255 mask[pt1[0]:pt2[0], pt1[1]:pt2[1]] = 0 status2 = cv2.minMaxLoc(arr, mask) @@ 
-127,8 +130,8 @@ def set_template (self, ref_id='', ref_image=None, ref_rect=None, template_image = cv2.cvtColor( ref_image_rect, cv2.COLOR_BGR2HSV) - self.templates[ref_id]['ref_point'] = (ref_rect[0]+ref_rect[2]/2, - ref_rect[1]+ref_rect[3]/2) + self.templates[ref_id]['ref_point'] = (ref_rect[0]+int(ref_rect[2]/2), + ref_rect[1]+int(ref_rect[3]/2)) self.templates[ref_id]['ref_image'] = template_image rospy.loginfo("set ref_image id=%s, rect=%s", ref_id, ref_rect); @@ -160,13 +163,13 @@ def set_reference (self, rect): def set_reference_point_callback (self, msg): # PointStamped self.lockobj.acquire() pt = (int(msg.point.x),int(msg.point.y)) - rect = (pt[0]-self.default_template_size[0]/2, - pt[1]-self.default_template_size[1]/2, + rect = (pt[0]-int(self.default_template_size[0]/2), + pt[1]-int(self.default_template_size[1]/2), self.default_template_size[0], self.default_template_size[1]) self.set_reference(rect) print(rect) - search_rect = (pt[0]-self.default_search_size[0]/2, - pt[1]-self.default_search_size[1]/2, + search_rect = (pt[0]-int(self.default_search_size[0]/2), + pt[1]-int(self.default_search_size[1]/2), self.default_search_size[0],self.default_search_size[1]) self.set_template('',ser_frame=None, ser_rect=search_rect) self.lockobj.release() @@ -204,7 +207,7 @@ def reconfigure(self,config,level): (template['ref_image'] is not None): ref_pt = template['ref_point'] ref_size = template['ref_image'].shape - ref_rect = (ref_pt[0]-ref_size[0]/2,ref_pt[1]-ref_size[1]/2,ref_size[0],ref_size[1]) + ref_rect = (ref_pt[0]-int(ref_size[0]/2),ref_pt[1]-int(ref_size[1]/2),ref_size[0],ref_size[1]) self.set_template(ref_id=config['current_template_id'], color=config['template_color_space'], ref_rect=ref_rect, @@ -270,8 +273,8 @@ def ser_image_callback (self, msg): status = MaxLock2nd(results,reference_size) # maximum for others status = (1 - status[0], 1 - status[1], status[2], status[3]) - result_pt = (status[2][0]+search_rect[0]+reference_size[0]/2, - status[2][1]+search_rect[1]+reference_size[1]/2) + result_pt = (status[2][0]+search_rect[0]+int(reference_size[0]/2), + status[2][1]+search_rect[1]+int(reference_size[1]/2)) # publish center position as result result.child_frame_id = template_id @@ -291,8 +294,8 @@ def ser_image_callback (self, msg): # self feedback if self.auto_search_area: - val_x = result_pt[0]-search_rect[2]/2 - val_y = result_pt[1]-search_rect[3]/2 + val_x = result_pt[0]-int(search_rect[2]/2) + val_y = result_pt[1]-int(search_rect[3]/2) ser_scale = max(2,5+np.log(status[0])) # ??? 
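Most of the matchtemplate.py changes in these hunks follow one pattern: pixel arithmetic is wrapped in int() because / became true division under Python 3 and OpenCV rejects float coordinates. In short:

```python
# Behaviour the int() wrappers above compensate for (w is a pixel size):
w = 25
print(w / 2)       # Python 2: 12        Python 3: 12.5
print(int(w / 2))  # 12 on both, valid as a cv2 coordinate
print(w // 2)      # 12 on both; floor division would be an equivalent fix
```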
new_ser_rect = ( min(max(val_x,0),image_size[0]-search_rect[2]), @@ -303,10 +306,10 @@ def ser_image_callback (self, msg): # draw on debug image if self.show_debug_image: cv2.rectangle(debug_image, - (result_pt[0] - reference_size[0]/2, - result_pt[1] - reference_size[1]/2), - (result_pt[0] + reference_size[0]/2, - result_pt[1] + reference_size[1]/2), + (result_pt[0] - int(reference_size[0]/2), + result_pt[1] - int(reference_size[1]/2)), + (result_pt[0] + int(reference_size[0]/2), + result_pt[1] + int(reference_size[1]/2)), color=(0, 0, 255)) # red cv2.rectangle(debug_image, (search_rect[0], search_rect[1]), diff --git a/jsk_perception/node_scripts/people_pose_estimation_2d.py b/jsk_perception/node_scripts/people_pose_estimation_2d.py index 08cd7bcc19..62b2fbe8ba 100755 --- a/jsk_perception/node_scripts/people_pose_estimation_2d.py +++ b/jsk_perception/node_scripts/people_pose_estimation_2d.py @@ -706,7 +706,7 @@ def _draw_joints(self, img, people_joint_positions, all_peaks): cv2.fillConvexPoly(roi2, polygon - np.array([left, top]), color) cv2.addWeighted(roi, 0.4, roi2, 0.6, 0.0, dst=roi) # - offset += len(self.index2handname) / 2 + offset += int(len(self.index2handname) / 2) return img @@ -812,8 +812,8 @@ def _hand_estimate_chainer_backend_each(self, hand_bgr, cx, cy, left_hand): for i, heatmap in enumerate(hmaps): conf = heatmap.max() cds = np.array(np.where(heatmap==conf)).flatten().tolist() - py = cy + cds[0] - hand_bgr.shape[0] / 2 - px = cx + cds[1] - hand_bgr.shape[1] / 2 + py = cy + cds[0] - int(hand_bgr.shape[0] / 2) + px = cx + cds[1] - int(hand_bgr.shape[1] / 2) keypoints.append({'x': px, 'y': py, 'score': conf, 'limb': self.index2handname[idx_offset+i]}) return keypoints diff --git a/jsk_perception/node_scripts/probability_image_classifier.py b/jsk_perception/node_scripts/probability_image_classifier.py index 768ea81d72..65282c5199 100755 --- a/jsk_perception/node_scripts/probability_image_classifier.py +++ b/jsk_perception/node_scripts/probability_image_classifier.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -from label_image_classifier import LabelImageClassifier +from jsk_perception.label_image_classifier import LabelImageClassifier import numpy as np import rospy diff --git a/jsk_perception/node_scripts/rect_array_in_panorama_to_bounding_box_array.py b/jsk_perception/node_scripts/rect_array_in_panorama_to_bounding_box_array.py index 3147d7cfe6..1bf0d8007a 100755 --- a/jsk_perception/node_scripts/rect_array_in_panorama_to_bounding_box_array.py +++ b/jsk_perception/node_scripts/rect_array_in_panorama_to_bounding_box_array.py @@ -50,22 +50,22 @@ def __init__(self): self._dimensions_labels = rospy.get_param( '~dimensions_labels', {} ) self._duration_timeout = rospy.get_param( '~duration_timeout', 0.05 ) - try: - msg_panorama_image = rospy.wait_for_message( - '~panorama_image', Image, 10) - msg_panorama_info = rospy.wait_for_message( - '~panorama_info', PanoramaInfo, 10) - except (rospy.ROSException, rospy.ROSInterruptException) as e: - rospy.logerr('{}'.format(e)) - sys.exit(1) + msg_panorama_info = None + while (not rospy.is_shutdown() and msg_panorama_info is None): + try: + msg_panorama_info = rospy.wait_for_message( + '~panorama_info', PanoramaInfo, 10) + except (rospy.ROSException, rospy.ROSInterruptException) as e: + rospy.logerr('~panorama_info is not subscribed...') + rospy.logerr('waiting ~panorama_info for more 10 seconds') self._frame_panorama = msg_panorama_info.header.frame_id self._theta_min = msg_panorama_info.theta_min self._theta_max = msg_panorama_info.theta_max 
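The Python 2/3 pickle handling introduced in bof_histogram_extractor.py above reappears in sklearn_classifier.py below. For clarity, the pattern in isolation, with a placeholder file name:

```python
import gzip
import sys

if sys.version_info.major <= 2:
    import cPickle as pickle
else:  # Python 3
    import _pickle as pickle

with gzip.open("bof_data.pkl.gz", "rb") as f:
    if sys.version_info.major <= 2:
        bof = pickle.load(f)
    else:
        # the data was pickled under Python 2, so decode its byte strings as latin1
        bof = pickle.load(f, encoding="latin1")
```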
self._phi_min = msg_panorama_info.phi_min self._phi_max = msg_panorama_info.phi_max - self._image_height = msg_panorama_image.height - self._image_width = msg_panorama_image.width + self._image_height = msg_panorama_info.image_height + self._image_width = msg_panorama_info.image_width self._tf_buffer = tf2_ros.Buffer() self._tf_listener = tf2_ros.TransformListener(self._tf_buffer) diff --git a/jsk_perception/node_scripts/regional_feature_based_object_recognition.py b/jsk_perception/node_scripts/regional_feature_based_object_recognition.py index 0bebd48372..a7217494bb 100755 --- a/jsk_perception/node_scripts/regional_feature_based_object_recognition.py +++ b/jsk_perception/node_scripts/regional_feature_based_object_recognition.py @@ -73,6 +73,7 @@ def __init__(self): rospy.loginfo('Fitting KNN from db') db = np.load(db_file) X, y, self.target_names = db['X'], db['y'], db['target_names'] + self.target_names = np.array(list(map(lambda name: name.decode('utf-8') if hasattr(name, 'decode') else name, self.target_names))) self.knn = KNeighborsClassifier(n_neighbors=10) self.knn.fit(X, y) rospy.loginfo('Finished fitting KNN from db') diff --git a/jsk_perception/node_scripts/sklearn_classifier.py b/jsk_perception/node_scripts/sklearn_classifier.py index e42dbef968..be3729f5c4 100755 --- a/jsk_perception/node_scripts/sklearn_classifier.py +++ b/jsk_perception/node_scripts/sklearn_classifier.py @@ -3,7 +3,11 @@ # from __future__ import division import gzip -import cPickle as pickle +import sys +if sys.version_info.major <= 2: + import cPickle as pickle +else: # for python3 + import _pickle as pickle import numpy as np from sklearn.preprocessing import normalize @@ -23,7 +27,10 @@ def __init__(self): def _init_classifier(self): clf_path = rospy.get_param('~clf_path') with gzip.open(clf_path) as f: - self.clf = pickle.load(f) + if sys.version_info.major <= 2: + self.clf = pickle.load(f) + else: + self.clf = pickle.load(f, encoding='latin1') def subscribe(self): self.sub_hist = rospy.Subscriber('~input', VectorArray, self._predict) diff --git a/jsk_perception/node_scripts/solidity_rag_merge.py b/jsk_perception/node_scripts/solidity_rag_merge.py index 7f343d51b1..ba5e2a2b38 100755 --- a/jsk_perception/node_scripts/solidity_rag_merge.py +++ b/jsk_perception/node_scripts/solidity_rag_merge.py @@ -39,6 +39,8 @@ def rag_solidity(labels, connectivity=2): + if LooseVersion(skimage.__version__) >= '0.16.0': + RAG.node = RAG.nodes graph = RAG() # The footprint is constructed in such a way that the first diff --git a/jsk_perception/node_scripts/split_image.py b/jsk_perception/node_scripts/split_image.py index a9f4b72595..66170ec3b0 100755 --- a/jsk_perception/node_scripts/split_image.py +++ b/jsk_perception/node_scripts/split_image.py @@ -47,5 +47,6 @@ def _split_cb(self, msg): if __name__ == '__main__': rospy.init_node('split_image') + rospy.logwarn("split_image.py would be deprecated. 
Please use NODELET version of split_image") SplitImage() rospy.spin() diff --git a/jsk_perception/node_scripts/ssd_object_detector.py b/jsk_perception/node_scripts/ssd_object_detector.py index e95d189f67..359568f9ae 100755 --- a/jsk_perception/node_scripts/ssd_object_detector.py +++ b/jsk_perception/node_scripts/ssd_object_detector.py @@ -186,9 +186,9 @@ def image_cb(self, msg): rect_msg = RectArray(header=msg.header) for bbox in bboxes: - rect = Rect(x=bbox[1], y=bbox[0], - width= bbox[3] - bbox[1], - height=bbox[2] - bbox[0]) + rect = Rect(x=int(bbox[1]), y=int(bbox[0]), + width= int(bbox[3] - bbox[1]), + height=int(bbox[2] - bbox[0])) rect_msg.rects.append(rect) if self.profiling: diff --git a/jsk_perception/node_scripts/vgg16_object_recognition.py b/jsk_perception/node_scripts/vgg16_object_recognition.py index ce29e13175..4169ecac57 100755 --- a/jsk_perception/node_scripts/vgg16_object_recognition.py +++ b/jsk_perception/node_scripts/vgg16_object_recognition.py @@ -43,109 +43,8 @@ import rospy from sensor_msgs.msg import Image +from jsk_perception.vgg16_object_recognition import VGG16ObjectRecognition -class VGG16ObjectRecognition(ConnectionBasedTransport): - - mean_bgr = np.array([104.00698793, 116.66876762, 122.67891434]) - - def __init__(self): - super(self.__class__, self).__init__() - self.insize = 224 - self.gpu = rospy.get_param('~gpu', -1) - self.target_names = rospy.get_param('~target_names') - self.model_name = rospy.get_param('~model_name') - if self.model_name == 'vgg16': - self.model = VGG16(n_class=len(self.target_names)) - elif self.model_name == 'vgg16_batch_normalization': - self.model = VGG16BatchNormalization( - n_class=len(self.target_names)) - else: - rospy.logerr('Unsupported ~model_name: {0}' - .format(self.model_name)) - model_file = rospy.get_param('~model_file') - S.load_hdf5(model_file, self.model) - if self.gpu != -1: - self.model.to_gpu(self.gpu) - self.pub = self.advertise('~output', ClassificationResult, - queue_size=1) - self.pub_input = self.advertise( - '~debug/net_input', Image, queue_size=1) - - def subscribe(self): - if rospy.get_param('~use_mask', False): - # larger buff_size is necessary for taking time callback - # http://stackoverflow.com/questions/26415699/ros-subscriber-not-up-to-date/29160379#29160379 # NOQA - sub = message_filters.Subscriber( - '~input', Image, queue_size=1, buff_size=2**24) - sub_mask = message_filters.Subscriber( - '~input/mask', Image, queue_size=1, buff_size=2**24) - self.subs = [sub, sub_mask] - queue_size = rospy.get_param('~queue_size', 10) - if rospy.get_param('~approximate_sync', False): - slop = rospy.get_param('~slop', 0.1) - sync = message_filters.ApproximateTimeSynchronizer( - self.subs, queue_size=queue_size, slop=slop) - else: - sync = message_filters.TimeSynchronizer( - self.subs, queue_size=queue_size) - sync.registerCallback(self._recognize) - else: - sub = rospy.Subscriber( - '~input', Image, self._recognize, callback_args=None, - queue_size=1, buff_size=2**24) - self.subs = [sub] - - def unsubscribe(self): - for sub in self.subs: - sub.unregister() - - def _recognize(self, imgmsg, mask_msg=None): - bridge = cv_bridge.CvBridge() - bgr = bridge.imgmsg_to_cv2(imgmsg, desired_encoding='bgr8') - if mask_msg is not None: - mask = bridge.imgmsg_to_cv2(mask_msg) - if mask.shape != bgr.shape[:2]: - logerr_throttle(10, - 'Size of input image and mask is different') - return - elif mask.size == 0: - logerr_throttle(10, 'Size of input mask is 0') - return - bgr[mask == 0] = self.mean_bgr - bgr = 
skimage.transform.resize( - bgr, (self.insize, self.insize), preserve_range=True) - input_msg = bridge.cv2_to_imgmsg(bgr.astype(np.uint8), encoding='bgr8') - input_msg.header = imgmsg.header - self.pub_input.publish(input_msg) - - blob = (bgr - self.mean_bgr).transpose((2, 0, 1)) - x_data = np.array([blob], dtype=np.float32) - if self.gpu != -1: - x_data = cuda.to_gpu(x_data, device=self.gpu) - if LooseVersion(chainer.__version__) < LooseVersion('2.0.0'): - x = Variable(x_data, volatile=True) - self.model.train = False - self.model(x) - else: - with chainer.using_config('train', False), \ - chainer.no_backprop_mode(): - x = Variable(x_data) - self.model(x) - - proba = cuda.to_cpu(self.model.pred.data)[0] - label = np.argmax(proba) - label_name = self.target_names[label] - label_proba = proba[label] - cls_msg = ClassificationResult( - header=imgmsg.header, - labels=[label], - label_names=[label_name], - label_proba=[label_proba], - probabilities=proba, - classifier=self.model_name, - target_names=self.target_names, - ) - self.pub.publish(cls_msg) if __name__ == '__main__': diff --git a/jsk_perception/node_scripts/vqa_node.py b/jsk_perception/node_scripts/vqa_node.py new file mode 100755 index 0000000000..6f3cea1eaf --- /dev/null +++ b/jsk_perception/node_scripts/vqa_node.py @@ -0,0 +1,13 @@ +#!/usr/bin/env python + +import rospy +from jsk_perception.vil_inference_client import OFAClientNode + + +def main(): + rospy.init_node("vqa") + node = OFAClientNode() + rospy.spin() + +if __name__ == "__main__": + main() diff --git a/jsk_perception/package.xml b/jsk_perception/package.xml index 9b61648d03..b7e25d7f9c 100644 --- a/jsk_perception/package.xml +++ b/jsk_perception/package.xml @@ -94,6 +94,8 @@ python-fcn-pip python-pytesseract-pip + python-requests + python3-requests python-sklearn python3-sklearn python-yaml @@ -119,9 +121,12 @@ + compressed_depth_image_transport + compressed_image_transport jsk_tools roslaunch roslint + rosservice rostest diff --git a/jsk_perception/plugins/nodelet/libjsk_perception.xml b/jsk_perception/plugins/nodelet/libjsk_perception.xml index 2d6d012b52..35828883fc 100644 --- a/jsk_perception/plugins/nodelet/libjsk_perception.xml +++ b/jsk_perception/plugins/nodelet/libjsk_perception.xml @@ -143,6 +143,9 @@ + + @@ -290,8 +293,20 @@ type="jsk_perception::VirtualCameraMono" base_class_type="nodelet::Nodelet"> + + + + + + diff --git a/jsk_perception/sample/config/sample_ofa_config.rviz b/jsk_perception/sample/config/sample_ofa_config.rviz new file mode 100644 index 0000000000..458138e652 --- /dev/null +++ b/jsk_perception/sample/config/sample_ofa_config.rviz @@ -0,0 +1,181 @@ +Panels: + - Class: rviz/Displays + Help Height: 138 + Name: Displays + Property Tree Widget: + Expanded: ~ + Splitter Ratio: 0.4870370328426361 + Tree Height: 509 + - Class: rviz/Selection + Name: Selection + - Class: rviz/Tool Properties + Expanded: + - /2D Pose Estimate1 + - /2D Nav Goal1 + - /Publish Point1 + Name: Tool Properties + Splitter Ratio: 0.5886790156364441 + - Class: rviz/Views + Expanded: + - /Current View1 + Name: Views + Splitter Ratio: 0.5 + - Class: rviz/Time + Name: Time + SyncMode: 0 + SyncSource: "" +Preferences: + PromptSaveOnExit: true +Toolbars: + toolButtonStyle: 2 +Visualization Manager: + Class: "" + Displays: + - Alpha: 0.5 + Cell Size: 1 + Class: rviz/Grid + Color: 160; 160; 164 + Enabled: false + Line Style: + Line Width: 0.029999999329447746 + Value: Lines + Name: Grid + Normal Cell Count: 0 + Offset: + X: 0 + Y: 0 + Z: 0 + Plane: XY + Plane Cell Count: 10 + 
Reference Frame: + Value: false + - Class: jsk_rviz_plugin/OverlayImage + Enabled: true + Name: VQA/VQA/Image + Topic: /vqa/result/image + Value: true + alpha: 0.800000011920929 + height: 128 + keep aspect ratio: true + left: 10 + overwrite alpha value: false + top: 10 + transport hint: raw + width: 320 + - Align Bottom: false + Background Alpha: 0.800000011920929 + Background Color: 0; 0; 0 + Class: jsk_rviz_plugin/String + Enabled: true + Foreground Alpha: 0.800000011920929 + Foreground Color: 255; 255; 255 + Name: VQA/VQA/Visualize + Overtake Color Properties: true + Overtake Position Properties: true + Topic: /vqa/visualize + Value: true + font: DejaVu Sans Mono + height: 500 + left: 10 + line width: 2 + text size: 12 + top: 320 + width: 512 + - Class: jsk_rviz_plugin/OverlayImage + Enabled: true + Name: Classification/Classification/Image + Topic: /classification/result/image + Value: true + alpha: 0.800000011920929 + height: 128 + keep aspect ratio: true + left: 530 + overwrite alpha value: false + top: 10 + transport hint: raw + width: 320 + - Align Bottom: false + Background Alpha: 0.800000011920929 + Background Color: 0; 0; 0 + Class: jsk_rviz_plugin/String + Enabled: true + Foreground Alpha: 0.800000011920929 + Foreground Color: 255; 255; 255 + Name: ClassificationClassification/Visualize + Overtake Color Properties: true + Overtake Position Properties: true + Topic: /classification/visualize + Value: true + font: DejaVu Sans Mono + height: 500 + left: 530 + line width: 2 + text size: 12 + top: 320 + width: 512 + Enabled: true + Global Options: + Background Color: 48; 48; 48 + Default Light: true + Fixed Frame: map + Frame Rate: 30 + Name: root + Tools: + - Class: rviz/Interact + Hide Inactive Objects: true + - Class: rviz/MoveCamera + - Class: rviz/Select + - Class: rviz/FocusCamera + - Class: rviz/Measure + - Class: rviz/SetInitialPose + Theta std deviation: 0.2617993950843811 + Topic: /initialpose + X std deviation: 0.5 + Y std deviation: 0.5 + - Class: rviz/SetGoal + Topic: /move_base_simple/goal + - Class: rviz/PublishPoint + Single click: true + Topic: /clicked_point + Value: true + Views: + Current: + Class: rviz/Orbit + Distance: 10 + Enable Stereo Rendering: + Stereo Eye Separation: 0.05999999865889549 + Stereo Focal Distance: 1 + Swap Stereo Eyes: false + Value: false + Field of View: 0.7853981852531433 + Focal Point: + X: 0 + Y: 0 + Z: 0 + Focal Shape Fixed Size: true + Focal Shape Size: 0.05000000074505806 + Invert Z Axis: false + Name: Current View + Near Clip Distance: 0.009999999776482582 + Pitch: 0.785398006439209 + Target Frame: + Yaw: 0.785398006439209 + Saved: ~ +Window Geometry: + Displays: + collapsed: false + Height: 1025 + Hide Left Dock: false + Hide Right Dock: true + QMainWindow State: 
000000ff00000000fd00000004000000000000021e000002f7fc0200000008fb0000001200530065006c0065006300740069006f006e00000001e10000009b000000b100fffffffb0000001e0054006f006f006c002000500072006f007000650072007400690065007302000001ed000001df00000185000000b1fb000000120056006900650077007300200054006f006f02000001df000002110000018500000122fb000000200054006f006f006c002000500072006f0070006500720074006900650073003203000002880000011d000002210000017afb000000100044006900730070006c006100790073010000006f000002f70000018400fffffffb0000002000730065006c0065006300740069006f006e00200062007500660066006500720200000138000000aa0000023a00000294fb00000014005700690064006500530074006500720065006f02000000e6000000d2000003ee0000030bfb0000000c004b0069006e0065006300740200000186000001060000030c00000261000000010000010f0000037efc0200000003fb0000001e0054006f006f006c002000500072006f00700065007200740069006500730100000041000000780000000000000000fb0000000a00560069006500770073000000003d0000037e0000013500fffffffb0000001200530065006c0065006300740069006f006e010000025a000000b200000000000000000000000200000490000000a9fc0100000001fb0000000a00560069006500770073030000004e00000080000002e100000197000000030000078000000060fc0100000002fb0000000800540069006d0065010000000000000780000005cd00fffffffb0000000800540069006d0065010000000000000450000000000000000000000556000002f700000004000000040000000800000008fc0000000100000002000000010000000a0054006f006f006c00730100000000ffffffff0000000000000000 + Selection: + collapsed: false + Time: + collapsed: false + Tool Properties: + collapsed: false + Views: + collapsed: true + Width: 1920 + X: 1440 + Y: 1096 diff --git a/jsk_perception/sample/image/items_in_shelf.jpg b/jsk_perception/sample/image/items_in_shelf.jpg new file mode 100644 index 0000000000..699917fa3f Binary files /dev/null and b/jsk_perception/sample/image/items_in_shelf.jpg differ diff --git a/jsk_perception/sample/image/pai_no_mi.jpg b/jsk_perception/sample/image/pai_no_mi.jpg new file mode 100644 index 0000000000..14aff8a629 Binary files /dev/null and b/jsk_perception/sample/image/pai_no_mi.jpg differ diff --git a/jsk_perception/sample/include/play_rosbag_pr2_self_see.xml b/jsk_perception/sample/include/play_rosbag_pr2_self_see.xml index b12b21acbb..498dafe82f 100644 --- a/jsk_perception/sample/include/play_rosbag_pr2_self_see.xml +++ b/jsk_perception/sample/include/play_rosbag_pr2_self_see.xml @@ -2,7 +2,7 @@ + command="$(find xacro)/xacro '$(find pr2_description)/robots/pr2.urdf.xacro'"/> diff --git a/jsk_perception/sample/sample_classification.launch b/jsk_perception/sample/sample_classification.launch new file mode 100644 index 0000000000..71984cea22 --- /dev/null +++ b/jsk_perception/sample/sample_classification.launch @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/jsk_perception/sample/sample_color_histogram.launch b/jsk_perception/sample/sample_color_histogram.launch index b5c1fb791b..d75d2fc11c 100644 --- a/jsk_perception/sample/sample_color_histogram.launch +++ b/jsk_perception/sample/sample_color_histogram.launch @@ -24,8 +24,9 @@ use_window: $(arg gui) + + pkg="jsk_perception" type="publish_mouse_event.py"> image_width: 256 diff --git a/jsk_perception/sample/sample_dual_fisheye_to_panorama.launch b/jsk_perception/sample/sample_dual_fisheye_to_panorama.launch index 12d867d767..457a49071d 100644 --- a/jsk_perception/sample/sample_dual_fisheye_to_panorama.launch +++ b/jsk_perception/sample/sample_dual_fisheye_to_panorama.launch @@ -6,21 +6,33 @@ + + + + + + + + args="load jsk_perception/DualFisheyeToPanorama $(arg MANAGER)"> + + 
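The sample launch files added above bring up the new CLIP-based classification client node and its action server. As a rough illustration of how that interface can be exercised, here is a minimal client sketch; the `/classification/inference_server` action name and the `/usb_cam/image_raw` image topic are assumptions mirroring `scripts/send_classification_query_action.py`, which is added further below in this diff, not a definitive API of the launch file.

```python
#!/usr/bin/env python
# Minimal sketch of querying the classification action server started by
# sample_classification.launch. Action and topic names are assumptions taken
# from scripts/send_classification_query_action.py in this same changeset.
import actionlib
import rospy
from jsk_recognition_msgs.msg import ClassificationTaskAction, ClassificationTaskGoal
from sensor_msgs.msg import Image


def callback(image):
    client = actionlib.SimpleActionClient(
        "/classification/inference_server", ClassificationTaskAction)
    client.wait_for_server()
    goal = ClassificationTaskGoal()
    goal.image = image                          # classify the latest camera frame
    goal.queries = ["human", "apple", "book"]   # candidate labels for CLIP
    client.send_goal(goal)
    client.wait_for_result()
    rospy.loginfo(client.get_result())


if __name__ == "__main__":
    rospy.init_node("classification_client_example")
    rospy.Subscriber("/usb_cam/image_raw", Image, callback)
    rospy.spin()
```

The VQA side exposes the analogous `VQATaskAction` interface on `/vqa/inference_server`, as shown by `scripts/send_vqa_query_action.py` later in this diff.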
diff --git a/jsk_perception/sample/sample_insta360_air.launch b/jsk_perception/sample/sample_insta360_air.launch index 06bbba3e5e..b79b010c55 100644 --- a/jsk_perception/sample/sample_insta360_air.launch +++ b/jsk_perception/sample/sample_insta360_air.launch @@ -27,7 +27,16 @@ + + + + + + @@ -57,7 +66,10 @@ - + vertical_parts: 1 @@ -68,7 +80,7 @@ @@ -76,7 +88,7 @@ @@ -84,7 +96,7 @@ @@ -101,6 +113,10 @@ + + + + diff --git a/jsk_perception/sample/sample_invert_mask_image.launch b/jsk_perception/sample/sample_invert_mask_image.launch new file mode 100644 index 0000000000..d7c338f962 --- /dev/null +++ b/jsk_perception/sample/sample_invert_mask_image.launch @@ -0,0 +1,81 @@ + + + + + + + + + + + file_name: $(find jsk_perception)/sample/kiva_pod_image_color.jpg + encoding: bgr8 + publish_info: false + + + + + + + file_name: $(find jsk_perception)/sample/kiva_pod_mask.jpg + encoding: mono8 + publish_info: false + + + + + + + + + + + + + + approximate_sync: true + + + + + + + + approximate_sync: true + + + + + + + + + + + + + + + + + + + diff --git a/jsk_perception/sample/sample_point_pose_extractor.launch b/jsk_perception/sample/sample_point_pose_extractor.launch new file mode 100644 index 0000000000..07112f8081 --- /dev/null +++ b/jsk_perception/sample/sample_point_pose_extractor.launch @@ -0,0 +1,40 @@ + + + + + + + + file_name: $(find jsk_perception)/sample/image/items_in_shelf.jpg + encoding: bgr8 + publish_info: true + fovx: 84.1 + fovy: 53.8 + + + + + + + + + + + + + template_filename: $(find jsk_perception)/sample/image/pai_no_mi.jpg + template_id: 0 + object_width: 0.145 + object_height: 0.128 + viewer_window: $(arg gui) + + + diff --git a/jsk_perception/sample/sample_rect_array_in_panorama_to_bounding_box_array.launch b/jsk_perception/sample/sample_rect_array_in_panorama_to_bounding_box_array.launch index e3982cec2b..9d0a9a5a0e 100644 --- a/jsk_perception/sample/sample_rect_array_in_panorama_to_bounding_box_array.launch +++ b/jsk_perception/sample/sample_rect_array_in_panorama_to_bounding_box_array.launch @@ -47,7 +47,6 @@ type="rect_array_in_panorama_to_bounding_box_array.py" name="rect_array_in_panorama_to_bounding_box_array" > - diff --git a/jsk_perception/sample/sample_split_image.launch b/jsk_perception/sample/sample_split_image.launch index 400e9ba0a6..b683a995ea 100644 --- a/jsk_perception/sample/sample_split_image.launch +++ b/jsk_perception/sample/sample_split_image.launch @@ -12,14 +12,16 @@ publish_info: true fovx: 84.1 fovy: 53.8 + rate: 60.0 - + vertical_parts: 2 horizontal_parts: 2 + always_subscribe: true diff --git a/jsk_perception/sample/sample_video_to_scene.launch b/jsk_perception/sample/sample_video_to_scene.launch new file mode 100644 index 0000000000..94a4c1d0af --- /dev/null +++ b/jsk_perception/sample/sample_video_to_scene.launch @@ -0,0 +1,35 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/jsk_perception/sample/sample_vqa.launch b/jsk_perception/sample/sample_vqa.launch new file mode 100644 index 0000000000..daa9865e50 --- /dev/null +++ b/jsk_perception/sample/sample_vqa.launch @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/jsk_perception/scripts/send_classification_query_action.py b/jsk_perception/scripts/send_classification_query_action.py new file mode 100755 index 0000000000..db15f17a02 --- /dev/null +++ b/jsk_perception/scripts/send_classification_query_action.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python + +import actionlib +import jsk_recognition_msgs.msg +import rospy +from sensor_msgs.msg import Image + +def 
cb(image): + ac = actionlib.SimpleActionClient("/classification/inference_server", + jsk_recognition_msgs.msg.ClassificationTaskAction) + ac.wait_for_server() + goal = jsk_recognition_msgs.msg.ClassificationTaskGoal() + goal.image = image + goal.queries = ["human", "apple", "book"] + ac.send_goal(goal) + ac.wait_for_result() + print(ac.get_result()) + +rospy.init_node("test_classification_action") +sub = rospy.Subscriber("/usb_cam/image_raw", Image, cb) +rospy.spin() diff --git a/jsk_perception/scripts/send_vqa_query_action.py b/jsk_perception/scripts/send_vqa_query_action.py new file mode 100755 index 0000000000..ad1325290a --- /dev/null +++ b/jsk_perception/scripts/send_vqa_query_action.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python + +import actionlib +import jsk_recognition_msgs.msg +import rospy +from sensor_msgs.msg import Image + +def cb(image): + ac = actionlib.SimpleActionClient("/vqa/inference_server" , jsk_recognition_msgs.msg.VQATaskAction) + ac.wait_for_server() + goal = jsk_recognition_msgs.msg.VQATaskGoal() + goal.image = image + goal.questions = ["what does this image decribe?"] + ac.send_goal(goal) + ac.wait_for_result() + print(ac.get_result()) + +rospy.init_node("test_vqa_action") +sub = rospy.Subscriber("/usb_cam/image_raw", Image, cb) +rospy.spin() diff --git a/jsk_perception/scripts/vqa_request.py b/jsk_perception/scripts/vqa_request.py new file mode 100755 index 0000000000..ee7c1e8df3 --- /dev/null +++ b/jsk_perception/scripts/vqa_request.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python + +import argparse +import actionlib +import cv2 +from cv_bridge import CvBridge +from jsk_recognition_msgs.msg import VQATaskAction, VQATaskActionGoal +import json +import rospy # for logging +import os + +class VQAClient(object): + def __init__(self): + self.ac_caption_client = actionlib.SimpleActionClient("/vqa/vqa_server", VQATaskAction) + self._bridge = CvBridge() + + def request(self, questions_path, image_path, output_dir=None): + self.ac_caption_client.wait_for_server() + caption_goal = VQATaskActionGoal() + cv_image = cv2.imread(image_path) + image = self._bridge.cv2_to_imgmsg(cv_image, "bgr8") + caption_goal.goal.image = image + with open(questions_path) as f: + for q in f: + caption_goal.goal.questions.append(q) + self.ac_caption_client.send_goal(caption_goal.goal) + self.ac_caption_client.wait_for_result() + caption_result = self.ac_caption_client.get_result() + result = {} + for r in caption_result.result.result: + result[r.question] = r.answer + image_name = os.path.splitext(os.path.basename(image_path))[0] + json_name = image_name + ".json" + if output_dir: + save_path = os.path.join(output_dir, json_name) + else: + save_path = os.path.join(os.path.dirname(image_path), json_name) + with open(save_path, "w") as f: + json.dump(result, f, indent=4, separators=(',', ': ')) + +parser = argparse.ArgumentParser(description="CLI interface for VQA action client") +parser.add_argument("questions_path", help="Question text file path of VQA input", type=str) +parser.add_argument("image_path", help="Image file path of VQA input", type=str) +parser.add_argument("-o", "--output", default=None) + +args = parser.parse_args() + +if __name__ == "__main__": + rospy.init_node("vqa_request_client", anonymous=True) + client = VQAClient() + questions_path = args.questions_path + image_path = args.image_path + output = args.output + client.request(questions_path, image_path, output) diff --git a/jsk_perception/setup.py b/jsk_perception/setup.py index 66f43a495a..b4eee0e9e8 100644 --- 
a/jsk_perception/setup.py +++ b/jsk_perception/setup.py @@ -3,6 +3,9 @@ from setuptools import setup from catkin_pkg.python_setup import generate_distutils_setup -d = generate_distutils_setup() +d = generate_distutils_setup( + packages=['jsk_perception'], + package_dir={'': 'src'}, +) setup(**d) diff --git a/jsk_perception/src/dual_fisheye_to_panorama.cpp b/jsk_perception/src/dual_fisheye_to_panorama.cpp index d749cec51b..a10726d6ee 100644 --- a/jsk_perception/src/dual_fisheye_to_panorama.cpp +++ b/jsk_perception/src/dual_fisheye_to_panorama.cpp @@ -92,6 +92,8 @@ namespace jsk_perception msg_panorama_info_.theta_max = M_PI; msg_panorama_info_.phi_min = -M_PI; msg_panorama_info_.phi_max = M_PI; + msg_panorama_info_.image_height = output_image_height_; + msg_panorama_info_.image_width = output_image_width_; onInitPostProcess(); } diff --git a/jsk_perception/src/invert_mask_image.cpp b/jsk_perception/src/invert_mask_image.cpp new file mode 100644 index 0000000000..3fca1bdd69 --- /dev/null +++ b/jsk_perception/src/invert_mask_image.cpp @@ -0,0 +1,88 @@ +// -*- mode: c++ -*- +/********************************************************************* + * Software License Agreement (BSD License) + * + * Copyright (c) 2022, JSK Lab + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of the JSK Lab nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ *********************************************************************/ + +#include "jsk_perception/invert_mask_image.h" +#include +#include +#include +#include + + +namespace jsk_perception +{ + void InvertMaskImage::onInit() + { + DiagnosticNodelet::onInit(); + pub_mask_ = advertise(*pnh_, "output", 1); + onInitPostProcess(); + } + + InvertMaskImage::~InvertMaskImage() { + } + + void InvertMaskImage::subscribe() + { + int queue_size; + pnh_->param("queue_size", queue_size, 1); + sub_mask_ = pnh_->subscribe("input", queue_size, &InvertMaskImage::invert, this); + ros::V_string names = boost::assign::list_of("~input"); + jsk_topic_tools::warnNoRemap(names); + } + + void InvertMaskImage::unsubscribe() + { + sub_mask_.shutdown(); + } + + void InvertMaskImage::invert(const sensor_msgs::Image::ConstPtr& mask_msg) + { + vital_checker_->poke(); + cv::Mat mask; + try { + mask = cv_bridge::toCvShare(mask_msg, "mono8")->image; + } catch(std::exception &e) { + ROS_ERROR("cv_bridge exception: %s", e.what()); + return; + } + cv::bitwise_not(mask, mask); + pub_mask_.publish(cv_bridge::CvImage( + mask_msg->header, + "mono8", + mask).toImageMsg()); + } +} + +#include +PLUGINLIB_EXPORT_CLASS (jsk_perception::InvertMaskImage, nodelet::Nodelet); diff --git a/jsk_perception/src/jsk_perception/__init__.py b/jsk_perception/src/jsk_perception/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/jsk_perception/src/jsk_perception/label_image_classifier.py b/jsk_perception/src/jsk_perception/label_image_classifier.py new file mode 100644 index 0000000000..08567fb847 --- /dev/null +++ b/jsk_perception/src/jsk_perception/label_image_classifier.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python + +import cv_bridge +from jsk_recognition_msgs.msg import ClassificationResult +from jsk_topic_tools import ConnectionBasedTransport +import numpy as np +import rospy +from sensor_msgs.msg import Image + +class LabelImageClassifier(ConnectionBasedTransport): + + classifier_name = 'label_image_classifier' + + def __init__(self): + super(LabelImageClassifier, self).__init__() + self.ignore_labels = rospy.get_param('~ignore_labels', []) + self.target_names = rospy.get_param('~target_names', []) + self.pub = self.advertise( + '~output', ClassificationResult, queue_size=1) + + def subscribe(self): + self.sub = rospy.Subscriber('~input', Image, self._cb) + + def unsubscribe(self): + self.sub.unregister() + + def _cb(self, imgmsg): + bridge = cv_bridge.CvBridge() + img = bridge.imgmsg_to_cv2(imgmsg) + label, proba = self._classify(img) + msg = ClassificationResult() + msg.header = imgmsg.header + msg.labels = [label] + msg.label_names = [self.target_names[label]] + msg.label_proba = [proba[label]] + msg.probabilities = proba + msg.classifier = self.classifier_name + msg.target_names = self.target_names + self.pub.publish(msg) + + def _classify(self, label_img): + counts = np.bincount(label_img.flatten(), + minlength=len(self.target_names)) + counts[self.ignore_labels] = 0 + label = np.argmax(counts) + proba = counts.astype(np.float32) / counts.sum() + return label, proba diff --git a/jsk_perception/src/jsk_perception/vgg16_object_recognition.py b/jsk_perception/src/jsk_perception/vgg16_object_recognition.py new file mode 100644 index 0000000000..f582daa74b --- /dev/null +++ b/jsk_perception/src/jsk_perception/vgg16_object_recognition.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + + +import itertools, 
pkg_resources, sys +from distutils.version import LooseVersion +if LooseVersion(pkg_resources.get_distribution("chainer").version) >= LooseVersion('7.0.0') and \ + sys.version_info.major == 2: + print('''Please install chainer < 7.0.0: + + sudo pip install chainer==6.7.0 + +c.f https://github.com/jsk-ros-pkg/jsk_recognition/pull/2485 +''', file=sys.stderr) + sys.exit(1) +if [p for p in list(itertools.chain(*[pkg_resources.find_distributions(_) for _ in sys.path])) if "cupy-" in p.project_name or "cupy" == p.project_name ] == []: + print('''Please install CuPy + + sudo pip install cupy-cuda[your cuda version] +i.e. + sudo pip install cupy-cuda91 + +''', file=sys.stderr) + # sys.exit(1) +import chainer +from chainer import cuda +import chainer.serializers as S +from chainer import Variable +from distutils.version import LooseVersion +import numpy as np +import skimage.transform + +import cv_bridge +from jsk_recognition_msgs.msg import ClassificationResult +from jsk_recognition_utils.chainermodels import VGG16 +from jsk_recognition_utils.chainermodels import VGG16BatchNormalization +from jsk_topic_tools import ConnectionBasedTransport +from jsk_topic_tools.log_utils import logerr_throttle +import message_filters +import rospy +from sensor_msgs.msg import Image + + +class VGG16ObjectRecognition(ConnectionBasedTransport): + + mean_bgr = np.array([104.00698793, 116.66876762, 122.67891434]) + + def __init__(self): + super(self.__class__, self).__init__() + self.insize = 224 + self.gpu = rospy.get_param('~gpu', -1) + self.target_names = rospy.get_param('~target_names') + self.model_name = rospy.get_param('~model_name') + if self.model_name == 'vgg16': + self.model = VGG16(n_class=len(self.target_names)) + elif self.model_name == 'vgg16_batch_normalization': + self.model = VGG16BatchNormalization( + n_class=len(self.target_names)) + else: + rospy.logerr('Unsupported ~model_name: {0}' + .format(self.model_name)) + model_file = rospy.get_param('~model_file') + S.load_hdf5(model_file, self.model) + if self.gpu != -1: + self.model.to_gpu(self.gpu) + self.pub = self.advertise('~output', ClassificationResult, + queue_size=1) + self.pub_input = self.advertise( + '~debug/net_input', Image, queue_size=1) + + def subscribe(self): + if rospy.get_param('~use_mask', False): + # larger buff_size is necessary for taking time callback + # http://stackoverflow.com/questions/26415699/ros-subscriber-not-up-to-date/29160379#29160379 # NOQA + sub = message_filters.Subscriber( + '~input', Image, queue_size=1, buff_size=2**24) + sub_mask = message_filters.Subscriber( + '~input/mask', Image, queue_size=1, buff_size=2**24) + self.subs = [sub, sub_mask] + queue_size = rospy.get_param('~queue_size', 10) + if rospy.get_param('~approximate_sync', False): + slop = rospy.get_param('~slop', 0.1) + sync = message_filters.ApproximateTimeSynchronizer( + self.subs, queue_size=queue_size, slop=slop) + else: + sync = message_filters.TimeSynchronizer( + self.subs, queue_size=queue_size) + sync.registerCallback(self._recognize) + else: + sub = rospy.Subscriber( + '~input', Image, self._recognize, callback_args=None, + queue_size=1, buff_size=2**24) + self.subs = [sub] + + def unsubscribe(self): + for sub in self.subs: + sub.unregister() + + def _recognize(self, imgmsg, mask_msg=None): + bridge = cv_bridge.CvBridge() + bgr = bridge.imgmsg_to_cv2(imgmsg, desired_encoding='bgr8') + if mask_msg is not None: + mask = bridge.imgmsg_to_cv2(mask_msg) + if mask.shape != bgr.shape[:2]: + logerr_throttle(10, + 'Size of input image and mask is 
different') + return + elif mask.size == 0: + logerr_throttle(10, 'Size of input mask is 0') + return + bgr[mask == 0] = self.mean_bgr + bgr = skimage.transform.resize( + bgr, (self.insize, self.insize), preserve_range=True) + input_msg = bridge.cv2_to_imgmsg(bgr.astype(np.uint8), encoding='bgr8') + input_msg.header = imgmsg.header + self.pub_input.publish(input_msg) + + blob = (bgr - self.mean_bgr).transpose((2, 0, 1)) + x_data = np.array([blob], dtype=np.float32) + if self.gpu != -1: + x_data = cuda.to_gpu(x_data, device=self.gpu) + if LooseVersion(chainer.__version__) < LooseVersion('2.0.0'): + x = Variable(x_data, volatile=True) + self.model.train = False + self.model(x) + else: + with chainer.using_config('train', False), \ + chainer.no_backprop_mode(): + x = Variable(x_data) + self.model(x) + + proba = cuda.to_cpu(self.model.pred.data)[0] + label = np.argmax(proba) + label_name = self.target_names[label] + label_proba = proba[label] + cls_msg = ClassificationResult( + header=imgmsg.header, + labels=[label], + label_names=[label_name], + label_proba=[label_proba], + probabilities=proba, + classifier=self.model_name, + target_names=self.target_names, + ) + self.pub.publish(cls_msg) diff --git a/jsk_perception/src/jsk_perception/vil_inference_client.py b/jsk_perception/src/jsk_perception/vil_inference_client.py new file mode 100644 index 0000000000..02026af4ff --- /dev/null +++ b/jsk_perception/src/jsk_perception/vil_inference_client.py @@ -0,0 +1,249 @@ +#!/usr/bin/env python +import abc +import base64 +import json + +import actionlib +import requests +import rospy +from cv_bridge import CvBridge +from dynamic_reconfigure.server import Server +from jsk_perception.cfg import ClassificationConfig, VQAConfig +from jsk_recognition_msgs.msg import (ClassificationResult, + ClassificationTaskAction, + ClassificationTaskFeedback, + ClassificationTaskResult, + QuestionAndAnswerText, VQAResult, + VQATaskAction, VQATaskFeedback, + VQATaskResult) +from requests.exceptions import ConnectionError +from sensor_msgs.msg import CompressedImage, Image +from std_msgs.msg import String + +import cv2 + + +class DockerInferenceClientBase(object): + def __init__(self, action, + server_config, + result_topic, + action_feedback, + action_result, + app_name): + # inference server configuration + self.host = rospy.get_param("~host", default="localhost") + self.port = rospy.get_param("~port", default=8888) + self.app_name = app_name + # cv bridge + self._bridge = CvBridge() + # default inference image + self.default_img = None + # ROS + self.transport_hint = rospy.get_param('~image_transport', 'raw') + if self.transport_hint == 'compressed': + self.image_sub = rospy.Subscriber( + "{}/compressed".format(rospy.resolve_name('~image')), + CompressedImage, + callback=self.topic_cb, + queue_size=1, + buff_size=2**26 + ) + + else: + self.image_sub = rospy.Subscriber("~image", Image, + callback=self.topic_cb, + queue_size=1, + buff_size=2**26) + self.result_topic_type = result_topic + self.result_pub = rospy.Publisher("~result", result_topic, queue_size=1) + if self.transport_hint == 'compressed': + self.image_pub = rospy.Publisher("~result/image/compressed", CompressedImage, queue_size=1) + else: + self.image_pub = rospy.Publisher("~result/image", Image, queue_size=1) + self.vis_pub = rospy.Publisher("~visualize", String, queue_size=1) + self.action_server = actionlib.SimpleActionServer("~inference_server", + action, + execute_cb=self.action_cb, + auto_start=False) + self.action_feedback = action_feedback + 
self.action_result = action_result + self.reconfigure_server = Server(server_config, self.config_cb) + self.action_server.start() + + def ros_img_to_base(self, ros_img): + if type(ros_img) is CompressedImage: + cv_img = self._bridge.compressed_imgmsg_to_cv2(ros_img, desired_encoding="bgr8") + elif type(ros_img) is Image: + cv_img = self._bridge.imgmsg_to_cv2(ros_img, desired_encoding="bgr8") + else: + raise RuntimeError("Unknown type {}".format(type(ros_img))) + # convert to base64 + encimg = cv2.imencode(".png", cv_img)[1] + img_str = encimg.tostring() + img_byte = base64.b64encode(img_str).decode("utf-8") + return img_byte + + def config_cb(self, config, level): + self.config = config + return config + + @abc.abstractmethod + def topic_cb(self, msg): + pass + + def action_cb(self, goal): + success = True + result = self.action_result() + feedback = self.action_feedback() + if goal.image.data and (not goal.compressed_image.data): + image = goal.image + # result.result.image = image + elif (not goal.image.data) and goal.compressed_image.data: + image = goal.compressed_image + # result.result.compressed_image = image + elif goal.image.data and goal.image.compressed_image.data: + rospy.logerr("Both image and compressed image can not be added simultaneously") + return + else: + rospy.loginfo("No images in goal message, so using subscribed image topic instead") + image = self.default_img + queries = self.create_queries(goal) + try: + result.result = self.inference(image, queries) + except Exception as e: + rospy.logerr(str(e)) + feedback.status = str(e) + success = False + finally: + self.action_server.publish_feedback(feedback) + result.done = success + self.action_server.set_succeeded(result) + + @abc.abstractmethod + def create_queries(self, goal): + pass + + @abc.abstractmethod + def inference(self, img_msg, queries): + pass + + def send_request(self, content): + url = "http://{}:{}/{}".format(self.host, str(self.port), self.app_name) + try: + response = requests.post(url, data=content) + except ConnectionError as e: + rospy.logwarn_once("Cannot establish the connection with API server. 
Is it running?") + raise e + else: + if response.status_code == 200: + return response + else: + err_msg = "Invalid http status code: {}".format(str(response.status_code)) + rospy.logerr(err_msg) + raise RuntimeError(err_msg) + + +class ClipClientNode(DockerInferenceClientBase): + def __init__(self): + DockerInferenceClientBase.__init__(self, + ClassificationTaskAction, + ClassificationConfig, + ClassificationResult, + ClassificationTaskFeedback, + ClassificationTaskResult, + "inference") + + def topic_cb(self, data): + if not self.config: rospy.logwarn("No queries"); return + if not self.config.queries: rospy.logwarn("No queries"); return + queries = self.config.queries.split(";") + try: + msg = self.inference(data, queries) + except Exception: return + # publish debug image + self.image_pub.publish(data) + # publish classification result + msg.header = data.header + self.result_pub.publish(msg) + # publish probabilities result as string + vis_msg = "" + for i, label in enumerate(msg.label_names): + vis_msg += "{}: {:.2f}% ".format(label, msg.probabilities[i]*100) + vis_msg += "\n\nCosine Similarity\n" + for i, label in enumerate(msg.label_names): + vis_msg += "{}: {:.4f} ".format(label, msg.label_proba[i]) + self.vis_pub.publish(vis_msg) + + def create_queries(self, goal): + return goal.queries + + def inference(self, img_msg, queries): + img_byte = self.ros_img_to_base(img_msg) + req = json.dumps({"image": img_byte, + "queries": queries}).encode("utf-8") + response = self.send_request(req) + result_dic = json.loads(response.text)["results"] + labels = [] + probabilities = [] + similarities = [] + for r in result_dic: + labels.append(r["question"]) + probabilities.append(float(r["probability"])) + similarities.append(float(r["similarity"])) + labels = [label for _,label in sorted(zip(probabilities, labels), reverse=True)] + probabilities.sort(reverse=True) + similarities.sort(reverse=True) + # build ClassificationResult message + msg = self.result_topic_type() + msg.labels = list(range(len(labels))) + msg.label_names = labels + msg.label_proba = similarities # cosine similarities + msg.probabilities = probabilities # sum(probabilities) is 1 + msg.classifier = 'clip' + msg.target_names = queries + return msg + + +class OFAClientNode(DockerInferenceClientBase): + def __init__(self): + self.vqa_type = rospy.get_param("~vqa_type", default="caption") # caption, vqa_gen. 
caption is better than vqa_gen in OFA + if self.vqa_type not in ["caption", "vqa_gen"]: + raise RuntimeError("VQA type must be caption or vqa_gen") + DockerInferenceClientBase.__init__(self, + VQATaskAction, + VQAConfig, + VQAResult, + VQATaskFeedback, + VQATaskResult, + self.vqa_type) + + def create_queries(self, goal): + return goal.questions + + def inference(self, img_msg, queries): + img_byte = self.ros_img_to_base(img_msg) + req = json.dumps({"image": img_byte, + "queries": queries}).encode("utf-8") + response = self.send_request(req) + json_result = json.loads(response.text) + msg = self.result_topic_type() + for result in json_result["results"]: + result_msg = QuestionAndAnswerText() + result_msg.question = result["question"] + result_msg.answer = result["answer"] + msg.result.append(result_msg) + return msg + + def topic_cb(self, data): + if not self.config.questions: rospy.logwarn("No questions"); return + queries = self.config.questions.split(";") + try: + msg = self.inference(data, queries) + except Exception: return + self.image_pub.publish(data) + self.result_pub.publish(msg) + vis = "" + for qa in msg.result: + vis += "Q:{}\n A:{}\n".format(qa.question, + qa.answer) + self.vis_pub.publish(vis) diff --git a/jsk_perception/src/point_pose_extractor.cpp b/jsk_perception/src/point_pose_extractor.cpp index e94a5b7a32..3a49185548 100644 --- a/jsk_perception/src/point_pose_extractor.cpp +++ b/jsk_perception/src/point_pose_extractor.cpp @@ -32,677 +32,29 @@ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. *********************************************************************/ - -#include -#include -#include -#if ( CV_MAJOR_VERSION >= 4) -#include -#include -#include -#else -#include -#include -#endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "jsk_perception/point_pose_extractor.h" #include -namespace enc = sensor_msgs::image_encodings; - -bool _first_sample_change; - -void features2keypoint (posedetection_msgs::Feature0D features, - std::vector& keypoints, - cv::Mat& descriptors){ - keypoints.resize(features.scales.size()); - descriptors.create(features.scales.size(),features.descriptor_dim,CV_32FC1); - std::vector::iterator keypoint_it = keypoints.begin(); - for ( int i=0; keypoint_it != keypoints.end(); ++keypoint_it, ++i ) { - *keypoint_it = cv::KeyPoint(cv::Point2f(features.positions[i*2+0], - features.positions[i*2+1]), - features.descriptor_dim, // size - features.orientations[i], //angle - 0, // resonse - features.scales[i] // octave - ); - for (int j = 0; j < features.descriptor_dim; j++){ - descriptors.at(i,j) = - features.descriptors[i*features.descriptor_dim+j]; - } - } -} - - -class Matching_Template -{ -public: - std::string _matching_frame; - cv::Mat _template_img; - std::vector _template_keypoints; - cv::Mat _template_descriptors; - int _original_width_size; - int _original_height_size; - double _template_width; // width of template [m] - double _template_height; // height of template [m] - tf::Transform _relativepose; - cv::Mat _affine_matrix; - std::string _window_name; - // The maximum allowed reprojection error to treat a point pair as an inlier - double _reprojection_threshold; - // threshold on squared ratio of distances between NN and 2nd NN - double _distanceratio_threshold; - std::vector _correspondances; - cv::Mat _previous_stack_img; - - 
Matching_Template(){ - } - Matching_Template(cv::Mat img, - std::string matching_frame, - int original_width_size, - int original_height_size, - double template_width, - double template_height, - tf::Transform relativepose, - cv::Mat affine_matrix, - double reprojection_threshold, - double distanceratio_threshold, - std::string window_name, - bool autosize){ - - _template_img = img.clone(); - _matching_frame = matching_frame; - _original_width_size = original_width_size; - _original_height_size = original_height_size; - _template_width = template_width; - _template_height = template_height; - _relativepose = relativepose; - _affine_matrix = affine_matrix; - _window_name = window_name; - _reprojection_threshold = reprojection_threshold; - _distanceratio_threshold = distanceratio_threshold; - _template_keypoints.clear(); - _correspondances.clear(); - // cv::namedWindow(_window_name, autosize ? CV_WINDOW_AUTOSIZE : 0); - } - - - virtual ~Matching_Template(){ - std::cerr << "delete " << _window_name << std::endl; - } - - - std::string get_window_name(){ - return _window_name; - } - - std::vector* correspondances(){ - return &_correspondances; - } - - bool check_template (){ - if (_template_img.empty() && _template_keypoints.size() == 0 && - _template_descriptors.empty()){ - return false; - } - else { - return true; - } - } - - - int set_template(ros::ServiceClient client){ - posedetection_msgs::Feature0DDetect srv; - - if ( _template_img.empty()) { - ROS_ERROR ("template picture is empty."); - return -1; - } - ROS_INFO_STREAM("read template image and call " << client.getService() << " service"); - cv_bridge::CvImage cv_img; - cv_img.image = cv::Mat(_template_img); - cv_img.encoding = std::string("bgr8"); - srv.request.image = *(cv_img.toImageMsg()); - if (client.call(srv)) { - ROS_INFO_STREAM("get features with " << srv.response.features.scales.size() << " descriptoins"); - features2keypoint (srv.response.features, _template_keypoints, _template_descriptors); - - // inverse affine translation - cv::Mat M_inv; - cv::Mat dst; - cv::invert(_affine_matrix, M_inv); - // cv::drawKeypoints (tmp, _template_keypoints, dst, cv::Scalar::all(1), cv::DrawMatchesFlags::DRAW_RICH_KEYPOINTS); - // cv::drawKeypoints (tmp, _template_keypoints, dst); - // cv::warpPerspective(_template_img, dst, M_inv, - // cv::Size(_original_width_size, _original_height_size), - // CV_INTER_LINEAR, IPL_BORDER_CONSTANT, 0); - for (int i = 0; i < (int)_template_keypoints.size(); i++){ - cv::Point2f pt; - cv::Mat pt_mat(cv::Size(1,1), CV_32FC2, &pt); - cv::Mat pt_src_mat(cv::Size(1,1), CV_32FC2, &_template_keypoints.at(i).pt); - cv::perspectiveTransform (pt_src_mat, pt_mat, M_inv); - _template_keypoints.at(i).pt = pt_mat.at(0,0); - } - // cvSetMouseCallback (_window_name.c_str(), &PointPoseExtractor::cvmousecb, this); - // _template_img = dst; - return 0; - } else { - ROS_ERROR("Failed to call service Feature0DDetect"); - return 1; - } - } - - - double log_fac( int n ) - { - static std::vector slog_table; - int osize = slog_table.size(); - if(osize <= n){ - slog_table.resize(n+1); - if(osize == 0){ - slog_table[0] = -1; - slog_table[1] = log(1.0); - osize = 2; - } - for(int i = osize; i <= n; i++ ){ - slog_table[i] = slog_table[i-1] + log(i); - } - } - return slog_table[n]; - } - - int min_inlier( int n, int m, double p_badsupp, double p_badxform ) - { - double pi, sum; - int i, j; - double lp_badsupp = log( p_badsupp ); - double lp_badsupp1 = log( 1.0 - p_badsupp ); - for( j = m+1; j <= n; j++ ){ - sum = 0; - for( i = j; i <= n; 
i++ ){ - pi = (i-m) * lp_badsupp + (n-i+m) * lp_badsupp1 + - log_fac( n - m ) - log_fac( i - m ) - log_fac( n - i ); - sum += exp( pi ); - } - if( sum < p_badxform ) - break; - } - return j; - } - - - bool estimate_od (ros::ServiceClient client, cv::Mat src_img, - std::vector sourceimg_keypoints, - image_geometry::PinholeCameraModel pcam, - double err_thr, cv::Mat &stack_img, - cv::flann::Index* ft, posedetection_msgs::Object6DPose* o6p){ - - if ( _template_keypoints.size()== 0 && - _template_descriptors.empty() ){ - set_template(client); - } - if ( _template_keypoints.size()== 0 && - _template_descriptors.empty() ) { - ROS_ERROR ("Template image was not set."); - return false; - } - // stacked image - cv::Size stack_size = cv::Size(MAX(src_img.cols,_template_img.cols), - src_img.rows+_template_img.rows); - stack_img = cv::Mat(stack_size,CV_8UC3); - stack_img = cv::Scalar(0); - cv::Mat stack_img_tmp(stack_img,cv::Rect(0,0,_template_img.cols,_template_img.rows)); - cv::add(stack_img_tmp, _template_img, stack_img_tmp); - cv::Mat stack_img_src(stack_img,cv::Rect(0,_template_img.rows,src_img.cols,src_img.rows)); - cv::add(stack_img_src, src_img, stack_img_src); - _previous_stack_img = stack_img.clone(); - - // matching - cv::Mat m_indices(_template_descriptors.rows, 2, CV_32S); - cv::Mat m_dists(_template_descriptors.rows, 2, CV_32F); - ft->knnSearch(_template_descriptors, m_indices, m_dists, 2, cv::flann::SearchParams(-1) ); - - // matched points - std::vector pt1, pt2; - std::vector queryIdxs,trainIdxs; - for ( unsigned int j = 0; j < _template_keypoints.size(); j++ ) { - if ( m_dists.at(j,0) < m_dists.at(j,1) * _distanceratio_threshold) { - queryIdxs.push_back(j); - trainIdxs.push_back(m_indices.at(j,0)); - } - } - if ( queryIdxs.size() == 0 ) { - ROS_WARN_STREAM("could not find matched points with distanceratio(" <<_distanceratio_threshold << ")"); - } else { - cv::KeyPoint::convert(_template_keypoints,pt1,queryIdxs); - cv::KeyPoint::convert(sourceimg_keypoints,pt2,trainIdxs); - } - - ROS_INFO ("Found %d total matches among %d template keypoints", (int)pt2.size(), (int)_template_keypoints.size()); - - cv::Mat H; - std::vector mask((int)pt2.size()); - - if ( pt1.size() > 4 ) { - // ToDO for curve face - H = cv::findHomography(cv::Mat(pt1), cv::Mat(pt2), mask, CV_RANSAC, _reprojection_threshold); - } - - // draw line - for (int j = 0; j < (int)pt1.size(); j++){ - cv::Point2f pt, pt_orig; - cv::Mat pt_mat(cv::Size(1,1), CV_32FC2, &pt); - cv::Mat pt_src_mat(cv::Size(1,1), CV_32FC2, &pt1.at(j)); - cv::perspectiveTransform (pt_src_mat, pt_mat, _affine_matrix); - pt_orig = pt_mat.at(0,0); - if ( mask.at(j)){ - cv::line(stack_img, pt_orig, pt2.at(j)+cv::Point2f(0,_template_img.rows), - CV_RGB(0,255,0), 1,8,0); - } - else { - cv::line(stack_img, pt_orig, pt2.at(j)+cv::Point2f(0,_template_img.rows), - CV_RGB(255,0,255), 1,8,0); - } - } - int inlier_sum = 0; - for (int k=0; k < (int)mask.size(); k++){ - inlier_sum += (int)mask.at(k); - } - - double text_scale = 1.5; - { - int fontFace = 0, thickness = 0, baseLine; - int x, y; - cv::Size text_size; - std::string text; - - text = "inlier: " + boost::lexical_cast((int)inlier_sum) + " / " + boost::lexical_cast((int)pt2.size()); - text_size = cv::getTextSize(text, fontFace, text_scale, thickness, &baseLine); - x = stack_img.size().width - text_size.width; - y = text_size.height + thickness + 10; // 10pt pading - cv::putText (stack_img, text, cv::Point(x, y), - fontFace, text_scale, CV_RGB(0, 255, 0), - thickness, 8, false); - - text = "template: " + 
boost::lexical_cast((int)_template_keypoints.size()); - text_size = cv::getTextSize(text, fontFace, text_scale, thickness, &baseLine); - x = stack_img.size().width - text_size.width; - y += text_size.height + thickness + 10; // 10pt pading - cv::putText (stack_img, text, cv::Point(x, y), - fontFace, text_scale, CV_RGB(0, 255, 0), - thickness, 8, false); - } - - // draw correspondances - ROS_INFO(" _correspondances.size: %d", (int)_correspondances.size()); - for (int j = 0; j < (int)_correspondances.size(); j++){ - cv::circle(stack_img, cv::Point2f(_correspondances.at(j).x, _correspondances.at(j).y + _template_img.size().height), - 8, CV_RGB(255,0,0), -1); - } - - ROS_INFO(" inlier_sum:%d min_lier:%d", inlier_sum, min_inlier((int)pt2.size(), 4, 0.10, 0.01)); - if ((cv::countNonZero( H ) == 0) || (inlier_sum < min_inlier((int)pt2.size(), 4, 0.10, 0.01))){ - ROS_INFO(" inlier_sum < min_lier return-from estimate-od"); - if( _window_name != "" ) - cv::imshow(_window_name, stack_img); - return false; - } - - std::string _type; - char chr[20]; - - // number of match points - sprintf(chr, "%d", (int)pt2.size()); - _type = _matching_frame + "_" + std::string(chr); - - sprintf(chr, "%d", inlier_sum); - _type = _type + "_" + std::string(chr); - - cv::Point2f corners2d[4] = {cv::Point2f(0,0), - cv::Point2f(_original_width_size,0), - cv::Point2f(_original_width_size,_original_height_size), - cv::Point2f(0,_original_height_size)}; - cv::Mat corners2d_mat (cv::Size(4, 1), CV_32FC2, corners2d); - cv::Point3f corners3d[4] = {cv::Point3f(0,0,0), - cv::Point3f(0,_template_width,0), - cv::Point3f(_template_height,_template_width,0), - cv::Point3f(_template_height,0,0)}; - cv::Mat corners3d_mat (cv::Size(4, 1), CV_32FC3, corners3d); - - cv::Mat corners2d_mat_trans; - - cv::perspectiveTransform (corners2d_mat, corners2d_mat_trans, H); - - double fR3[3], fT3[3]; - cv::Mat rvec(3, 1, CV_64FC1, fR3); - cv::Mat tvec(3, 1, CV_64FC1, fT3); - cv::Mat zero_distortion_mat = cv::Mat::zeros(4, 1, CV_64FC1); - - cv::solvePnP (corners3d_mat, corners2d_mat_trans, - pcam.intrinsicMatrix(), - zero_distortion_mat,//if unrectified: pcam.distortionCoeffs() - rvec, tvec); - - tf::Transform checktf, resulttf; - - checktf.setOrigin( tf::Vector3(fT3[0], fT3[1], fT3[2] ) ); - double rx = fR3[0], ry = fR3[1], rz = fR3[2]; - tf::Quaternion quat; - double angle = cv::norm(rvec); - quat.setRotation(tf::Vector3(rx/angle, ry/angle, rz/angle), angle); - checktf.setRotation( quat ); - - resulttf = checktf * _relativepose; - - ROS_INFO( " tx: (%0.2lf,%0.2lf,%0.2lf) rx: (%0.2lf,%0.2lf,%0.2lf)", - resulttf.getOrigin().getX(), - resulttf.getOrigin().getY(), - resulttf.getOrigin().getZ(), - resulttf.getRotation().getAxis().x() * resulttf.getRotation().getAngle(), - resulttf.getRotation().getAxis().y() * resulttf.getRotation().getAngle(), - resulttf.getRotation().getAxis().z() * resulttf.getRotation().getAngle()); - - o6p->pose.position.x = resulttf.getOrigin().getX(); - o6p->pose.position.y = resulttf.getOrigin().getY(); - o6p->pose.position.z = resulttf.getOrigin().getZ(); - o6p->pose.orientation.w = resulttf.getRotation().w(); - o6p->pose.orientation.x = resulttf.getRotation().x(); - o6p->pose.orientation.y = resulttf.getRotation().y(); - o6p->pose.orientation.z = resulttf.getRotation().z(); - o6p->type = _matching_frame; // _type - - // draw 3d cube model - std::vector projected_top; - { - tf::Vector3 coords[8] = {tf::Vector3(0,0,0), - tf::Vector3(0, _template_width, 0), - tf::Vector3(_template_height, _template_width,0), - 
tf::Vector3(_template_height, 0, 0), - tf::Vector3(0, 0, -0.03), - tf::Vector3(0, _template_width, -0.03), - tf::Vector3(_template_height, _template_width, -0.03), - tf::Vector3(_template_height, 0, -0.03)}; - - projected_top = std::vector(8); - - for(int i=0; i<8; i++) { - coords[i] = checktf * coords[i]; - cv::Point3f pt(coords[i].getX(), coords[i].getY(), coords[i].getZ()); - projected_top[i] = pcam.project3dToPixel(pt); - } - } - - { // check if the matched region does not too big or too small - float max_x, max_y, min_x, min_y; - max_x = max_y = -1e9; - min_x = min_y = 1e9; - for (int j = 0; j < 4; j++){ - cv::Point2f pt = corners2d_mat_trans.at(0,j); - max_x = std::max(max_x, pt.x), max_y = std::max(max_y, pt.y); - min_x = std::min(min_x, pt.x), min_y = std::min(min_y, pt.y); - } - if((max_x - min_x) < 30 || (max_y - min_y) < 30 || - src_img.rows < (max_x - min_x)/2 || src_img.cols < (max_y - min_y)/2){ - ROS_INFO(" matched region is too big or small (2< && <30) width:%f height:%f return-from estimate-od", max_x - min_x, max_y - min_y); - return false; - } - } - - double err_sum = 0; - bool err_success = true; - for (int j = 0; j < 4; j++){ - double err = sqrt(pow((corners2d_mat_trans.at(0,j).x - projected_top.at(j).x), 2) + - pow((corners2d_mat_trans.at(0,j).y - projected_top.at(j).y), 2)); - err_sum += err; - } - if (err_sum > err_thr){ - ROS_INFO(" err_sum:%f > err_thr:%f return-from estimate-od", err_sum, err_thr); - err_success = false; - } else { - o6p->reliability = 1.0 - (err_sum / err_thr); - } - // draw lines around the detected object - for (int j = 0; j < corners2d_mat_trans.cols; j++){ - cv::Point2f p1(corners2d_mat_trans.at(0,j).x, - corners2d_mat_trans.at(0,j).y+_template_img.rows); - cv::Point2f p2(corners2d_mat_trans.at(0,(j+1)%corners2d_mat_trans.cols).x, - corners2d_mat_trans.at(0,(j+1)%corners2d_mat_trans.cols).y+_template_img.rows); - cv::line (stack_img, p1, p2, CV_RGB(255, 0, 0), - (err_success?4:1), // width - CV_AA, 0); - } - - // draw 3d cube model - if(projected_top.size() == 8) { // verify, the size is 8 - int cnt = 8; - std::vector ps(cnt); - for(int i=0; i ps(4); - - for(int i=0; i<4; i++) { - coords[i] = resulttf * coords[i]; - cv::Point3f pt(coords[i].getX(), coords[i].getY(), coords[i].getZ()); - ps[i] = pcam.project3dToPixel(pt); - ps[i].y += _template_img.rows; // draw on camera image - } - - cv::line (stack_img, ps[0], ps[1], CV_RGB(255, 0, 0), 3, CV_AA, 0); - cv::line (stack_img, ps[0], ps[2], CV_RGB(0, 255, 0), 3, CV_AA, 0); - cv::line (stack_img, ps[0], ps[3], CV_RGB(0, 0, 255), 3, CV_AA, 0); - } - - // write text on image - { - std::string text; - int x, y; - text = "error: " + boost::lexical_cast(err_sum); - x = stack_img.size().width - 16*17*text_scale; // 16pt * 17 - y = _template_img.size().height - (16 + 2)*text_scale*6; - cv::putText (stack_img, text, cv::Point(x, y), - 0, text_scale, CV_RGB(0, 255, 0), - 2, 8, false); - ROS_INFO(" %s < %f (threshold)", text.c_str(), err_thr ); - } - // for debug window - if( _window_name != "" ) - cv::imshow(_window_name, stack_img); - - return err_success; - } -}; // the end of difinition of class Matching_Template - - -class PointPoseExtractor +namespace jsk_perception { - ros::NodeHandle _n; - image_transport::ImageTransport it; - ros::Subscriber _sub; - ros::ServiceServer _server; - ros::ServiceClient _client; - ros::Publisher _pub, _pub_agg, _pub_pose; - tf::TransformBroadcaster _br; - image_transport::Publisher _debug_pub; - double _reprojection_threshold; - double _distanceratio_threshold; - 
double _th_step; - double _phi_step; - bool _autosize; - double _err_thr; - static std::vector _templates; - image_geometry::PinholeCameraModel pcam; - bool pnod; - bool _initialized; - bool _viewer; - bool _publish_tf; - std::string _child_frame_id; - -public: - PointPoseExtractor() : it(ros::NodeHandle("~")) { - // _sub = _n.subscribe("ImageFeature0D", 1, - // &PointPoseExtractor::imagefeature_cb, this); - _client = _n.serviceClient("Feature0DDetect"); - _pub = _n.advertise("ObjectDetection", 10); - _pub_agg = _n.advertise("ObjectDetection_agg", 10); - _pub_pose = _n.advertise("object_pose", 10); - _debug_pub = it.advertise("debug_image", 1); - _server = _n.advertiseService("SetTemplate", &PointPoseExtractor::settemplate_cb, this); - _initialized = false; - } - - virtual ~PointPoseExtractor(){ + PointPoseExtractor::~PointPoseExtractor(){ _sub.shutdown(); _client.shutdown(); _pub.shutdown(); _pub_agg.shutdown(); } - static void make_template_from_mousecb(Matching_Template *mt){ - cv::Mat H; - cv::Mat tmp_template, tmp_warp_template; - std::vectorpt1, pt2; - double width, height; - std::string filename; - std::cout << "input template's [width]" << std::endl; - std::cin >> width; - std::cout << "input template's [height]" << std::endl; - std::cin >> height; - std::cout << "input template's [filename]" << std::endl; - std::cin >> filename; - - for (int i = 0; i < 4; i++){ - pt1.push_back(cv::Point2d((int)mt->_correspondances.at(i).x, - (int)mt->_correspondances.at(i).y + mt->_template_img.size().height)); - } - cv::Rect rect = cv::boundingRect(cv::Mat(pt1)); - double scale = std::max(width, height) / 500.0; - int iwidth = width / scale, iheight = height / scale; - pt2.push_back(cv::Point2d(0,0)); - pt2.push_back(cv::Point2d(iwidth,0)); - pt2.push_back(cv::Point2d(iwidth,iheight)); - pt2.push_back(cv::Point2d(0, iheight)); - H = cv::findHomography(cv::Mat(pt1), cv::Mat(pt2)); - - cv::getRectSubPix(mt->_previous_stack_img, rect.size(), - cv::Point2f((rect.tl().x + rect.br().x)/2.0,(rect.tl().y + rect.br().y)/2.0), - tmp_template); - cv::warpPerspective(mt->_previous_stack_img, tmp_warp_template, H, cv::Size(iwidth, iheight)); - - try { - cv::imwrite(filename,tmp_template); - boost::filesystem::path fname(filename); - std::stringstream ss; - ss << fname.stem() << "_wrap" << fname.extension(); - cv::imwrite(ss.str(),tmp_warp_template); - }catch (cv::Exception e) { - std::cerr << e.what() << std::endl; - } - - for (int i = 0; i < (int)pt1.size(); i++){ - pt2.push_back(cv::Point2d((int)pt1.at(i).x - rect.x, - (int)pt1.at(i).y - rect.y - mt->_template_img.size().height)); - } - // cv::Mat mask_img = cv::Mat::zeros(tmp_template.size(), CV_8UC3); - // cv::fillConvexPoly(mask_img, pt2.begin(), (int)pt2.size(), CV_RGB(255, 255, 255)); - - // cv::namedWindow("hoge", 1); - // cv::imshow("hoge", mask_img); - - cv::Mat M = (cv::Mat_(3,3) << 1,0,0, 0,1,0, 0,0,1); - std::string window_name = "sample" + boost::lexical_cast((int)_templates.size()); - - Matching_Template* tmplt = - new Matching_Template (tmp_warp_template, "sample", - tmp_warp_template.size().width, tmp_warp_template.size().height, - width, height, - tf::Transform(tf::Quaternion(0, 0, 0, 1), tf::Vector3(0, 0, 0)), - M, - mt->_reprojection_threshold, - mt->_distanceratio_threshold, - _first_sample_change ? window_name : mt->_window_name, - cv::getWindowProperty(mt->_window_name, CV_WND_PROP_AUTOSIZE)); - - mt->_correspondances.clear(); - _templates.push_back(tmplt); - cv::namedWindow(_first_sample_change ? 
window_name : mt->_window_name, - cv::getWindowProperty(mt->_window_name, CV_WND_PROP_AUTOSIZE)); - cvSetMouseCallback (_first_sample_change ? window_name.c_str() : mt->_window_name.c_str(), - &cvmousecb, static_cast(_templates.back())); - _first_sample_change = true; - } - - static void cvmousecb (int event, int x, int y, int flags, void* param){ - Matching_Template *mt = (Matching_Template*)param; - // std::cerr << "mousecb_ -> " << mt << std::endl; - switch (event){ - case CV_EVENT_LBUTTONUP: { - cv::Point2d pt(x,y - (int)mt->_template_img.size().height); - ROS_INFO("add correspondence (%d, %d)", (int)pt.x, (int)pt.y); - mt->_correspondances.push_back(pt); - if ((int)mt->_correspondances.size() >= 4){ - make_template_from_mousecb(mt); - mt->_correspondances.clear(); - ROS_INFO("reset"); - } - break; - } - case CV_EVENT_RBUTTONUP: { - mt->_correspondances.clear(); - ROS_INFO("reset"); - break; - } - } - } - - - void initialize () { + void PointPoseExtractor::initialize () { std::string _pose_str; double template_width; double template_height; std::string template_filename; std::string window_name; - ros::NodeHandle local_nh("~"); - local_nh.param("child_frame_id", _child_frame_id, std::string("matching")); - local_nh.param("object_width", template_width, 0.06); - local_nh.param("object_height", template_height, 0.0739); - local_nh.param("relative_pose", _pose_str, std::string("0 0 0 0 0 0 1")); + pnh_->param("child_frame_id", _child_frame_id, std::string("matching")); + pnh_->param("object_width", template_width, 0.06); + pnh_->param("object_height", template_height, 0.0739); + pnh_->param("relative_pose", _pose_str, std::string("0 0 0 0 0 0 1")); std::string default_template_file_name; try { #ifdef ROSPACK_EXPORT @@ -719,19 +71,17 @@ class PointPoseExtractor #endif } catch (std::runtime_error &e) { } - local_nh.param("template_filename", template_filename, default_template_file_name); - local_nh.param("reprojection_threshold", _reprojection_threshold, 3.0); - local_nh.param("distanceratio_threshold", _distanceratio_threshold, 0.49); - local_nh.param("error_threshold", _err_thr, 50.0); - local_nh.param("theta_step", _th_step, 5.0); - local_nh.param("phi_step", _phi_step, 5.0); - local_nh.param("viewer_window", _viewer, true); - local_nh.param("window_name", window_name, std::string("sample1")); - local_nh.param("autosize", _autosize, false); - local_nh.param("publish_null_object_detection", pnod, false); - local_nh.param("publish_tf", _publish_tf, false); - - _first_sample_change = false; + pnh_->param("template_filename", template_filename, default_template_file_name); + pnh_->param("reprojection_threshold", _reprojection_threshold, 3.0); + pnh_->param("distanceratio_threshold", _distanceratio_threshold, 0.49); + pnh_->param("error_threshold", _err_thr, 50.0); + pnh_->param("theta_step", _th_step, 5.0); + pnh_->param("phi_step", _phi_step, 5.0); + pnh_->param("viewer_window", _viewer, true); + pnh_->param("window_name", window_name, std::string("sample1")); + pnh_->param("autosize", _autosize, false); + pnh_->param("publish_null_object_detection", pnod, false); + pnh_->param("publish_tf", _publish_tf, false); // make one template cv::Mat template_img; @@ -741,6 +91,8 @@ class PointPoseExtractor return; } + _first_sample_change = false; + // relative pose std::vector rv(7); std::istringstream iss(_pose_str); @@ -750,11 +102,11 @@ class PointPoseExtractor if (iss.eof()) { // use rpy expression transform = tf::Transform(tf::createQuaternionFromRPY(rv[3], rv[4], rv[5]), - tf::Vector3(rv[0], 
rv[1], rv[2])); + tf::Vector3(rv[0], rv[1], rv[2])); } else { // use quaternion expression iss >> rv[6]; transform = tf::Transform(tf::Quaternion(rv[3], rv[4], rv[5], rv[6]), - tf::Vector3(rv[0], rv[1], rv[2])); + tf::Vector3(rv[0], rv[1], rv[2])); } // add the image to template list @@ -764,8 +116,8 @@ class PointPoseExtractor } // initialize - cv::Mat make_homography(cv::Mat src, cv::Mat rvec, cv::Mat tvec, - double template_width, double template_height, cv::Size &size){ + cv::Mat PointPoseExtractor::make_homography(cv::Mat src, cv::Mat rvec, cv::Mat tvec, + double template_width, double template_height, cv::Size &size){ cv::Point3f coner[4] = {cv::Point3f(-(template_width/2.0), -(template_height/2.0),0), cv::Point3f(template_width/2.0,-(template_height/2.0),0), @@ -806,11 +158,11 @@ class PointPoseExtractor } - int make_warped_images (cv::Mat src, std::vector &imgs, - std::vector &Mvec, - double template_width, double template_height, - double th_step, double phi_step){ - + int PointPoseExtractor::make_warped_images (cv::Mat src, std::vector &imgs, + std::vector &Mvec, + double template_width, double template_height, + double th_step, double phi_step){ + std::vector sizevec; for (int i = (int)((-3.14/4.0)/th_step); i*th_step < 3.14/4.0; i++){ @@ -831,11 +183,13 @@ class PointPoseExtractor cv::Mat M; cv::Size size; M = make_homography(src, rvec, tvec, template_width, template_height, size); - Mvec.push_back(M); - sizevec.push_back(size); + //Whenever an H matrix cannot be estimated, cv::findHomography returns an empty one. + if(!M.empty()){ + Mvec.push_back(M); + sizevec.push_back(size); + } } } - for (int i = 0; i < (int)Mvec.size(); i++){ cv::Mat dst; cv::warpPerspective(src, dst, Mvec.at(i), sizevec.at(i), @@ -845,9 +199,9 @@ class PointPoseExtractor return 0; } - bool add_new_template(cv::Mat img, std::string typestr, tf::Transform relative_pose, - double template_width, double template_height, - double theta_step=5.0, double phi_step=5.0) + bool PointPoseExtractor::add_new_template(cv::Mat img, std::string typestr, tf::Transform relative_pose, + double template_width, double template_height, + double theta_step=5.0, double phi_step=5.0) { std::vector imgs; std::vector Mvec; @@ -872,36 +226,35 @@ class PointPoseExtractor (_viewer ? type : ""), _autosize); _templates.push_back(tmplt); if( _viewer ) - { - cv::namedWindow(type, _autosize ? CV_WINDOW_AUTOSIZE : 0); - cvSetMouseCallback (type.c_str(), &cvmousecb, static_cast(_templates.back())); - } + { + cv::namedWindow(type, _autosize ? 
CV_WINDOW_AUTOSIZE : 0); + cvSetMouseCallback (type.c_str(), &cvmousecb, this); + } } return true; } - bool settemplate_cb (jsk_perception::SetTemplate::Request &req, - jsk_perception::SetTemplate::Response &res){ - cv_bridge::CvImagePtr cv_ptr; - cv_ptr = cv_bridge::toCvCopy(req.image, enc::BGR8); - cv::Mat img(cv_ptr->image); + bool PointPoseExtractor::settemplate_cb (jsk_perception::SetTemplate::Request &req, + jsk_perception::SetTemplate::Response &res){ + if ( !_initialized ) { + ROS_WARN("SetTemplate service is available only after receiving input ImageFeature0D"); + return false; + } + cv_bridge::CvImagePtr cv_ptr; + cv_ptr = cv_bridge::toCvCopy(req.image, enc::BGR8); + cv::Mat img(cv_ptr->image); - tf::Transform transform(tf::Quaternion(req.relativepose.orientation.x, - req.relativepose.orientation.y, - req.relativepose.orientation.z, - req.relativepose.orientation.w), - tf::Vector3(req.relativepose.position.x, - req.relativepose.position.y, - req.relativepose.position.z)); + tf::Transform transform; + tf::poseMsgToTF(req.relativepose, transform); - // add the image to template list - add_new_template(img, req.type, transform, - req.dimx, req.dimy, 1.0, 1.0); - return true; + // add the image to template list + add_new_template(img, req.type, transform, + req.dimx, req.dimy, 1.0, 1.0); + return true; } - void imagefeature_cb (const posedetection_msgs::ImageFeature0DConstPtr& msg){ + void PointPoseExtractor::imagefeature_cb (const posedetection_msgs::ImageFeature0DConstPtr& msg){ std::vector sourceimg_keypoints; cv::Mat sourceimg_descriptors; std::vector vo6p; @@ -957,31 +310,22 @@ class PointPoseExtractor // Publish result as geometry_msgs/PoseStamped. But it can only contain one object geometry_msgs::PoseStamped pose_msg; pose_msg.header = od.header; - pose_msg.pose = od.objects[0].pose; - _pub_pose.publish(pose_msg); + if ((int)vo6p.size() != 0) { + pose_msg.pose = od.objects[0].pose; + _pub_pose.publish(pose_msg); + } // broadcast tf if ( this->_publish_tf ) { - tf::Transform transform( - tf::Quaternion( - pose_msg.pose.orientation.x, - pose_msg.pose.orientation.y, - pose_msg.pose.orientation.z, - pose_msg.pose.orientation.w - ), - tf::Vector3( - pose_msg.pose.position.x, - pose_msg.pose.position.y, - pose_msg.pose.position.z - ) - ); + tf::Transform transform; + tf::poseMsgToTF(pose_msg.pose, transform); _br.sendTransform( - tf::StampedTransform( - transform, - msg->image.header.stamp, - msg->image.header.frame_id, - _child_frame_id - ) - ); + tf::StampedTransform( + transform, + msg->image.header.stamp, + msg->image.header.frame_id, + _child_frame_id + ) + ); } } } @@ -991,28 +335,43 @@ class PointPoseExtractor cv::waitKey( 10 ); } - /* if there are subscribers of the output topic -> do work - else if -> unregister all topics this node subscribing - */ - void check_subscribers() + void PointPoseExtractor::onInit() { - if(_pub.getNumSubscribers() == 0 && _initialized) { - if(_sub) - _sub.shutdown(); - static int i = 0; - if ( i++ % 100 == 0 ) { - ROS_INFO("wait for subscriberes ... %s", _pub.getTopic().c_str()); - } - } else { - if(!_sub) - _sub = _n.subscribe("ImageFeature0D", 1, - &PointPoseExtractor::imagefeature_cb, this); - } + DiagnosticNodelet::onInit(); + dynamic_reconfigure::Server server; + dynamic_reconfigure::Server::CallbackType f; + f = boost::bind(&PointPoseExtractor::dyn_conf_callback, this, _1, _2); + server.setCallback(f); + + it = new image_transport::ImageTransport(*pnh_); + // Use nh_ instead of pnh_ for backward compatibility. 
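+    // With nh_ the service client and publishers keep resolving in the node's parent
+    // namespace (e.g. Feature0DDetect, ObjectDetection), as the old standalone
+    // executable did; pnh_ would move them under the nodelet's private name.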
+ // See https://github.com/jsk-ros-pkg/jsk_recognition/pull/2779 and + // https://github.com/jsk-ros-pkg/jsk_recognition/pull/2778 + _client = nh_->serviceClient("Feature0DDetect"); + _pub = advertise(*nh_, "ObjectDetection", 10); + _pub_agg = advertise(*nh_, "ObjectDetection_agg", 10); + _pub_pose = advertise(*nh_, "object_pose", 10); + _debug_pub = it->advertise("debug_image", 1); + _server = nh_->advertiseService("SetTemplate", &PointPoseExtractor::settemplate_cb, this); + _initialized = false; + + onInitPostProcess(); + } + + void PointPoseExtractor::subscribe() + { + _sub = nh_->subscribe("ImageFeature0D", 1, + &PointPoseExtractor::imagefeature_cb, this); + } + + void PointPoseExtractor::unsubscribe() + { + _sub.shutdown(); } /* callback for dynamic reconfigure */ - void dyn_conf_callback(jsk_perception::point_pose_extractorConfig &config, - uint32_t level) { + void PointPoseExtractor::dyn_conf_callback(Config &config, + uint32_t level) { std::cout << "id = " << config.template_id << std::endl; std::cout << "lvl = " << level << std::endl; if((int)_templates.size() <= config.template_id) { @@ -1039,25 +398,5 @@ class PointPoseExtractor }; // the end of definition of class PointPoseExtractor - -std::vector PointPoseExtractor::_templates; - -int main (int argc, char **argv){ - ros::init (argc, argv, "PointPoseExtractor"); - - PointPoseExtractor matcher; - - dynamic_reconfigure::Server server; - dynamic_reconfigure::Server::CallbackType f; - f = boost::bind(&PointPoseExtractor::dyn_conf_callback, &matcher, _1, _2); - server.setCallback(f); - - ros::Rate r(10); // 10 hz - while(ros::ok()) { - ros::spinOnce(); - matcher.check_subscribers(); - r.sleep(); - } - - return 0; -} +#include +PLUGINLIB_EXPORT_CLASS (jsk_perception::PointPoseExtractor, nodelet::Nodelet); diff --git a/jsk_perception/src/split_image.cpp b/jsk_perception/src/split_image.cpp new file mode 100644 index 0000000000..e84e7280bd --- /dev/null +++ b/jsk_perception/src/split_image.cpp @@ -0,0 +1,101 @@ +// -*- mode: c++ -*- +/********************************************************************* + * Software License Agreement (BSD License) + * + * Copyright (c) 2023, JSK Lab + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of the JSK Lab nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + *********************************************************************/ +/* + * split_image.cpp + * Author: Yoshiki Obinata + */ + + +#include "jsk_perception/split_image.h" + +namespace jsk_perception{ + + void SplitImage::onInit(){ + DiagnosticNodelet::always_subscribe_ = true; + DiagnosticNodelet::onInit(); + pnh_->param("vertical_parts", vertical_parts_, 1); + pnh_->param("horizontal_parts", horizontal_parts_, 1); + for(int i=0; i(*pnh_, pub_name, 1); + pubs_.push_back(pub_); + } + } + onInitPostProcess(); + } + + SplitImage::~SplitImage(){} + + void SplitImage::subscribe(){ + sub_ = pnh_->subscribe("input", 1, &SplitImage::splitImage, this); + } + + void SplitImage::unsubscribe(){ + sub_.shutdown(); + } + + void SplitImage::splitImage(const sensor_msgs::Image::ConstPtr& msg){ + cv_bridge::CvImagePtr cv_ptr; + try{ + cv_ptr = cv_bridge::toCvCopy(msg, sensor_msgs::image_encodings::BGR8); + }catch(cv_bridge::Exception& e){ + NODELET_ERROR("cv_bridge exception: %s", e.what()); + return; + } + cv::Mat image = cv_ptr->image; + int height = image.rows; + int width = image.cols; + int vertical_size = height / vertical_parts_; + int horizontal_size = width / horizontal_parts_; + for(int i=0; iheader = msg->header; + part_cv_ptr->encoding = sensor_msgs::image_encodings::BGR8; + part_cv_ptr->image = part_image; + sensor_msgs::ImagePtr part_msg = part_cv_ptr->toImageMsg(); + pubs_.at(i*horizontal_parts_+j).publish(part_msg); + } + } + } +} + +#include +PLUGINLIB_EXPORT_CLASS(jsk_perception::SplitImage, nodelet::Nodelet); diff --git a/jsk_perception/src/video_to_scene.cpp b/jsk_perception/src/video_to_scene.cpp new file mode 100644 index 0000000000..6d323d9cb0 --- /dev/null +++ b/jsk_perception/src/video_to_scene.cpp @@ -0,0 +1,112 @@ +// -*- mode: C++ -*- +/********************************************************************* + * Software License Agreement (BSD License) + * + * Copyright (c) 2022, JSK Lab + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * * Neither the name of the JSK Lab nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + *********************************************************************/ +/* + * video_to_scene.cpp + * Author: Kei Okada + */ + +#include "jsk_perception/video_to_scene.h" +#include + +namespace jsk_perception{ + void VideoToScene::onInit(){ + DiagnosticNodelet::onInit(); + + bgsubtractor = cv::bgsegm::createBackgroundSubtractorGMG(); + pnh_->param("min_percent", min_percent_, 5); + pnh_->param("max_percent", max_percent_, 20); + captured_ = false; + + srv_ = boost::make_shared > (*pnh_); + dynamic_reconfigure::Server::CallbackType f = + boost::bind(&VideoToScene::configCallback, this, _1, _2); + srv_->setCallback (f); + + //pub_ = advertise(*pnh_, "output", 1); + pub_ = advertiseImage(*pnh_, "output", 1); + + onInitPostProcess(); + } + + void VideoToScene::subscribe(){ + std::string transport; + pnh_->param("image_transport", transport, std::string("raw")); + NODELET_INFO_STREAM("Using transport \"" << transport << "\""); + image_transport::TransportHints hints(transport, ros::TransportHints(), *pnh_); + // + it_.reset(new image_transport::ImageTransport(*pnh_)); + sub_ = it_->subscribe(pnh_->resolveName("input"), 1, &VideoToScene::work, this, hints); + ros::V_string names = boost::assign::list_of("input"); + jsk_topic_tools::warnNoRemap(names); + } + + void VideoToScene::unsubscribe(){ + sub_.shutdown(); + } + + void VideoToScene::work(const sensor_msgs::Image::ConstPtr& image_msg){ + cv::Mat image; + + vital_checker_ -> poke(); + boost::mutex::scoped_lock lock(mutex_); + + image = cv_bridge::toCvShare(image_msg, sensor_msgs::image_encodings::RGB8) -> image; + cv::resize(image, image, cv::Size(), 300.0/image.cols, 300.0/image.cols); + + cv::Mat bgmask; + bgsubtractor->apply(image, bgmask); + cv::erode(bgmask, bgmask, cv::Mat(), cv::Point(-1, -1), 2); + cv::dilate(bgmask, bgmask, cv::Mat(), cv::Point(-1, -1), 2); + + int p = cv::countNonZero(bgmask) / float(bgmask.cols * bgmask.rows) * 100; + NODELET_DEBUG_STREAM("p = " << p << ", min_percent = " << min_percent_ << ", max_percent = " << max_percent_ << ", captured = " << captured_); + + if ( p < min_percent_ && !captured_ ) { + captured_ = true; + pub_.publish(image_msg); + } else if ( captured_ && p >= max_percent_ ) { + captured_ = false; + } + } + + void VideoToScene::configCallback(Config &config, uint32_t level){ + boost::mutex::scoped_lock lock(mutex_); + min_percent_ = config.min_percent; + max_percent_ = config.max_percent; + } +} + +#include +PLUGINLIB_EXPORT_CLASS (jsk_perception::VideoToScene, nodelet::Nodelet); diff --git a/jsk_perception/test/apply_context_to_label_probability.test b/jsk_perception/test/apply_context_to_label_probability.test index 301f2c8348..8629c18214 100644 --- a/jsk_perception/test/apply_context_to_label_probability.test +++ b/jsk_perception/test/apply_context_to_label_probability.test @@ -8,12 +8,12 @@ + retry="3" time-limit="120"> topic_0: /apply_context_to_label_probability/output - timeout_0: 60 + timeout_0: 120 topic_1: 
/apply_context_to_label_probability/output/label - timeout_1: 60 + timeout_1: 120 diff --git a/jsk_perception/test/deep_sort_tracker.test b/jsk_perception/test/deep_sort_tracker.test index a421e64707..a12d814fd8 100644 --- a/jsk_perception/test/deep_sort_tracker.test +++ b/jsk_perception/test/deep_sort_tracker.test @@ -8,12 +8,12 @@ + retry="3" time-limit="120"> topic_0: /deep_sort_tracker/output/viz - timeout_0: 60 + timeout_0: 120 topic_1: /deep_sort_tracker/output/labels - timeout_1: 60 + timeout_1: 120 diff --git a/jsk_perception/test/face_pose_estimation.test b/jsk_perception/test/face_pose_estimation.test index 35d8e6d5b9..ebb24ce560 100644 --- a/jsk_perception/test/face_pose_estimation.test +++ b/jsk_perception/test/face_pose_estimation.test @@ -7,14 +7,14 @@ + retry="3" time-limit="120"> topic_0: /face_pose_estimation/output/rects - timeout_0: 60 + timeout_0: 120 topic_1: /face_pose_estimation/output/pose - timeout_1: 60 + timeout_1: 120 topic_2: /face_pose_estimation/output/gender - timeout_2: 60 + timeout_2: 120 diff --git a/jsk_perception/test/fcn_object_segmentation.test b/jsk_perception/test/fcn_object_segmentation.test index dd65968f1e..c7571d8e1c 100644 --- a/jsk_perception/test/fcn_object_segmentation.test +++ b/jsk_perception/test/fcn_object_segmentation.test @@ -11,10 +11,10 @@ + time-limit="120"> topic_0: /fcn_object_segmentation/output - timeout_0: 60 + timeout_0: 120 diff --git a/jsk_perception/test/human_mesh_recovery.test b/jsk_perception/test/human_mesh_recovery.test index e59a9acf20..1992a43d0b 100644 --- a/jsk_perception/test/human_mesh_recovery.test +++ b/jsk_perception/test/human_mesh_recovery.test @@ -7,10 +7,10 @@ + retry="3" time-limit="120"> topic_0: /human_mesh_recovery/output/pose - timeout_0: 60 + timeout_0: 100 diff --git a/jsk_perception/test/invert_mask_image.test b/jsk_perception/test/invert_mask_image.test new file mode 100644 index 0000000000..73dff168cc --- /dev/null +++ b/jsk_perception/test/invert_mask_image.test @@ -0,0 +1,19 @@ + + + + + + + + + topic_0: /invert_mask_image/output + timeout_0: 30 + check_after_kill_node: true + node_names: [invert_mask_image,] + + + + diff --git a/jsk_perception/test/label_image_classifier.test b/jsk_perception/test/label_image_classifier.test index ba37c915b7..c4e476bf14 100644 --- a/jsk_perception/test/label_image_classifier.test +++ b/jsk_perception/test/label_image_classifier.test @@ -9,10 +9,10 @@ + retry="3" time-limit="120"> topic_0: /label_image_classifier/output - timeout_0: 60 + timeout_0: 120 diff --git a/jsk_perception/test/people_pose_estimation_2d.test b/jsk_perception/test/people_pose_estimation_2d.test index 5fcd015d13..af4fc84494 100644 --- a/jsk_perception/test/people_pose_estimation_2d.test +++ b/jsk_perception/test/people_pose_estimation_2d.test @@ -7,16 +7,16 @@ + retry="3" time-limit="120"> topic_0: /people_pose_estimation_2d/output - timeout_0: 60 + timeout_0: 120 topic_1: /people_pose_estimation_2d/pose - timeout_1: 60 + timeout_1: 120 topic_2: /people_pose_estimation_2d/pose_2d - timeout_2: 60 + timeout_2: 120 topic_3: /people_pose_estimation_2d/skeleton - timeout_3: 60 + timeout_3: 120 diff --git a/jsk_perception/test/point_pose_extractor.test b/jsk_perception/test/point_pose_extractor.test new file mode 100644 index 0000000000..102747e55b --- /dev/null +++ b/jsk_perception/test/point_pose_extractor.test @@ -0,0 +1,22 @@ + + + + + + + + + topic_0: /ObjectDetection + timeout_0: 30 + topic_1: /ObjectDetection_agg + timeout_1: 30 + topic_2: /object_pose + timeout_2: 30 + topic_3: 
/point_pose_extractor/debug_image + timeout_3: 30 + + + + diff --git a/jsk_perception/test/point_pose_extractor_group_ns.test b/jsk_perception/test/point_pose_extractor_group_ns.test new file mode 100644 index 0000000000..0ed56ad043 --- /dev/null +++ b/jsk_perception/test/point_pose_extractor_group_ns.test @@ -0,0 +1,50 @@ + + + + + + + + file_name: $(find jsk_perception)/sample/image/items_in_shelf.jpg + encoding: bgr8 + publish_info: true + fovx: 84.1 + fovy: 53.8 + + + + + + + + + + + template_filename: $(find jsk_perception)/sample/image/pai_no_mi.jpg + template_id: 0 + object_width: 0.145 + object_height: 0.128 + viewer_window: $(arg gui) + + + + + + + topic_0: /test_template_matching/ObjectDetection + timeout_0: 30 + topic_1: /test_template_matching/ObjectDetection_agg + timeout_1: 30 + topic_2: /test_template_matching/object_pose + timeout_2: 30 + topic_3: /test_template_matching/point_pose_extractor/debug_image + timeout_3: 30 + + + + diff --git a/jsk_perception/test/probability_image_classifier.test b/jsk_perception/test/probability_image_classifier.test index 71cb98c7c5..8f25ae32db 100644 --- a/jsk_perception/test/probability_image_classifier.test +++ b/jsk_perception/test/probability_image_classifier.test @@ -9,10 +9,10 @@ + retry="3" time-limit="120"> topic_0: /probability_image_classifier/output - timeout_0: 60 + timeout_0: 120 diff --git a/jsk_perception/test/publish_mouse_event.py b/jsk_perception/test/publish_mouse_event.py new file mode 100755 index 0000000000..899d505fd3 --- /dev/null +++ b/jsk_perception/test/publish_mouse_event.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# copied from image_view2 +# workaround until https://github.com/jsk-ros-pkg/jsk_common/pull/1774 is merged and released +# + +import cv2 +import numpy as np + +import rospy +from sensor_msgs.msg import Image +import cv_bridge +from image_view2.msg import MouseEvent + + +def main(): + pub_plus = rospy.Publisher('~plus_rect_event', MouseEvent, queue_size=1) + pub_minus = rospy.Publisher('~minus_rect_event', MouseEvent, queue_size=1) + + width = int(rospy.get_param('~image_width')) + height = int(rospy.get_param('~image_height')) + plus_events = [ + MouseEvent(type=3, x=int(width/4), y=int(height/4), width=width, height=height), + MouseEvent(type=4, x=int(width/2), y=int(height/2), width=width, height=height), + MouseEvent(type=2, x=int(3*width/4), y=int(3*height/4), width=width, height=height), + ] + minus_events = [ + MouseEvent(type=3, x=int(3*width/4), y=int(3*height/4), width=width, height=height), + MouseEvent(type=4, x=int(width/2), y=int(height/2), width=width, height=height), + MouseEvent(type=2, x=int(width/4), y=int(height/4), width=width, height=height), + ] + rate = rospy.Rate(10) + while not rospy.is_shutdown(): + for e in plus_events: + e.header.stamp = rospy.get_rostime() + pub_plus.publish(e) + rate.sleep() + for e in minus_events: + e.header.stamp = rospy.get_rostime() + pub_minus.publish(e) + rate.sleep() + + +if __name__ == '__main__': + rospy.init_node('publish_mouse_event') + main() diff --git a/jsk_perception/test/ssd_object_detector.test b/jsk_perception/test/ssd_object_detector.test index 24992e14ac..a23ce97b05 100644 --- a/jsk_perception/test/ssd_object_detector.test +++ b/jsk_perception/test/ssd_object_detector.test @@ -5,7 +5,7 @@ + retry="3" time-limit="120" > topic_0: /ssd_object_detector/output/image timeout_0: 120 diff --git a/jsk_perception/test/tile_image.test b/jsk_perception/test/tile_image.test index 73a211d0d1..6996c73781 100644 --- 
a/jsk_perception/test/tile_image.test +++ b/jsk_perception/test/tile_image.test @@ -1,7 +1,7 @@ - + + pkg="jsk_tools" type="test_topic_published.py" + retry="3" time-limit="120" > topic_0: /vgg16_object_recognition/output - timeout_0: 30 + timeout_0: 120 topic_1: /vgg16_object_recognition/debug/net_input - timeout_1: 30 + timeout_1: 120 diff --git a/jsk_perception/test/video_to_scene.test b/jsk_perception/test/video_to_scene.test new file mode 100644 index 0000000000..b9b9fb9ce1 --- /dev/null +++ b/jsk_perception/test/video_to_scene.test @@ -0,0 +1,21 @@ + + + + + + + + + topic_0: /kinect_head/rgb/image_rect_color/output + timeout_0: 20 + topic_1: /kinect_head/rgb/image_rect_color/output/compressed + timeout_1: 20 + check_after_kill_node: true + node_names: [video_to_scene,] + + + + diff --git a/jsk_recognition_msgs/CMakeLists.txt b/jsk_recognition_msgs/CMakeLists.txt index 116018c0b2..86e6164943 100644 --- a/jsk_recognition_msgs/CMakeLists.txt +++ b/jsk_recognition_msgs/CMakeLists.txt @@ -15,6 +15,7 @@ add_message_files( Circle2DArray.msg Circle2D.msg ClassificationResult.msg + ClipResult.msg ClusterPointIndices.msg ColorHistogramArray.msg ColorHistogram.msg @@ -46,6 +47,8 @@ add_message_files( PointsArray.msg PolygonArray.msg PosedCameraInfo.msg + QueryAndProbability.msg + QuestionAndAnswerText.msg RectArray.msg Rect.msg RotatedRect.msg @@ -68,6 +71,7 @@ add_message_files( TrackingStatus.msg BoolStamped.msg VectorArray.msg + VQAResult.msg WeightedPoseArray.msg ContactSensor.msg ContactSensorArray.msg @@ -110,9 +114,14 @@ add_service_files(FILES WhiteBalance.srv ) +add_action_files(FILES + VQATask.action + ClassificationTask.action + ) + generate_messages( DEPENDENCIES - std_msgs sensor_msgs geometry_msgs pcl_msgs jsk_footstep_msgs + std_msgs sensor_msgs geometry_msgs pcl_msgs jsk_footstep_msgs actionlib_msgs ) catkin_package( CATKIN_DEPENDS std_msgs sensor_msgs geometry_msgs pcl_msgs jsk_footstep_msgs @@ -133,7 +142,7 @@ install(DIRECTORY sample USE_SOURCE_PERMISSIONS ) -if(NOT $ENV{ROS_DISTRO} STREQUAL "indigo") # on noetic it needs catkin_install_python to support Python3 and it does not work on indigo for some reason... +if($ENV{ROS_DISTRO} STREQUAL "indigo") # on noetic it needs catkin_install_python to support Python3 and it does not work on indigo for some reason... 
install(DIRECTORY scripts DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} USE_SOURCE_PERMISSIONS diff --git a/jsk_recognition_msgs/action/ClassificationTask.action b/jsk_recognition_msgs/action/ClassificationTask.action new file mode 100644 index 0000000000..417235cc2c --- /dev/null +++ b/jsk_recognition_msgs/action/ClassificationTask.action @@ -0,0 +1,8 @@ +sensor_msgs/Image image +sensor_msgs/CompressedImage compressed_image +string[] queries +--- +jsk_recognition_msgs/ClassificationResult result +bool done +--- +string status diff --git a/jsk_recognition_msgs/action/VQATask.action b/jsk_recognition_msgs/action/VQATask.action new file mode 100644 index 0000000000..efd3a0e066 --- /dev/null +++ b/jsk_recognition_msgs/action/VQATask.action @@ -0,0 +1,8 @@ +sensor_msgs/Image image +sensor_msgs/CompressedImage compressed_image +string[] questions +--- +jsk_recognition_msgs/VQAResult result +bool done +--- +string status diff --git a/jsk_recognition_msgs/msg/ClipResult.msg b/jsk_recognition_msgs/msg/ClipResult.msg new file mode 100644 index 0000000000..f20e76c27d --- /dev/null +++ b/jsk_recognition_msgs/msg/ClipResult.msg @@ -0,0 +1,2 @@ +sensor_msgs/Image image +jsk_recognition_msgs/QueryAndProbability[] result diff --git a/jsk_recognition_msgs/msg/PanoramaInfo.msg b/jsk_recognition_msgs/msg/PanoramaInfo.msg index 6dd1be12b1..602d7791bd 100644 --- a/jsk_recognition_msgs/msg/PanoramaInfo.msg +++ b/jsk_recognition_msgs/msg/PanoramaInfo.msg @@ -11,7 +11,13 @@ std_msgs/Header header string projection_model ####################################################################### -# Field of View Parameters # +# Image shape info # +####################################################################### +uint32 image_height +uint32 image_width + +####################################################################### +# Field of View Parameters # ####################################################################### float64 theta_min float64 theta_max diff --git a/jsk_recognition_msgs/msg/QueryAndProbability.msg b/jsk_recognition_msgs/msg/QueryAndProbability.msg new file mode 100644 index 0000000000..3495b19f3d --- /dev/null +++ b/jsk_recognition_msgs/msg/QueryAndProbability.msg @@ -0,0 +1,2 @@ +string query +float32 probability diff --git a/jsk_recognition_msgs/msg/QuestionAndAnswerText.msg b/jsk_recognition_msgs/msg/QuestionAndAnswerText.msg new file mode 100644 index 0000000000..1070abd928 --- /dev/null +++ b/jsk_recognition_msgs/msg/QuestionAndAnswerText.msg @@ -0,0 +1,2 @@ +string question +string answer diff --git a/jsk_recognition_msgs/msg/VQAResult.msg b/jsk_recognition_msgs/msg/VQAResult.msg new file mode 100644 index 0000000000..f2cc09c093 --- /dev/null +++ b/jsk_recognition_msgs/msg/VQAResult.msg @@ -0,0 +1 @@ +jsk_recognition_msgs/QuestionAndAnswerText[] result diff --git a/jsk_recognition_utils/package.xml b/jsk_recognition_utils/package.xml index 237e47bf78..cf7d9e8ce7 100644 --- a/jsk_recognition_utils/package.xml +++ b/jsk_recognition_utils/package.xml @@ -15,7 +15,8 @@ python3-setuptools catkin - cython + cython + cython3 eigen_conversions dynamic_reconfigure geometry_msgs @@ -46,7 +47,8 @@ python-skimage python3-skimage python-fcn-pip - python-shapely + python-shapely + python3-shapely sensor_msgs std_msgs tf2_ros diff --git a/sound_classification/launch/audio_to_spectrogram.launch b/sound_classification/launch/audio_to_spectrogram.launch index 7e7511b1d0..afbd67093e 100644 --- a/sound_classification/launch/audio_to_spectrogram.launch +++ 
b/sound_classification/launch/audio_to_spectrogram.launch @@ -59,7 +59,7 @@ - + @@ -69,12 +69,12 @@ - + - + do_dynamic_scaling: true diff --git a/sound_classification/launch/classify_sound.launch b/sound_classification/launch/classify_sound.launch index 06325ed57d..276ea93c73 100644 --- a/sound_classification/launch/classify_sound.launch +++ b/sound_classification/launch/classify_sound.launch @@ -36,7 +36,7 @@ - + gpu: $(arg gpu) model_name: nin @@ -47,11 +47,11 @@ - - + + - + diff --git a/sound_classification/launch/save_noise.launch b/sound_classification/launch/save_noise.launch index d94aaf9c19..2db861520c 100644 --- a/sound_classification/launch/save_noise.launch +++ b/sound_classification/launch/save_noise.launch @@ -34,7 +34,7 @@ - + save_data_rate: $(arg save_data_rate) diff --git a/sound_classification/launch/save_sound.launch b/sound_classification/launch/save_sound.launch index 56799dd28c..e32cff10e4 100644 --- a/sound_classification/launch/save_sound.launch +++ b/sound_classification/launch/save_sound.launch @@ -39,9 +39,9 @@ - - - + + + save_data_rate: $(arg save_data_rate) @@ -54,7 +54,7 @@ - + power_per_pixel_threshold: $(arg threshold) lazy: false
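The point_pose_extractor rewrite above exposes template registration through its SetTemplate service, and the new guard means calls succeed only after the nodelet has received its first ImageFeature0D message. Below is a minimal client sketch, assuming the standard catkin-generated `jsk_perception.srv` bindings; the service path and template file are illustrative, not part of this diff.

```python
#!/usr/bin/env python
# Sketch only: register a template with point_pose_extractor via SetTemplate.
# Field names follow the usage visible in settemplate_cb(); dimx/dimy are the
# physical template size in meters (values taken from the group_ns test).
import cv2
import cv_bridge
import rospy
from geometry_msgs.msg import Pose
from jsk_perception.srv import SetTemplate, SetTemplateRequest

rospy.init_node('set_template_client')
srv_name = 'SetTemplate'  # resolved relative to this node; adjust to your namespace
rospy.wait_for_service(srv_name)
set_template = rospy.ServiceProxy(srv_name, SetTemplate)

bridge = cv_bridge.CvBridge()
template = cv2.imread('template.jpg')  # hypothetical template image

req = SetTemplateRequest()
req.type = 'my_template'                            # label used for the viewer window / frame
req.image = bridge.cv2_to_imgmsg(template, 'bgr8')  # template appearance
req.dimx = 0.145                                    # template width [m]
req.dimy = 0.128                                    # template height [m]
req.relativepose = Pose()                           # object pose relative to the template
req.relativepose.orientation.w = 1.0
set_template(req)
```

Note that the service now returns false with a warning until the input ImageFeature0D topic has delivered at least one message.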
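The new SplitImage nodelet cuts each incoming frame into `vertical_parts` x `horizontal_parts` tiles and publishes them on per-part topics created in onInit (the exact topic names are not reproduced here). A short standalone sketch of the same cropping arithmetic, for reference only:

```python
# Sketch only: the row-major tiling used by SplitImage::splitImage().
import cv2

vertical_parts, horizontal_parts = 2, 3   # ~vertical_parts / ~horizontal_parts params
image = cv2.imread('input.jpg')           # hypothetical input frame
h, w = image.shape[:2]
vs, hs = h // vertical_parts, w // horizontal_parts  # integer division, as in the C++ code

parts = []
for i in range(vertical_parts):
    for j in range(horizontal_parts):
        # same ROI as the sub-image taken in the nodelet
        parts.append(image[i * vs:(i + 1) * vs, j * hs:(j + 1) * hs])
# tile (i, j) is published by publisher index i * horizontal_parts + j
```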
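The new VideoToScene nodelet keeps a GMG background model and republishes an input frame only once the fraction of moving pixels drops below `min_percent`, re-arming when it rises above `max_percent`. A standalone sketch of that heuristic, assuming opencv-contrib-python (which provides `cv2.bgsegm`):

```python
# Sketch only: the scene-capture latch implemented in VideoToScene::work().
import cv2

min_percent, max_percent = 5, 20
subtractor = cv2.bgsegm.createBackgroundSubtractorGMG()
captured = False

cap = cv2.VideoCapture('video.mp4')  # hypothetical input stream
while True:
    ok, frame = cap.read()
    if not ok:
        break
    scale = 300.0 / frame.shape[1]                  # resize to ~300 px wide, as in the nodelet
    frame = cv2.resize(frame, None, fx=scale, fy=scale)
    mask = subtractor.apply(frame)
    mask = cv2.erode(mask, None, iterations=2)      # default 3x3 kernel, as in the C++ code
    mask = cv2.dilate(mask, None, iterations=2)
    p = 100.0 * cv2.countNonZero(mask) / mask.size  # percentage of foreground pixels
    if p < min_percent and not captured:
        captured = True
        print('scene captured')                     # the nodelet republishes the image here
    elif captured and p >= max_percent:
        captured = False
```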
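The new VQATask action carries an image (raw or compressed) plus a list of questions and returns a VQAResult of question/answer pairs with a `done` flag; feedback is a status string. A minimal actionlib client sketch follows; the server name is hypothetical, since this diff only adds the message and action definitions. ClassificationTask follows the same pattern with `queries` and a ClassificationResult.

```python
# Sketch only: sending a goal to a (hypothetical) server of the new VQATask action.
import actionlib
import cv2
import cv_bridge
import rospy
from jsk_recognition_msgs.msg import VQATaskAction, VQATaskGoal

rospy.init_node('vqa_task_client')
client = actionlib.SimpleActionClient('vqa_server', VQATaskAction)  # illustrative name
client.wait_for_server()

goal = VQATaskGoal()
goal.image = cv_bridge.CvBridge().cv2_to_imgmsg(cv2.imread('scene.jpg'), 'bgr8')
goal.questions = ['What is in the image?']
client.send_goal(goal)
client.wait_for_result()

res = client.get_result()
if res.done:
    for qa in res.result.result:  # VQAResult.result is QuestionAndAnswerText[]
        print('%s -> %s' % (qa.question, qa.answer))
```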
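The new ClipResult message pairs the scored image with QueryAndProbability entries. A small sketch of how a publisher might fill it; the queries and probabilities are illustrative:

```python
# Sketch only: populating the new ClipResult message.
from jsk_recognition_msgs.msg import ClipResult, QueryAndProbability
from sensor_msgs.msg import Image

msg = ClipResult()
msg.image = Image()  # the image the queries were scored against
msg.result = [
    QueryAndProbability(query='a photo of a drill', probability=0.83),
    QueryAndProbability(query='a photo of a cup', probability=0.17),
]
```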