Skip to content

Commit

Permalink
feat: Zenseact Open Dataset (zod) support (#23)
Browse files Browse the repository at this point in the history
- add zod (https://zod.zenseact.com/)
- refactor TableAligner
- flatten batch structure
- more generic table processing: mapping -> pytree
- group sources/readers by filetype
- remove `scripts/build_table.py` + configs
- test data fixes
  • Loading branch information
egorchakov authored Nov 18, 2024
1 parent 18289b5 commit 15f45bf
Show file tree
Hide file tree
Showing 141 changed files with 1,741 additions and 1,944 deletions.
2 changes: 0 additions & 2 deletions .gitattributes

This file was deleted.

4 changes: 2 additions & 2 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
[submodule "src/rbyte/io/table/yaak/idl-repo"]
path = src/rbyte/io/table/yaak/idl-repo
[submodule "src/rbyte/io/yaak/idl-repo"]
path = src/rbyte/io/yaak/idl-repo
url = git@github.com:yaak-ai/idl-repo
9 changes: 2 additions & 7 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,20 @@ repos:
hooks:
- id: validate-pyproject

- repo: https://github.com/crate-ci/typos
rev: v1.26.8
hooks:
- id: typos

- repo: https://github.com/asottile/pyupgrade
rev: v3.19.0
hooks:
- id: pyupgrade

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.7.1
rev: v0.7.2
hooks:
- id: ruff
args: [--fix]
- id: ruff-format

- repo: https://github.com/DetachHead/basedpyright-pre-commit-mirror
rev: 1.19.1
rev: 1.21.0
hooks:
- id: basedpyright

Expand Down
10 changes: 0 additions & 10 deletions config/_templates/build_table.yaml

This file was deleted.

62 changes: 31 additions & 31 deletions config/_templates/dataset/carla.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,14 @@ _convert_: all
inputs:
#@ for input_id in drives:
(@=input_id@):
frame:
sources:
#@ for source_id in cameras:
(@=source_id@):
index_column: _idx_
reader:
_target_: rbyte.io.frame.DirectoryFrameReader
source:
_target_: rbyte.io.PathTensorSource
path: "${data_dir}/(@=input_id@)/frames/(@=source_id@).defish.mp4/576x324/{:09d}.jpg"
frame_decoder:
decoder:
_target_: simplejpeg.decode_jpeg
_partial_: true
colorspace: rgb
Expand All @@ -75,40 +75,40 @@ inputs:

#@ end

table:
builder:
_target_: rbyte.io.table.TableBuilder
_convert_: all
readers:
- path: ${data_dir}/(@=input_id@)/ego_logs.json
reader:
_target_: rbyte.io.table.JsonTableReader
_recursive_: false
fields:
records:
_idx_:
control.brake:
control.throttle:
control.steer:
state.velocity.value:
state.acceleration.value:
table_builder:
_target_: rbyte.io.table.TableBuilder
_convert_: all
readers:
ego_logs:
path: ${data_dir}/(@=input_id@)/ego_logs.json
reader:
_target_: rbyte.io.JsonTableReader
_recursive_: false
fields:
records:
_idx_:
control.brake:
control.throttle:
control.steer:
state.velocity.value:
state.acceleration.value:

transforms:
- _target_: rbyte.io.table.transforms.FpsResampler
source_fps: 20
target_fps: 30
transforms:
- _target_: rbyte.io.FpsResampler
source_fps: 20
target_fps: 30

merger:
_target_: rbyte.io.table.TableConcater
method: vertical
merger:
_target_: rbyte.io.TableConcater
method: vertical

filter: |
`control.throttle` > 0.5
filter: |
`control.throttle` > 0.5
#@ end

sample_builder:
_target_: rbyte.sample.builder.GreedySampleTableBuilder
_target_: rbyte.sample.GreedySampleBuilder
index_column: _idx_
length: 1
stride: 1
Expand Down
44 changes: 21 additions & 23 deletions config/_templates/dataset/mimicgen.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
#! https://huggingface.co/datasets/amandlek/mimicgen_datasets/blob/main/source/coffee.hdf5

#@yaml/text-templated-strings

#@ inputs = {
Expand All @@ -10,7 +8,7 @@
#@ }

#@ frame_keys = [
#@ 'obs/agentview_image',
#@ "obs/agentview_image",
#@ ]
---
_target_: rbyte.Dataset
Expand All @@ -20,38 +18,38 @@ inputs:
#@ for input_id, input_keys in inputs.items():
#@ for input_key in input_keys:
(@=input_id@)(@=input_key@):
frame:
sources:
#@ for frame_key in frame_keys:
(@=frame_key@):
index_column: _idx_
reader:
_target_: rbyte.io.frame.Hdf5FrameReader
source:
_target_: rbyte.io.Hdf5TensorSource
path: "${data_dir}/(@=input_id@).hdf5"
key: (@=input_key@)/(@=frame_key@)
#@ end

table:
builder:
_target_: rbyte.io.table.TableBuilder
_convert_: all
readers:
- path: "${data_dir}/(@=input_id@).hdf5"
reader:
_target_: rbyte.io.table.Hdf5TableReader
_recursive_: false
fields:
(@=input_key@):
_idx_:
obs/robot0_eef_pos:
table_builder:
_target_: rbyte.io.TableBuilder
_convert_: all
readers:
hdf5:
path: "${data_dir}/(@=input_id@).hdf5"
reader:
_target_: rbyte.io.Hdf5TableReader
_recursive_: false
fields:
(@=input_key@):
_idx_:
obs/robot0_eef_pos:

merger:
_target_: rbyte.io.table.TableConcater
method: vertical
merger:
_target_: rbyte.io.TableConcater
method: vertical
#@ end
#@ end

sample_builder:
_target_: rbyte.sample.builder.GreedySampleTableBuilder
_target_: rbyte.sample.GreedySampleBuilder
index_column: _idx_
length: 1
stride: 1
Expand Down
102 changes: 102 additions & 0 deletions config/_templates/dataset/nuscenes/mcap.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#! ytt template for an rbyte.Dataset config over nuScenes MCAP recordings.
#! Each entry of `inputs` becomes one dataset input backed by the file
#! <data_dir>/<input_id>.mcap.

#@yaml/text-templated-strings

#@ inputs = [
#@     "nuScenes-v1.0-mini-scene-0061-cut",
#@ ]

#! Camera name -> MCAP topic. Topics start with "/", so column names built as
#! mcap/<topic>/<field> contain a double slash (e.g. mcap//odom/vel.x below).
#@ camera_topics = {
#@     "CAM_FRONT": "/CAM_FRONT/image_rect_compressed",
#@     "CAM_FRONT_LEFT": "/CAM_FRONT_LEFT/image_rect_compressed",
#@     "CAM_FRONT_RIGHT": "/CAM_FRONT_RIGHT/image_rect_compressed",
#@ }
---
_target_: rbyte.Dataset
_convert_: all
_recursive_: false
inputs:
  #@ for input_id in inputs:
  (@=input_id@):
    #! One source per camera topic; frames are decoded with
    #! simplejpeg.decode_jpeg.
    sources:
      #@ for camera, topic in camera_topics.items():
      (@=camera@):
        index_column: mcap/(@=topic@)/_idx_
        source:
          _target_: rbyte.io.McapTensorSource
          path: "${data_dir}/(@=input_id@).mcap"
          topic: (@=topic@)
          decoder_factory: mcap_protobuf.decoder.DecoderFactory
          decoder:
            _target_: simplejpeg.decode_jpeg
            _partial_: true
            colorspace: rgb
            fastdct: true
            fastupsample: true
      #@ end

    table_builder:
      _target_: rbyte.io.TableBuilder
      _convert_: all
      readers:
        mcap:
          path: "${data_dir}/(@=input_id@).mcap"
          reader:
            _target_: rbyte.io.McapTableReader
            _recursive_: false
            decoder_factories:
              - rbyte.utils.mcap.ProtobufDecoderFactory
              - rbyte.utils.mcap.JsonDecoderFactory

            #! Fields to read per topic. A field with an empty value carries
            #! no explicit dtype spec; log_time is given a polars.Datetime
            #! with nanosecond unit.
            fields:
              #@ for topic in camera_topics.values():
              (@=topic@):
                log_time:
                  _target_: polars.Datetime
                  time_unit: ns

                _idx_:
              #@ end

              /odom:
                log_time:
                  _target_: polars.Datetime
                  time_unit: ns
                vel.x:

      merger:
        _target_: rbyte.io.TableAligner
        separator: "/"
        merge:
          mcap:
            #! The first camera topic is listed with only its key.
            #! NOTE(review): presumably it is the reference the remaining
            #! topics are aligned to -- confirm against TableAligner.
            #@ topic = camera_topics.values()[0]
            (@=topic@):
              key: log_time

            #@ for topic in camera_topics.values()[1:]:
            (@=topic@):
              key: log_time
              columns:
                _idx_:
                  method: asof
                  tolerance: 40ms
                  strategy: nearest
            #@ end

            /odom:
              key: log_time
              columns:
                vel.x:
                  method: interp

      filter: |
        `mcap//odom/vel.x` >= 8
      #! Explicit null, written the same way as sample_builder.filter below.
      cache: !!null
  #@ end

sample_builder:
  _target_: rbyte.sample.GreedySampleBuilder
  index_column: mcap/(@=camera_topics.values()[0]@)/_idx_
  length: 1
  stride: 1
  min_step: 1
  filter: !!null
Loading

0 comments on commit 15f45bf

Please sign in to comment.