% STOW (CoRL 2023). Acronyms are brace-protected so sentence-casing styles keep
% their capitalisation; the arXiv identifier is exposed through eprint/archiveprefix
% in addition to the PDF link kept in the file field.
@inproceedings{Li2023,
  author        = {Li, Yi and Zhang, Muru and Grotz, Markus and Mo, Kaichun and Fox, Dieter},
  title         = {{STOW}: Discrete-Frame Segmentation and Tracking of Unseen Objects for Warehouse Picking Robots},
  booktitle     = {Conference on Robot Learning ({CoRL})},
  year          = {2023},
  eprint        = {2311.02337},
  archiveprefix = {arXiv},
  url           = {https://sites.google.com/view/stow-corl23},
  file          = {https://arxiv.org/pdf/2311.02337},
  abstract      = {Segmentation and tracking of unseen object instances in discrete frames pose a significant challenge in dynamic industrial robotic contexts, such as distribution warehouses. Here, robots must handle object rearrangements, including shifting, removal, and partial occlusion by new items, and track these items after substantial temporal gaps. The task is further complicated when robots encounter objects beyond their training sets, thereby requiring the ability to segment and track previously unseen items. Considering that continuous observation is often inaccessible in such settings, our task involves working with a discrete set of frames separated by indefinite periods, during which substantial changes to the scene may occur. This task also translates to domestic robotic applications, such as table rearrangement. To address these demanding challenges, we introduce new synthetic and real-world datasets that replicate these industrial and household scenarios. Furthermore, we propose a novel paradigm for joint segmentation and tracking in discrete frames, alongside a transformer module that facilitates efficient inter-frame communication. Our approach significantly outperforms recent methods in our experiments. For additional results and videos, please visit https://sites.google.com/view/stow-corl23.},
}
Abstract:
Segmentation and tracking of unseen object instances in discrete frames pose a significant challenge in dynamic industrial robotic contexts, such as distribution warehouses. Here, robots must handle object rearrangements, including shifting, removal, and partial occlusion by new items, and track these items after substantial temporal gaps. The task is further complicated when robots encounter objects beyond their training sets, thereby requiring the ability to segment and track previously unseen items. Considering that continuous observation is often inaccessible in such settings, our task involves working with a discrete set of frames separated by indefinite periods, during which substantial changes to the scene may occur. This task also translates to domestic robotic applications, such as table rearrangement. To address these demanding challenges, we introduce new synthetic and real-world datasets that replicate these industrial and household scenarios. Furthermore, we propose a novel paradigm for joint segmentation and tracking in discrete frames, alongside a transformer module that facilitates efficient inter-frame communication. Our approach significantly outperforms recent methods in our experiments. For additional results and videos, please visit https://sites.google.com/view/stow-corl23.