Source prompt:
machine gun
↓
Target prompt:
laser gun
Input Video
ControlVideo+ZEUS
TokenFlow+ZEUS
RAVE+ZEUS
AvED (Ours)
Source prompt:
a dog is howling
↓
Target prompt:
a lion is roaring
Source prompt:
a race car is driving on the road
↓
Target prompt:
a police car is driving on the road
Source prompt:
firework lit up in the night sky
↓
Target prompt:
a splash of water lit up in the night sky
% arXiv preprint: the identifier lives in eprint/archiveprefix (not in a
% fake journal name) so styles can format and link it themselves.
@misc{lin2025zeroshot,
  author        = {Lin, Yan-Bo and Lin, Kevin and Yang, Zhengyuan and Li, Linjie and Wang, Jianfeng and Lin, Chung-Ching and Wang, Xiaofei and Bertasius, Gedas and Wang, Lijuan},
  title         = {Zero-Shot Audio-Visual Editing via Cross-Modal Delta Denoising},
  year          = {2025},
  eprint        = {2503.20782},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2503.20782},
}