Please cite with the following BibTeX:
% NOTE(review): "arXiv:X.X" in the journal field looks like an unfilled placeholder;
% replace it with the real arXiv identifier once it is known.
@article{caffagni2025seeing,
  author  = {Caffagni, Davide and Sarto, Sara and Cornia, Marcella and Baraldi, Lorenzo and Dovesi, Pier Luigi and Roohi, Shaghayegh and Granroth-Wilding, Mark and Cucchiara, Rita},
  title   = {Seeing Beyond Words: Self-Supervised Visual Learning for Multimodal Large Language Models},
  journal = {arXiv preprint arXiv:X.X},
  year    = {2025},
}
