publications | Jiaqi Li's Homepage

2025

Tech Report

Overview of the Amphion Toolkit (v0.2)

Jiaqi Li, Xueyao Zhang, Yuancheng Wang, Haorui He, Chaoren Wang, Li Wang, Huan Liao, Junyi Ao, Zeyu Xie, Yiqiao Huang, and others

arXiv preprint arXiv:2501.15442, 2025

arXiv Bib Code Huggingface

@article{li2025overview,
  title = {Overview of the {Amphion} Toolkit (v0.2)},
  author = {Li, Jiaqi and Zhang, Xueyao and Wang, Yuancheng and He, Haorui and Wang, Chaoren and Wang, Li and Liao, Huan and Ao, Junyi and Xie, Zeyu and Huang, Yiqiao and others},
  journal = {arXiv preprint arXiv:2501.15442},
  year = {2025},
  huggingface = {https://huggingface.co/amphion},
  tldr = {This is the technical report for the second version of the Amphion toolkit.},
}

TL;DR: This is the technical report for the second version of the Amphion toolkit.

2024

SLT 2024

Investigating neural audio codecs for speech language model-based speech generation

Jiaqi Li, Dongmei Wang, Xiaofei Wang, Yao Qian, Long Zhou, Shujie Liu, Midia Yousefi, Canrun Li, Chung-Hsien Tsai, Zhen Xiao, and others

In 2024 IEEE Spoken Language Technology Workshop (SLT), 2024

arXiv Bib

@inproceedings{li2024investigating,
  author       = {Li, Jiaqi and Wang, Dongmei and Wang, Xiaofei and Qian, Yao and Zhou, Long and Liu, Shujie and Yousefi, Midia and Li, Canrun and Tsai, Chung-Hsien and Xiao, Zhen and others},
  title        = {Investigating neural audio codecs for speech language model-based speech generation},
  booktitle    = {2024 IEEE Spoken Language Technology Workshop (SLT)},
  year         = {2024},
  pages        = {554--561},
  organization = {IEEE},
}

SLT 2024

Emilia: An Extensive, Multilingual, and Diverse Speech Dataset for Large-Scale Speech Generation

Haorui He, Zengqiang Shang, Chaoren Wang, Xuyuan Li, Yicheng Gu, Hua Hua, Liwei Liu, Chen Yang, Jiaqi Li, Peiyang Shi, Yuancheng Wang, Kai Chen, Pengyuan Zhang, and Zhizheng Wu

In 2024 IEEE Spoken Language Technology Workshop (SLT), 2024

arXiv Bib Huggingface

@inproceedings{he2024emilia,
  author      = {He, Haorui and Shang, Zengqiang and Wang, Chaoren and Li, Xuyuan and Gu, Yicheng and Hua, Hua and Liu, Liwei and Yang, Chen and Li, Jiaqi and Shi, Peiyang and Wang, Yuancheng and Chen, Kai and Zhang, Pengyuan and Wu, Zhizheng},
  title       = {Emilia: An Extensive, Multilingual, and Diverse Speech Dataset for Large-Scale Speech Generation},
  booktitle   = {2024 IEEE Spoken Language Technology Workshop (SLT)},
  year        = {2024},
  huggingface = {https://huggingface.co/datasets/amphion/Emilia},
  tldr        = {We collect a 100k hours in-the-wild speech dataset for speech generation.},
}

TL;DR: We collect a 100k hours in-the-wild speech dataset for speech generation.

SLT 2024

Amphion: An Open-Source Audio, Music, and Speech Generation Toolkit

Xueyao Zhang^*, Liumeng Xue^*, Yicheng Gu^*, Yuancheng Wang^*, Jiaqi Li, Haorui He, Chaoren Wang, Songting Liu, Xi Chen, Junan Zhang, Tze Ying Tang, Lexiao Zou, Mingxuan Wang, Jun Han, Kai Chen, Haizhou Li, and Zhizheng Wu

In 2024 IEEE Spoken Language Technology Workshop (SLT), 2024

arXiv Bib Code Huggingface

@inproceedings{zhang2024amphion,
  title = {Amphion: An Open-Source Audio, Music, and Speech Generation Toolkit},
  author = {Zhang, Xueyao and Xue, Liumeng and Gu, Yicheng and Wang, Yuancheng and Li, Jiaqi and He, Haorui and Wang, Chaoren and Liu, Songting and Chen, Xi and Zhang, Junan and Tang, Tze Ying and Zou, Lexiao and Wang, Mingxuan and Han, Jun and Chen, Kai and Li, Haizhou and Wu, Zhizheng},
  booktitle = {2024 IEEE Spoken Language Technology Workshop (SLT)},
  year = {2024},
  huggingface = {https://huggingface.co/amphion},
  tldr = {We develop a unified toolkit for audio, music, and speech generation.},
}

TL;DR: We develop a unified toolkit for audio, music, and speech generation.

ICASSP 2024

An initial investigation of neural replay simulator for over-the-air adversarial perturbations to automatic speaker verification

Jiaqi Li, Li Wang, Liumeng Xue, Lei Wang, and Zhizheng Wu

In ICASSP 2024-2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2024

arXiv Bib

@inproceedings{li2024initial,
  author       = {Li, Jiaqi and Wang, Li and Xue, Liumeng and Wang, Lei and Wu, Zhizheng},
  title        = {An initial investigation of neural replay simulator for over-the-air adversarial perturbations to automatic speaker verification},
  booktitle    = {ICASSP 2024-2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  year         = {2024},
  pages        = {4635--4639},
  organization = {IEEE},
}

ICASSP 2024

AdvSV: An over-the-air adversarial attack dataset for speaker verification

Li Wang, Jiaqi Li, Yuhao Luo, Jiahao Zheng, Lei Wang, Hao Li, Ke Xu, Chengfang Fang, Jie Shi, and Zhizheng Wu

In ICASSP 2024-2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2024

arXiv Bib

@inproceedings{wang2024advsv,
  title = {{AdvSV}: An over-the-air adversarial attack dataset for speaker verification},
  author = {Wang, Li and Li, Jiaqi and Luo, Yuhao and Zheng, Jiahao and Wang, Lei and Li, Hao and Xu, Ke and Fang, Chengfang and Shi, Jie and Wu, Zhizheng},
  booktitle = {ICASSP 2024-2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages = {4555--4559},
  year = {2024},
  organization = {IEEE},
}

Debatts: Zero-shot debating text-to-speech synthesis

Yiqiao Huang, Yuancheng Wang, Jiaqi Li, Haotian Guo, Haorui He, Shunsi Zhang, and Zhizheng Wu

arXiv preprint arXiv:2411.06540, 2024

2023

ROME: Testing image captioning systems via recursive object melting

Boxi Yu, Zhiqing Zhong, Jiaqi Li, Yixing Yang, Shilin He, and Pinjia He

In Proceedings of the 32nd ACM SIGSOFT International Symposium on Software Testing and Analysis, 2023