I am a final-year PhD candidate at UIUC CS, advised by Lingming Zhang. I was also a student researcher at Meta FAIR from 2024 to 2025, working on the CodeGen/CodeLlama team.
I train code agents.
Research impact: I lead Self-Play SWE-RL (SSR) (ICML’26), SWE-RL (NeurIPS’25), Magicoder (ICML’24), and SelfCodeAlign (NeurIPS’24), projects that have captured broad attention and are widely adopted across industry and open source.
% Self-Play SWE-RL conference paper (2026).
% The method name is brace-protected so sentence-casing styles keep its capitals.
@inproceedings{wei2026toward,
  author    = {Wei, Yuxiang and Sun, Zhiqing and McMilin, Emily and Gehring, Jonas and Zhang, David and Synnaeve, Gabriel and Fried, Daniel and Zhang, Lingming and Wang, Sida},
  title     = {Toward Training Superintelligent Software Agents through {Self-Play} {SWE-RL}},
  booktitle = {Forty-third International Conference on Machine Learning},
  year      = {2026},
  url       = {https://openreview.net/forum?id=0ophJB76sC}
}
% Live-SWE-agent arXiv preprint (2511.13646).
% The system name is brace-protected so its mixed capitalisation survives sentence casing.
@misc{xia2025livesweagent,
  author        = {Xia, Chunqiu Steven and Wang, Zhe and Yang, Yan and Wei, Yuxiang and Zhang, Lingming},
  title         = {{Live-SWE-agent}: Can Software Engineering Agents Self-Evolve on the Fly?},
  year          = {2025},
  eprint        = {2511.13646},
  archivePrefix = {arXiv},
  primaryClass  = {cs.SE},
  url           = {https://arxiv.org/abs/2511.13646}
}
% CWM arXiv preprint (2510.02387); the url field points at the Meta publication page.
% The acronyms CWM and LLM are brace-protected against sentence casing.
% The author list is kept exactly as entered.
@misc{copet2025cwm,
  author        = {Jade Copet and Quentin Carbonneaux and Gal Cohen and Jonas Gehring and Jacob Kahn and Jannik Kossen and Felix Kreuk and Emily McMilin and Michel Meyer and Yuxiang Wei and David Zhang and Kunhao Zheng and Jordi Armengol-Estapé and Pedram Bashiri and Maximilian Beck and Pierre Chambon and Abhishek Charnalia and Chris Cummins and Juliette Decugis and Zacharias V. Fisches and François Fleuret and Fabian Gloeckle and Alex Gu and Michael Hassid and Daniel Haziza and Badr Youbi Idrissi and Christian Keller and Rahul Kindi and Hugh Leather and Gallil Maimon and Aram Markosyan and Francisco Massa and Pierre-Emmanuel Mazaré and Vegard Mella and Naila Murray and Keyur Muzumdar and Peter O'Hearn and Matteo Pagliardini and Dmitrii Pedchenko and Tal Remez and Volker Seeker and Marco Selvi and Oren Sultan and Sida Wang and Luca Wehrstedt and Ori Yoran and Lingming Zhang and Taco Cohen and Yossi Adi and Gabriel Synnaeve},
  title         = {{CWM}: An Open-Weights {LLM} for Research on Code Generation with World Models},
  year          = {2025},
  eprint        = {2510.02387},
  archivePrefix = {arXiv},
  primaryClass  = {cs.SE},
  url           = {https://ai.meta.com/research/publications/cwm/}
}
% SWE-RL conference paper (2025).
% SWE-RL and LLM are brace-protected so sentence-casing styles do not lowercase them.
@inproceedings{wei2025swerl,
  author    = {Wei, Yuxiang and Duchenne, Olivier and Copet, Jade and Carbonneaux, Quentin and Zhang, Lingming and Fried, Daniel and Synnaeve, Gabriel and Singh, Rishabh and Wang, Sida},
  title     = {{SWE-RL}: Advancing {LLM} Reasoning via Reinforcement Learning on Open Software Evolution},
  booktitle = {The Thirty-ninth Annual Conference on Neural Information Processing Systems},
  year      = {2025},
  url       = {https://openreview.net/forum?id=ULblO61XZ0}
}
% PurpCode conference paper (2025).
% The camel-case name is brace-protected so it is not rendered as Purpcode.
@inproceedings{liu2025purpcode,
  author    = {Liu, Jiawei and Diwan, Nirav and Wang, Zhe and Zhai, Haoyu and Zhou, Xiaona and Nguyen, Kiet A. and Yu, Tianjiao and Wahed, Muntasir and Deng, Yinlin and Benkraouda, Hadjer and Wei, Yuxiang and Zhang, Lingming and Lourentzou, Ismini and Wang, Gang},
  title     = {{PurpCode}: Reasoning for Safer Code Generation},
  booktitle = {The Thirty-ninth Annual Conference on Neural Information Processing Systems},
  year      = {2025},
  url       = {https://openreview.net/forum?id=VUoY5kacG5}
}
% SelfCodeAlign conference paper (2024).
% Names use the comma form so the particles in de Vries and von Werra are parsed
% as part of the surname; the capitalised Von previously pushed it into the
% given name. The camel-case title word is brace-protected against sentence casing.
@inproceedings{wei2024selfcodealign,
  author    = {Wei, Yuxiang and Cassano, Federico and Liu, Jiawei and Ding, Yifeng and Jain, Naman and Mueller, Zachary and de Vries, Harm and von Werra, Leandro and Guha, Arjun and Zhang, Lingming},
  title     = {{SelfCodeAlign}: Self-Alignment for Code Generation},
  booktitle = {The Thirty-eighth Annual Conference on Neural Information Processing Systems},
  year      = {2024},
  url       = {https://openreview.net/forum?id=xXRnUU7xTL}
}
% Arctic-SnowCoder workshop paper (2025).
% The model name is brace-protected so its inner capitals survive sentence casing.
@inproceedings{wei2025arcticsnowcoder,
  author    = {Wei, Yuxiang and Han, Hojae and Samdani, Rajhans},
  title     = {{Arctic-SnowCoder}: Demystifying High-Quality Data in Code Pretraining},
  booktitle = {ICLR 2025 Third Workshop on Deep Learning for Code},
  year      = {2025},
  url       = {https://openreview.net/forum?id=lP44oj9cWU}
}
% Conference paper (2024) on evaluating language models for efficient code generation.
@inproceedings{liu2024evaluating,
  author    = {Liu, Jiawei and Xie, Songrun and Wang, Junhao and Wei, Yuxiang and Ding, Yifeng and Zhang, Lingming},
  title     = {Evaluating Language Models for Efficient Code Generation},
  booktitle = {First Conference on Language Modeling},
  year      = {2024},
  url       = {https://openreview.net/forum?id=IBCBMeAhmC}
}
% RepoQA workshop paper (2024).
% The benchmark name is brace-protected so it is not rendered as Repoqa.
@inproceedings{liu2024repoqa,
  author    = {Liu, Jiawei and Tian, Jia Le and Daita, Vijay and Wei, Yuxiang and Ding, Yifeng and Wang, Yuhan Katherine and Yang, Jun and Zhang, Lingming},
  title     = {{RepoQA}: Evaluating Long Context Code Understanding},
  booktitle = {First Workshop on Long-Context Foundation Models @ ICML 2024},
  year      = {2024},
  url       = {https://openreview.net/forum?id=hK9YSrFuGf}
}
% StarCoder2-Instruct blog post (2024), identified only by its url.
% The model name is brace-protected against sentence casing, and names use the
% comma form so the particles in de Vries and von Werra stay with the surname.
@online{wei2024starcoder2instruct,
  author = {Wei, Yuxiang and Cassano, Federico and Liu, Jiawei and Ding, Yifeng and Jain, Naman and de Vries, Harm and von Werra, Leandro and Guha, Arjun and Zhang, Lingming},
  title  = {{StarCoder2-Instruct}: Fully Transparent and Permissive Self-Alignment for Code Generation},
  year   = {2024},
  url    = {https://huggingface.co/blog/sc2-instruct}
}
% StarCoder 2 / The Stack v2 arXiv preprint (2402.19173).
% The eprint field names are spelled archivePrefix / primaryClass to match the
% other arXiv entries in this file; field names are case-insensitive, so this is
% purely for consistency. Product names in the title are brace-protected against
% sentence casing. The author list is kept exactly as entered.
@misc{lozhkov2024starcoder2,
  author        = {Anton Lozhkov and Raymond Li and Loubna Ben Allal and Federico Cassano and Joel Lamy-Poirier and Nouamane Tazi and Ao Tang and Dmytro Pykhtar and Jiawei Liu and Yuxiang Wei and Tianyang Liu and Max Tian and Denis Kocetkov and Arthur Zucker and Younes Belkada and Zijian Wang and Qian Liu and Dmitry Abulkhanov and Indraneil Paul and Zhuang Li and Wen-Ding Li and Megan Risdal and Jia Li and Jian Zhu and Terry Yue Zhuo and Evgenii Zheltonozhskii and Nii Osae Osae Dade and Wenhao Yu and Lucas Krauß and Naman Jain and Yixuan Su and Xuanli He and Manan Dey and Edoardo Abati and Yekun Chai and Niklas Muennighoff and Xiangru Tang and Muhtasham Oblokulov and Christopher Akiki and Marc Marone and Chenghao Mou and Mayank Mishra and Alex Gu and Binyuan Hui and Tri Dao and Armel Zebaze and Olivier Dehaene and Nicolas Patry and Canwen Xu and Julian McAuley and Han Hu and Torsten Scholak and Sebastien Paquet and Jennifer Robinson and Carolyn Jane Anderson and Nicolas Chapados and Mostofa Patwary and Nima Tajbakhsh and Yacine Jernite and Carlos Muñoz Ferrandis and Lingming Zhang and Sean Hughes and Thomas Wolf and Arjun Guha and Leandro von Werra and Harm de Vries},
  title         = {{StarCoder 2} and {The Stack v2}: The Next Generation},
  year          = {2024},
  eprint        = {2402.19173},
  archivePrefix = {arXiv},
  primaryClass  = {cs.SE},
  url           = {https://arxiv.org/abs/2402.19173}
}
% XFT conference paper (ACL 2024, Long Papers volume).
% The acronym XFT is brace-protected so sentence-casing styles do not render it as Xft.
@inproceedings{ding2024xft,
  author    = {Ding, Yifeng and Liu, Jiawei and Wei, Yuxiang and Zhang, Lingming},
  title     = {{XFT}: Unlocking the Power of Code Instruction Tuning by Simply Merging Upcycled Mixture-of-Experts},
  booktitle = {Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = aug,
  year      = {2024},
  pages     = {12941--12955},
  publisher = {Association for Computational Linguistics},
  address   = {Bangkok, Thailand},
  url       = {https://aclanthology.org/2024.acl-long.699}
}
% Magicoder conference paper (ICML 2024, PMLR volume 235).
% The month uses the standard jul macro instead of a hard-coded day-range string,
% so styles can format and sort it. The whole method name OSS-Instruct is
% brace-protected so sentence casing does not lowercase Instruct.
@inproceedings{wei2024magicoder,
  author    = {Wei, Yuxiang and Wang, Zhe and Liu, Jiawei and Ding, Yifeng and Zhang, Lingming},
  title     = {Magicoder: Empowering Code Generation with {OSS-Instruct}},
  booktitle = {Proceedings of the 41st International Conference on Machine Learning},
  series    = {Proceedings of Machine Learning Research},
  volume    = {235},
  pages     = {52632--52657},
  month     = jul,
  year      = {2024},
  publisher = {PMLR},
  pdf       = {https://raw.githubusercontent.com/mlresearch/v235/main/assets/wei24h/wei24h.pdf},
  url       = {https://proceedings.mlr.press/v235/wei24h.html}
}
% Conference paper (2023) on fusing large language models with completion engines
% for automated program repair.
% The page range uses the ASCII double hyphen rather than a Unicode en-dash, so
% classic BibTeX styles typeset it correctly.
@inproceedings{wei2023copiloting,
  author    = {Wei, Yuxiang and Xia, Chunqiu Steven and Zhang, Lingming},
  title     = {Copiloting the Copilots: Fusing Large Language Models with Completion Engines for Automated Program Repair},
  booktitle = {Proceedings of the 31st ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering},
  year      = {2023},
  pages     = {172--184},
  numpages  = {13},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {San Francisco, CA, USA},
  isbn      = {9798400703270},
  doi       = {10.1145/3611643.3616271},
  url       = {https://doi.org/10.1145/3611643.3616271},
  keywords  = {Completion Engine, Program Repair, Large Language Model}
}
% Conference paper (ICSE '23) on automated program repair with large pre-trained
% language models.
@inproceedings{xia2023automated,
  author    = {Xia, Chunqiu Steven and Wei, Yuxiang and Zhang, Lingming},
  title     = {Automated Program Repair in the Era of Large Pre-Trained Language Models},
  booktitle = {Proceedings of the 45th International Conference on Software Engineering},
  series    = {ICSE '23},
  year      = {2023},
  pages     = {1482--1494},
  numpages  = {13},
  publisher = {IEEE Press},
  location  = {Melbourne, Victoria, Australia},
  isbn      = {9781665457019},
  doi       = {10.1109/ICSE48619.2023.00129},
  url       = {https://doi.org/10.1109/ICSE48619.2023.00129}
}
% Journal article (Proc. ACM Program. Lang., volume 6, OOPSLA1, 2022) on
% coverage-guided fuzzing of tensor compilers.
% The month is the bare apr macro, not a braced literal, so styles can expand and
% localise it. The acronym IR is brace-protected against sentence casing.
@article{liu2022coverage,
  author     = {Liu, Jiawei and Wei, Yuxiang and Yang, Sen and Deng, Yinlin and Zhang, Lingming},
  title      = {Coverage-Guided Tensor Compiler Fuzzing with Joint {IR}-Pass Mutation},
  journal    = {Proc. ACM Program. Lang.},
  volume     = {6},
  number     = {OOPSLA1},
  articleno  = {73},
  numpages   = {26},
  year       = {2022},
  month      = apr,
  issue_date = {April 2022},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  doi        = {10.1145/3527317},
  url        = {https://doi.org/10.1145/3527317},
  keywords   = {Compiler Testing, Machine Learning Systems, Fuzzing},
  abstract   = {In the past decade, Deep Learning (DL) systems have been widely deployed in various application domains to facilitate our daily life, e.g., natural language processing, healthcare, activity recognition, and autonomous driving. Meanwhile, it is extremely challenging to ensure the correctness of DL systems (e.g., due to their intrinsic nondeterminism), and bugs in DL systems can cause serious consequences and may even threaten human lives. In the literature, researchers have explored various techniques to test, analyze, and verify DL models, since their quality directly affects the corresponding system behaviors. Recently, researchers have also proposed novel techniques for testing the underlying operator-level DL libraries (such as TensorFlow and PyTorch), which provide general binary implementations for each high-level DL operator and are the foundation for running DL models on different hardware platforms. However, there is still limited work targeting the reliability of the emerging tensor compilers (also known as DL compilers), which aim to automatically compile high-level tensor computation graphs directly into high-performance binaries for better efficiency, portability, and scalability than traditional operator-level libraries. Therefore, in this paper, we target the important problem of tensor compiler testing, and have proposed Tzer, a practical fuzzing technique for the widely used TVM tensor compiler. Tzer focuses on mutating the low-level Intermediate Representation (IR) for TVM due to the limited mutation space for the high-level IR. More specifically, Tzer leverages both general-purpose and tensor-compiler-specific mutators guided by coverage feedback for diverse and evolutionary IR mutation; furthermore, since tensor compilers provide various passes (i.e., transformations) for IR optimization, Tzer also performs pass mutation in tandem with IR mutation for more effective fuzzing. 
Our experimental results show that Tzer substantially outperforms existing fuzzing techniques on tensor compiler testing, with 75\% higher coverage and 50\% more valuable tests than the 2nd-best technique. Also, different components of Tzer have been validated via ablation study. To date, Tzer has detected 49 previously unknown bugs for TVM, with 37 bugs confirmed and 25 bugs fixed (PR merged).}
}
% Journal article (The Visual Computer, volume 37, number 9, 2021) on generative
% character inpainting.
% The serial identifier 1432-2315 is stored in issn: it has the eight-digit ISSN
% form, not an ISBN form, and this entry is a journal article.
% The trailing fields (da, ty, id, date-added, date-modified, Bdsk-Url-1) look like
% reference-manager export bookkeeping; they are kept unchanged.
@article{li2021character,
  author        = {Li, Haolong and Zhong, Zizheng and Guan, Wei and Du, Chenghao and Yang, Yu and Wei, Yuxiang and Ye, Chen},
  title         = {Generative character inpainting guided by structural information},
  journal       = {The Visual Computer},
  volume        = {37},
  number        = {9},
  pages         = {2895--2906},
  year          = {2021},
  issn          = {1432-2315},
  doi           = {10.1007/s00371-021-02218-y},
  url           = {https://doi.org/10.1007/s00371-021-02218-y},
  da            = {2021/09/01},
  ty            = {JOUR},
  id            = {Li2021},
  date-added    = {2023-10-29 20:40:01 -0500},
  date-modified = {2023-10-29 20:40:01 -0500},
  Bdsk-Url-1    = {https://doi.org/10.1007/s00371-021-02218-y}
}