% MoC-System (Cai, Qin, Huang), ASPLOS '25, Volume 2 -- conference paper.
% Only case-sensitive words in the title are brace-protected so that the
% bibliography style remains free to apply its own title casing.
@inproceedings{cai:asplos:2025:mocsystem,
  author    = {Cai, Weilin and Qin, Le and Huang, Jiayi},
  title     = {{MoC-System}: Efficient Fault Tolerance for Sparse {Mixture-of-Experts} Model Training},
  booktitle = {Proceedings of the 30th {ACM} International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2 ({ASPLOS} '25)},
  pages     = {655--671},
  month     = apr,
  year      = {2025},
}