BibTeX:
@misc{Qiu2025Gated,
  title     = {{Gated Attention for Large Language Models: Non-linearity, Sparsity, and Attention-Sink-Free}},
  author    = {Qiu, Zihan and Wang, Zekun and Zheng, Bo and Huang, Zeyu and Wen, Kaiyue and Yang, Songlin and Men, Rui and Yu, Le and Huang, Fei and Huang, Suozhi and Liu, Dayiheng and Zhou, Jingren and Lin, Junyang},
  year      = {2025},
  publisher = {arXiv},
  doi       = {10.48550/arXiv.2505.06708},
}

ACM:
Zihan Qiu, Zekun Wang, Bo Zheng, Zeyu Huang, Kaiyue Wen, Songlin Yang, Rui Men, Le Yu, Fei Huang, Suozhi Huang, Dayiheng Liu, Jingren Zhou, and Junyang Lin. 2025. Gated Attention for Large Language Models: Non-linearity, Sparsity, and Attention-Sink-Free. arXiv:2505.06708. DOI: https://doi.org/10.48550/arXiv.2505.06708

MLA:
Qiu, Zihan, Wang, Zekun, Zheng, Bo, Huang, Zeyu, Wen, Kaiyue, Yang, Songlin, Men, Rui, Yu, Le, Huang, Fei, Huang, Suozhi, Liu, Dayiheng, Zhou, Jingren, and Lin, Junyang. "Gated Attention for Large Language Models: Non-linearity, Sparsity, and Attention-Sink-Free". arXiv:2505.06708, 2025.

APA:
Qiu, Z., Wang, Z., Zheng, B., Huang, Z., Wen, K., Yang, S., Men, R., Yu, L., Huang, F., Huang, S., Liu, D., Zhou, J., & Lin, J. (2025). Gated Attention for Large Language Models: Non-linearity, Sparsity, and Attention-Sink-Free. arXiv:2505.06708.

Harvard:
Qiu, Z., Wang, Z., Zheng, B., Huang, Z., Wen, K., Yang, S., Men, R., Yu, L., Huang, F., Huang, S., Liu, D., Zhou, J. and Lin, J., 2025. Gated Attention for Large Language Models: Non-linearity, Sparsity, and Attention-Sink-Free. arXiv:2505.06708.

Vancouver:
Qiu Z, Wang Z, Zheng B, Huang Z, Wen K, Yang S, Men R, Yu L, Huang F, Huang S, Liu D, Zhou J, Lin J. Gated Attention for Large Language Models: Non-linearity, Sparsity, and Attention-Sink-Free. arXiv:2505.06708; 2025.

Chicago:
Qiu, Zihan, Wang, Zekun, Zheng, Bo, Huang, Zeyu, Wen, Kaiyue, Yang, Songlin, Men, Rui, Yu, Le, Huang, Fei, Huang, Suozhi, Liu, Dayiheng, Zhou, Jingren, and Lin, Junyang. "Gated Attention for Large Language Models: Non-linearity, Sparsity, and Attention-Sink-Free". arXiv:2505.06708, 2025.