Bibtex
|
ACM
|
MLA
|
APA
|
Harvard
|
Vancouver
|
Chicago
% arXiv preprint. The eprint number is the identifier embedded in the DOI.
% Title is stored in Title Case without whole-title bracing so the
% bibliography style decides the final casing.
@misc{Rafailov2023Direct,
  author        = {Rafailov, Rafael and Sharma, Archit and Mitchell, Eric and Ermon, Stefano and Manning, Christopher D. and Finn, Chelsea},
  title         = {Direct Preference Optimization: Your Language Model Is Secretly a Reward Model},
  year          = {2023},
  publisher     = {arXiv},
  eprint        = {2305.18290},
  archiveprefix = {arXiv},
  doi           = {10.48550/arXiv.2305.18290},
}
Rafael Rafailov, Archit Sharma, Eric Mitchell, Stefano Ermon, Christopher D. Manning, and Chelsea Finn. 2023. Direct Preference Optimization: Your Language Model Is Secretly a Reward Model. arXiv:2305.18290. DOI: https://doi.org/10.48550/arXiv.2305.18290
Rafailov, Rafael, Sharma, Archit, Mitchell, Eric, Ermon, Stefano, Manning, Christopher D., and Finn, Chelsea. "Direct Preference Optimization: Your Language Model Is Secretly a Reward Model". arXiv:2305.18290. 2023.
Rafailov, R., Sharma, A., Mitchell, E., Ermon, S., Manning, C. D., & Finn, C. (2023). Direct Preference Optimization: Your Language Model Is Secretly a Reward Model. arXiv:2305.18290.
Rafailov, R., Sharma, A., Mitchell, E., Ermon, S., Manning, C.D., Finn, C., 2023. Direct Preference Optimization: Your Language Model Is Secretly a Reward Model. arXiv:2305.18290.
Rafailov R, Sharma A, Mitchell E, Ermon S, Manning CD, Finn C. Direct Preference Optimization: Your Language Model Is Secretly a Reward Model. arXiv:2305.18290. 2023.
Rafailov, Rafael, Sharma, Archit, Mitchell, Eric, Ermon, Stefano, Manning, Christopher D., and Finn, Chelsea. "Direct Preference Optimization: Your Language Model Is Secretly a Reward Model". arXiv:2305.18290. 2023.