\n","updatedAt":"2024-06-08T23:32:05.436Z","author":{"_id":"6186ddf6a7717cb375090c01","avatarUrl":"/avatars/716b6a7d1094c8036b2a8a7b9063e8aa.svg","fullname":"Julien BLANCHON","name":"blanchon","type":"user","isPro":true,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":146}},"numEdits":0,"identifiedLanguage":{"language":"en","probability":0.5498823523521423},"editors":["blanchon"],"editorAvatarUrls":["/avatars/716b6a7d1094c8036b2a8a7b9063e8aa.svg"],"reactions":[],"isReport":false}}],"primaryEmailConfirmed":false,"paper":{"id":"2401.05561","authors":[{"_id":"65a09d6ce969415381f1a31d","user":{"_id":"65a52766215aabac489e3468","avatarUrl":"/avatars/fe05e22cd7e12e961296426434e17c76.svg","isPro":false,"fullname":"Lichao Sun","user":"sunlichao137","type":"user"},"name":"Lichao Sun","status":"claimed_verified","statusLastChangedAt":"2024-03-21T08:29:16.487Z","hidden":false},{"_id":"65a09d6ce969415381f1a31e","user":{"_id":"639d94ab7145123e0d44e48a","avatarUrl":"/avatars/5bb6a65b306d1383c4a8bcd9334b470a.svg","isPro":false,"fullname":"Yue Huang","user":"HowieHwong","type":"user"},"name":"Yue Huang","status":"claimed_verified","statusLastChangedAt":"2025-02-19T09:08:53.072Z","hidden":false},{"_id":"65a09d6ce969415381f1a31f","user":{"_id":"64b82c659ebb69a79f0073f6","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/64b82c659ebb69a79f0073f6/INEHG7kijEHOhFZMjdBIM.png","isPro":false,"fullname":"Haoran Wang","user":"wang2226","type":"user"},"name":"Haoran Wang","status":"claimed_verified","statusLastChangedAt":"2024-01-15T07:42:05.891Z","hidden":false},{"_id":"65a09d6ce969415381f1a320","name":"Siyuan Wu","hidden":false},{"_id":"65a09d6ce969415381f1a321","user":{"_id":"67a99d1fef1439e285c4cbec","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/no-auth/VrwUmrY2wsg4sVSIMc--K.png","isPro":false,"fullname":"Qihui Zhang","user":"77Hui","type":"user"},"name":"Qihui 
Zhang","status":"claimed_verified","statusLastChangedAt":"2025-03-21T11:51:37.824Z","hidden":false},{"_id":"65a09d6ce969415381f1a322","user":{"_id":"65a13cb1c5770b27aef2a2bc","avatarUrl":"/avatars/88ec5b988f10ad9fd4d469ae2fa34680.svg","isPro":false,"fullname":"Chujie Gao","user":"Flossie","type":"user"},"name":"Chujie Gao","status":"claimed_verified","statusLastChangedAt":"2025-02-21T10:01:10.373Z","hidden":false},{"_id":"65a09d6ce969415381f1a323","name":"Yixin Huang","hidden":false},{"_id":"65a09d6ce969415381f1a324","name":"Wenhan Lyu","hidden":false},{"_id":"65a09d6ce969415381f1a325","name":"Yixuan Zhang","hidden":false},{"_id":"65a09d6ce969415381f1a326","user":{"_id":"651dc3862c5d36450919d0dc","avatarUrl":"/avatars/03fb549171d3b103914aa64103e5739e.svg","isPro":false,"fullname":"Xiner Li","user":"hyanan16","type":"user"},"name":"Xiner Li","status":"admin_assigned","statusLastChangedAt":"2024-01-12T10:49:50.423Z","hidden":false},{"_id":"65a09d6ce969415381f1a327","name":"Zhengliang Liu","hidden":false},{"_id":"65a09d6ce969415381f1a328","user":{"_id":"6244de1c1c560fb11edfca44","avatarUrl":"/avatars/36558928bd04be7f49837d4c603681d7.svg","isPro":true,"fullname":"Yixin Liu","user":"henryL7","type":"user"},"name":"Yixin Liu","status":"admin_assigned","statusLastChangedAt":"2024-01-12T10:50:25.731Z","hidden":true},{"_id":"65a09d6ce969415381f1a329","name":"Yijue Wang","hidden":false},{"_id":"65a09d6ce969415381f1a32a","name":"Zhikun Zhang","hidden":false},{"_id":"65a09d6ce969415381f1a32b","user":{"_id":"65cb79db6427380bc21261e2","avatarUrl":"/avatars/a003eb5d0955417329c1a4170ae65879.svg","isPro":false,"fullname":"Bhavya Kailkhura","user":"bhavyakailkhura","type":"user"},"name":"Bhavya Kailkhura","status":"claimed_verified","statusLastChangedAt":"2024-05-28T12:44:31.015Z","hidden":false},{"_id":"65a09d6ce969415381f1a32c","user":{"_id":"649dbcc4e0fff1ed099dc80a","avatarUrl":"/avatars/c87c273ca628dbcddccbf1ee19b2ce33.svg","isPro":false,"fullname":"Caiming 
Xiong","user":"cxiong","type":"user"},"name":"Caiming Xiong","status":"admin_assigned","statusLastChangedAt":"2024-01-12T10:51:13.398Z","hidden":false},{"_id":"65a09d6ce969415381f1a32d","name":"Chao Zhang","hidden":false},{"_id":"65a09d6ce969415381f1a32e","name":"Chaowei Xiao","hidden":false},{"_id":"65a09d6ce969415381f1a32f","user":{"_id":"62aba526cae4462c0c6caa0f","avatarUrl":"/avatars/430560ec2c2547f819225769ab432f30.svg","isPro":false,"fullname":"Chunyuan Li","user":"Chunyuan24","type":"user"},"name":"Chunyuan Li","status":"admin_assigned","statusLastChangedAt":"2024-01-12T10:51:35.436Z","hidden":false},{"_id":"65a09d6ce969415381f1a330","user":{"_id":"64ff67722ad36636be6c4542","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/noauth/sLIrNelAWPVOy4e3oo5LB.jpeg","isPro":false,"fullname":"Eric Xing","user":"EricX003","type":"user"},"name":"Eric Xing","status":"admin_assigned","statusLastChangedAt":"2024-01-12T10:51:42.959Z","hidden":false},{"_id":"65a09d6ce969415381f1a331","user":{"_id":"64cbc3e2a257a3212c00a115","avatarUrl":"/avatars/836e61be4aeda2080ddf2db9f2626cc6.svg","isPro":false,"fullname":"Furong Huang Lab at UMD","user":"furongh-lab","type":"user"},"name":"Furong Huang","status":"admin_assigned","statusLastChangedAt":"2024-01-12T10:51:53.877Z","hidden":false},{"_id":"65a09d6ce969415381f1a332","user":{"_id":"62aa53fc5df2251f7df798c6","avatarUrl":"/avatars/1c7828ce861c30ddd0afceff355a92ec.svg","isPro":false,"fullname":"Hao","user":"haoliu","type":"user"},"name":"Hao Liu","status":"admin_assigned","statusLastChangedAt":"2024-01-12T10:52:12.162Z","hidden":false},{"_id":"65a09d6ce969415381f1a333","name":"Heng Ji","hidden":false},{"_id":"65a09d6ce969415381f1a334","user":{"_id":"64e5414fe56d371e42da69c4","avatarUrl":"/avatars/7d4a101b0338410bc736e04d7a611aca.svg","isPro":false,"fullname":"Hongyi Wang","user":"hwang595","type":"user"},"name":"Hongyi 
Wang","status":"admin_assigned","statusLastChangedAt":"2024-01-12T10:52:53.969Z","hidden":false},{"_id":"65a09d6ce969415381f1a335","name":"Huan Zhang","hidden":false},{"_id":"65a09d6ce969415381f1a336","name":"Huaxiu Yao","hidden":false},{"_id":"65a09d6ce969415381f1a337","name":"Manolis Kellis","hidden":false},{"_id":"65a09d6ce969415381f1a338","user":{"_id":"636826f95bb06007ea0e911e","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1667770136112-636826f95bb06007ea0e911e.jpeg","isPro":false,"fullname":"Marinka Zitnik","user":"marinkaz","type":"user"},"name":"Marinka Zitnik","status":"admin_assigned","statusLastChangedAt":"2024-01-12T10:54:07.144Z","hidden":false},{"_id":"65a09d6ce969415381f1a339","user":{"_id":"62cdc931c589e4a9e2411d39","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/62cdc931c589e4a9e2411d39/Q91Xp6WeI50AjViMzbD3Q.png","isPro":false,"fullname":"Meng Jiang","user":"mjiang89","type":"user"},"name":"Meng Jiang","status":"admin_assigned","statusLastChangedAt":"2024-01-12T10:54:57.296Z","hidden":false},{"_id":"65a09d6ce969415381f1a33a","name":"Mohit Bansal","hidden":false},{"_id":"65a09d6ce969415381f1a33b","user":{"_id":"648a769003fc4a3938bb7943","avatarUrl":"/avatars/7647f99abdcca4251fcac7783b6fcc8d.svg","isPro":false,"fullname":"zou","user":"jameszou707","type":"user"},"name":"James Zou","status":"admin_assigned","statusLastChangedAt":"2024-01-12T10:55:28.539Z","hidden":false},{"_id":"65a09d6ce969415381f1a33c","user":{"_id":"63e9d45fccae1fe5c624704a","avatarUrl":"/avatars/24644537b352a4e43cc9fd05f954a43c.svg","isPro":false,"fullname":"jianpei.wjp","user":"jianpei","type":"user"},"name":"Jian Pei","status":"admin_assigned","statusLastChangedAt":"2024-01-12T10:55:37.180Z","hidden":false},{"_id":"65a09d6ce969415381f1a33d","name":"Jian 
Liu","hidden":false},{"_id":"65a09d6ce969415381f1a33e","user":{"_id":"641904caf9d6f1d772ec7af7","avatarUrl":"/avatars/4a63eac71eb30f70b1a0e9d4708f26c1.svg","isPro":false,"fullname":"Jianfeng Gao","user":"wyngjf","type":"user"},"name":"Jianfeng Gao","status":"admin_assigned","statusLastChangedAt":"2024-01-12T10:55:49.797Z","hidden":true},{"_id":"65a09d6ce969415381f1a33f","name":"Jiawei Han","hidden":false},{"_id":"65a09d6ce969415381f1a340","user":{"_id":"64ba0230c9a9894feb0cd32b","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/64ba0230c9a9894feb0cd32b/V1XWySRXAhLN4wqXZJ3Ai.jpeg","isPro":false,"fullname":"Jieyu Zhao","user":"jieyuz","type":"user"},"name":"Jieyu Zhao","status":"admin_assigned","statusLastChangedAt":"2024-01-12T10:56:16.185Z","hidden":false},{"_id":"65a09d6ce969415381f1a341","name":"Jiliang Tang","hidden":false},{"_id":"65a09d6ce969415381f1a342","user":{"_id":"6204cc0d522e40b4a18d86e2","avatarUrl":"/avatars/18daf2de5671e711dc745388dd60569d.svg","isPro":false,"fullname":"Jindong Wang","user":"jindongwang","type":"user"},"name":"Jindong Wang","status":"admin_assigned","statusLastChangedAt":"2024-01-12T10:56:36.627Z","hidden":false},{"_id":"65a09d6ce969415381f1a343","name":"John Mitchell","hidden":false},{"_id":"65a09d6ce969415381f1a344","name":"Kai Shu","hidden":false},{"_id":"65a09d6ce969415381f1a345","user":{"_id":"65539ae68771298696b06aaf","avatarUrl":"/avatars/5240fa09876b90df763aee0e5a1c24e8.svg","isPro":false,"fullname":"Kaidi Xu","user":"kaidiXu","type":"user"},"name":"Kaidi Xu","status":"admin_assigned","statusLastChangedAt":"2024-01-12T10:57:22.933Z","hidden":false},{"_id":"65a09d6ce969415381f1a346","user":{"_id":"60b7b9d71b90c5d07c23fbd0","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1622653364258-noauth.jpeg","isPro":false,"fullname":"Kai-Wei Chang","user":"kaiweichang","type":"user"},"name":"Kai-Wei 
Chang","status":"admin_assigned","statusLastChangedAt":"2024-01-12T10:57:31.794Z","hidden":false},{"_id":"65a09d6ce969415381f1a347","name":"Lifang He","hidden":false},{"_id":"65a09d6ce969415381f1a348","name":"Lifu Huang","hidden":false},{"_id":"65a09d6ce969415381f1a349","user":{"_id":"64d0133a3c4a1b39a0f46288","avatarUrl":"/avatars/6f187dd88e88caf1fe31127c5d0827c1.svg","isPro":false,"fullname":"Heinrich","user":"backm007","type":"user"},"name":"Michael Backes","status":"admin_assigned","statusLastChangedAt":"2024-01-12T10:58:01.469Z","hidden":false},{"_id":"65a09d6ce969415381f1a34a","name":"Neil Zhenqiang Gong","hidden":false},{"_id":"65a09d6ce969415381f1a34b","name":"Philip S. Yu","hidden":false},{"_id":"65a09d6ce969415381f1a34c","user":{"_id":"6495dd0b71f6708e0f990032","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/6495dd0b71f6708e0f990032/PBIjdKNnpkxvR_3djCGVm.png","isPro":true,"fullname":"Pin-Yu Chen","user":"pinyuchen","type":"user"},"name":"Pin-Yu Chen","status":"admin_assigned","statusLastChangedAt":"2024-01-12T10:58:27.459Z","hidden":false},{"_id":"65a09d6ce969415381f1a34d","user":{"_id":"64c039128e2612254356bba5","avatarUrl":"/avatars/06cc76feebba0cc80ebb8f4ff86f6d9b.svg","isPro":false,"fullname":"Quanquan Gu","user":"thughost","type":"user"},"name":"Quanquan Gu","status":"admin_assigned","statusLastChangedAt":"2024-01-12T10:58:35.118Z","hidden":false},{"_id":"65a09d6ce969415381f1a34e","user":{"_id":"6465c4c863e7e09dd02e3e1b","avatarUrl":"/avatars/200b029184d2616f98296a2c212f0785.svg","isPro":false,"fullname":"Ran Xu","user":"xurantju","type":"user"},"name":"Ran Xu","status":"claimed_verified","statusLastChangedAt":"2024-08-19T22:15:22.742Z","hidden":false},{"_id":"65a09d6ce969415381f1a34f","name":"Rex Ying","hidden":false},{"_id":"65a09d6ce969415381f1a350","name":"Shuiwang 
Ji","hidden":false},{"_id":"65a09d6ce969415381f1a351","user":{"_id":"645092f95fdcff143ad9ae84","avatarUrl":"/avatars/e393586c34ad7e35091db8e13a8f2166.svg","isPro":false,"fullname":"Suman jana","user":"Suman116","type":"user"},"name":"Suman Jana","status":"admin_assigned","statusLastChangedAt":"2024-01-12T10:59:11.212Z","hidden":false},{"_id":"65a09d6ce969415381f1a352","name":"Tianlong Chen","hidden":false},{"_id":"65a09d6ce969415381f1a353","name":"Tianming Liu","hidden":false},{"_id":"65a09d6ce969415381f1a354","user":{"_id":"647f5af5b0e96764589f3b2a","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/noauth/VJ4cDyjp5M3V5WmI5gPIU.jpeg","isPro":false,"fullname":"Tianyi Zhou","user":"zhoutianyi","type":"user"},"name":"Tianyi Zhou","status":"admin_assigned","statusLastChangedAt":"2024-01-12T10:59:54.064Z","hidden":false},{"_id":"65a09d6ce969415381f1a355","name":"Willian Wang","hidden":true},{"_id":"65a09d6ce969415381f1a356","name":"Xiang Li","hidden":false},{"_id":"65a09d6ce969415381f1a357","user":{"_id":"605a97d9b54d35bc67a4ff12","avatarUrl":"/avatars/7a48a2dac4e6ebb9e775022e15ddc5a7.svg","isPro":false,"fullname":"zhangxiangliang","user":"ZhangXiangliang","type":"user"},"name":"Xiangliang Zhang","status":"admin_assigned","statusLastChangedAt":"2024-01-12T11:00:18.435Z","hidden":false},{"_id":"65a09d6ce969415381f1a358","name":"Xiao Wang","hidden":false},{"_id":"65a09d6ce969415381f1a359","name":"Xing Xie","hidden":false},{"_id":"65a09d6ce969415381f1a35a","name":"Xun Chen","hidden":false},{"_id":"65a09d6ce969415381f1a35b","name":"Xuyu Wang","hidden":false},{"_id":"65a09d6ce969415381f1a35c","name":"Yan Liu","hidden":false},{"_id":"65a09d6ce969415381f1a35d","name":"Yanfang Ye","hidden":false},{"_id":"65a09d6ce969415381f1a35e","name":"Yinzhi Cao","hidden":false},{"_id":"65a09d6ce969415381f1a35f","name":"Yue Zhao","hidden":false}],"publishedAt":"2024-01-10T22:07:21.000Z","submittedOnDailyAt":"2024-01-11T23:31:16.768Z","title":"TrustLLM: Trustworthiness in 
Large Language Models","submittedOnDailyBy":{"_id":"60f1abe7544c2adfd699860c","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1674929746905-60f1abe7544c2adfd699860c.jpeg","isPro":false,"fullname":"AK","user":"akhaliq","type":"user"},"summary":"Large language models (LLMs), exemplified by ChatGPT, have gained\nconsiderable attention for their excellent natural language processing\ncapabilities. Nonetheless, these LLMs present many challenges, particularly in\nthe realm of trustworthiness. Therefore, ensuring the trustworthiness of LLMs\nemerges as an important topic. This paper introduces TrustLLM, a comprehensive\nstudy of trustworthiness in LLMs, including principles for different dimensions\nof trustworthiness, established benchmark, evaluation, and analysis of\ntrustworthiness for mainstream LLMs, and discussion of open challenges and\nfuture directions. Specifically, we first propose a set of principles for\ntrustworthy LLMs that span eight different dimensions. Based on these\nprinciples, we further establish a benchmark across six dimensions including\ntruthfulness, safety, fairness, robustness, privacy, and machine ethics. We\nthen present a study evaluating 16 mainstream LLMs in TrustLLM, consisting of\nover 30 datasets. Our findings firstly show that in general trustworthiness and\nutility (i.e., functional effectiveness) are positively related. Secondly, our\nobservations reveal that proprietary LLMs generally outperform most open-source\ncounterparts in terms of trustworthiness, raising concerns about the potential\nrisks of widely accessible open-source LLMs. However, a few open-source LLMs\ncome very close to proprietary ones. Thirdly, it is important to note that some\nLLMs may be overly calibrated towards exhibiting trustworthiness, to the extent\nthat they compromise their utility by mistakenly treating benign prompts as\nharmful and consequently not responding. 
Finally, we emphasize the importance\nof ensuring transparency not only in the models themselves but also in the\ntechnologies that underpin trustworthiness. Knowing the specific trustworthy\ntechnologies that have been employed is crucial for analyzing their\neffectiveness.","upvotes":69,"discussionId":"65a09d6ce969415381f1a37a","ai_summary":"This study assesses the trustworthiness of large language models across various dimensions, including truthfulness, safety, fairness, robustness, privacy, and machine ethics, finding a positive correlation with utility and highlighting differences between proprietary and open-source models.","ai_keywords":["TrustLLM","large language models","LLMs","truthfulness","safety","fairness","robustness","privacy","machine ethics","evaluation","benchmark"]},"canReadDatabase":false,"canManagePapers":false,"canSubmit":false,"hasHfLevelAccess":false,"upvoted":false,"upvoters":[{"_id":"64169174bce2fed80ab83add","avatarUrl":"/avatars/d3baddcfe3547de63f56ffe2a737282d.svg","isPro":false,"fullname":"Yixin Liu","user":"yixin6178","type":"user"},{"_id":"6538119803519fddb4a17e10","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/6538119803519fddb4a17e10/ffJMkdx-rM7VvLTCM6ri_.jpeg","isPro":false,"fullname":"samusenps","user":"samusenps","type":"user"},{"_id":"65a135df18b7d7c225737173","avatarUrl":"/avatars/cc829bf663da9ca9df9d92feea80e42f.svg","isPro":false,"fullname":"Ruoxi Chen","user":"Dipsy0830","type":"user"},{"_id":"639d94ab7145123e0d44e48a","avatarUrl":"/avatars/5bb6a65b306d1383c4a8bcd9334b470a.svg","isPro":false,"fullname":"Yue Huang","user":"HowieHwong","type":"user"},{"_id":"6039478ab3ecf716b1a5fd4d","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/6039478ab3ecf716b1a5fd4d/_Thy4E7taiSYBLKxEKJbT.jpeg","isPro":true,"fullname":"taesiri","user":"taesiri","type":"user"},{"_id":"65a13cb1c5770b27aef2a2bc","avatarUrl":"/avatars/88ec5b988f10ad9fd4d469ae2fa34680.svg","isPro":false,"fullname":"Chujie 
Gao","user":"Flossie","type":"user"},{"_id":"636af1370d7b46535de59ddf","avatarUrl":"/avatars/f57b5a074491ce20611f4e08591b8815.svg","isPro":false,"fullname":"Haolong Jia","user":"JHL328","type":"user"},{"_id":"643be8879f5d314db2d9ed23","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/643be8879f5d314db2d9ed23/VrW2UtJ7ppOnGIYjTWd7b.png","isPro":false,"fullname":"Chen Dongping","user":"shuaishuaicdp","type":"user"},{"_id":"63e2b1ec282ee5f9624cfbcb","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/63e2b1ec282ee5f9624cfbcb/4SVTp93cvRevacoJgiXzS.jpeg","isPro":false,"fullname":"Neel Jain","user":"nsjain","type":"user"},{"_id":"63b543bd0d5913eee47fd6bc","avatarUrl":"/avatars/616b4f6dd070cc445a720cb985808aea.svg","isPro":false,"fullname":"Alice Shi","user":"SmallLion","type":"user"},{"_id":"6546a776407bb19ff56ed43a","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/6546a776407bb19ff56ed43a/z6o_nIRjsregMhPiJ8KNy.png","isPro":false,"fullname":"TrustLLM","user":"TrustLLM","type":"user"},{"_id":"65a29d8ef95d78ab762a3d22","avatarUrl":"/avatars/116345600319bacc7f87554e741787f3.svg","isPro":false,"fullname":"Zenghui Yuan","user":"HUSTyzh","type":"user"}],"acceptLanguages":["*"],"dailyPaperRank":0}">
This study assesses the trustworthiness of large language models across various dimensions, including truthfulness, safety, fairness, robustness, privacy, and machine ethics, finding a positive correlation with utility and highlighting differences between proprietary and open-source models.
AI-generated summary
Large language models (LLMs), exemplified by ChatGPT, have gained
considerable attention for their excellent natural language processing
capabilities. Nonetheless, these LLMs present many challenges, particularly in
the realm of trustworthiness. Therefore, ensuring the trustworthiness of LLMs
emerges as an important topic. This paper introduces TrustLLM, a comprehensive
study of trustworthiness in LLMs, including principles for different dimensions
of trustworthiness, established benchmark, evaluation, and analysis of
trustworthiness for mainstream LLMs, and discussion of open challenges and
future directions. Specifically, we first propose a set of principles for
trustworthy LLMs that span eight different dimensions. Based on these
principles, we further establish a benchmark across six dimensions including
truthfulness, safety, fairness, robustness, privacy, and machine ethics. We
then present a study evaluating 16 mainstream LLMs in TrustLLM, consisting of
over 30 datasets. Our findings firstly show that in general trustworthiness and
utility (i.e., functional effectiveness) are positively related. Secondly, our
observations reveal that proprietary LLMs generally outperform most open-source
counterparts in terms of trustworthiness, raising concerns about the potential
risks of widely accessible open-source LLMs. However, a few open-source LLMs
come very close to proprietary ones. Thirdly, it is important to note that some
LLMs may be overly calibrated towards exhibiting trustworthiness, to the extent
that they compromise their utility by mistakenly treating benign prompts as
harmful and consequently not responding. Finally, we emphasize the importance
of ensuring transparency not only in the models themselves but also in the
technologies that underpin trustworthiness. Knowing the specific trustworthy
technologies that have been employed is crucial for analyzing their
effectiveness.
Exactly how is trustworthiness innately related positively to utility? After reading this paper I'm more confused, because a whole bunch of the utility of current models comes down to toxicity and semantic filtering. Also, the "raising concerns about open source models" claim is silly and ridiculous. Those that can't handle technology won't be using open models in the first place. That raises the obvious question: what large model owners sponsored this study to sow fear and doubt? Knowing something is able to be trusted is different from trust = effectiveness. It all depends on your use case and what bias you're bringing to the table.
I agree on one point, though: gatekeepers and routers need to be built differently than regular models to be true ground-truth machines - and we all know that nobody, not even the big kids, has that part down to a science yet.
Exactly how is trustworthiness innately related positively to utility?
I share your sentiment and I think the answer lies in the difference between what developers want and what business executives want. Businesses want woke models that reinforce their worldview and don't hurt anyone's feelings. Developers don't necessarily care about your feelings; we want our models to do exactly what they're told to do, when they're told to do it. Utility (unrestricted instruction following, aka logic) will prevail, but I imagine we'll end up with uncensored models as a backend, with a censored model on the front end. That way everybody can have their cake and eat it too.
Can you imagine what would happen if a company released a new programming language that scanned your variables for toxic content and sent them to the garbage collector? I have a slight suspicion that language would be unceremoniously dumped alongside Windows 8.