{"id":4026,"date":"2025-06-20T13:52:17","date_gmt":"2025-06-20T05:52:17","guid":{"rendered":"https:\/\/www.ihub1.com\/?p=4026"},"modified":"2025-06-20T13:53:32","modified_gmt":"2025-06-20T05:53:32","slug":"agent%e5%85%a8%e8%87%aa%e5%8a%a8%e6%90%ad%e5%bb%ba%e4%bb%a3%e7%a0%81%e8%bf%90%e8%a1%8c%e7%8e%af%e5%a2%83%ef%bc%8c%e5%ae%9e%e6%97%b6%e6%9b%b4%e6%96%b0%e8%a7%a3%e5%86%b3%e8%af%84%e6%b5%8b%e8%bf%87","status":"publish","type":"post","link":"https:\/\/www.ihub1.com\/index.php\/2025\/06\/20\/agent%e5%85%a8%e8%87%aa%e5%8a%a8%e6%90%ad%e5%bb%ba%e4%bb%a3%e7%a0%81%e8%bf%90%e8%a1%8c%e7%8e%af%e5%a2%83%ef%bc%8c%e5%ae%9e%e6%97%b6%e6%9b%b4%e6%96%b0%e8%a7%a3%e5%86%b3%e8%af%84%e6%b5%8b%e8%bf%87\/","title":{"rendered":"Agent\u5168\u81ea\u52a8\u642d\u5efa\u4ee3\u7801\u8fd0\u884c\u73af\u5883\uff0c\u5b9e\u65f6\u66f4\u65b0\u89e3\u51b3\u8bc4\u6d4b\u8fc7\u62df\u5408\/\u6570\u636e\u6c61\u67d3\u95ee\u9898\uff5c\u5fae\u8f6f"},"content":{"rendered":"\t\t<div data-elementor-type=\"wp-post\" data-elementor-id=\"4026\" class=\"elementor elementor-4026\">\n\t\t\t\t<div class=\"elementor-element elementor-element-b81edf7 e-flex e-con-boxed e-con e-parent\" data-id=\"b81edf7\" data-element_type=\"container\" data-e-type=\"container\">\n\t\t\t\t\t<div class=\"e-con-inner\">\n\t\t\t\t<div class=\"elementor-element elementor-element-756340b elementor-widget elementor-widget-text-editor\" data-id=\"756340b\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div 
class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<p>\u957f\u671f\u4ee5\u6765\u4e3b\u6d41\u7684\u4ee3\u7801\u4fee\u590d\u8bc4\u6d4b\u57fa\u51c6SWE-bench\u9762\u4e34\u6570\u636e\u8fc7\u65f6\u3001\u8986\u76d6\u9762\u7a84\u3001\u624b\u52a8\u7ef4\u62a4\u6210\u672c\u9ad8\u7b49\u95ee\u9898\uff0c\u4e25\u91cd\u5236\u7ea6\u4e86AI\u6a21\u578b\u771f\u5b9e\u80fd\u529b\u7684\u5c55\u73b0\u3002<\/p><p>\u5fae\u8f6f\u53d1\u5e03\u5168\u65b0\u4ee3\u7801\u4fee\u590d\u8bc4\u6d4b\u57fa\u51c6\u00a0SWE-bench-Live\uff0c\u4e0d\u4ec5\u5f15\u5165\u4e86\u6765\u81eaGitHub\u6700\u65b0\u7684Issue\uff0c\u663e\u8457\u63d0\u5347\u4e86\u5bf9\u6a21\u578b\u8bc4\u4f30\u7684\u5b9e\u65f6\u6027\u4e0e\u51c6\u786e\u6027\uff0c\u8fd8\u5b9e\u73b0\u4ee3\u7801\u8fd0\u884c\u73af\u5883\u7684\u5168\u81ea\u52a8\u5316\u6784\u5efa\u4e0e\u81ea\u52a8\u66f4\u65b0\uff0c\u6253\u7834\u4e86\u4f20\u7edf\u9759\u6001\u8bc4\u6d4b\u57fa\u51c6\u7684\u5c40\u9650\u3002<\/p><p><img fetchpriority=\"high\" decoding=\"async\" src=\"https:\/\/q0.itc.cn\/q_70\/images03\/20250619\/e4851ea3171e479fb6f96ca15b4f1197.png\" width=\"640\" height=\"575\" \/><\/p><p><strong>\u25b3<\/strong>\u56fe1: SWE-bench-Live 
leaderboard.<\/p><p>\u5168\u81ea\u52a8\u5316\u73af\u5883\u642d\u5efa<\/p><p>\u4f20\u7edf\u7684\u4ee3\u7801\u4fee\u590d\u8bc4\u6d4b\u57fa\u51c6\u9700\u8981\u4eba\u5de5\u6784\u5efa\u4ee3\u7801\u8fd0\u884c\u73af\u5883\uff0c\u4e0d\u4ec5\u6210\u672c\u9ad8\u6602\uff0c\u4e14\u66f4\u65b0\u7f13\u6162\uff0c\u96be\u4ee5\u8ddf\u4e0a\u8f6f\u4ef6\u5f00\u53d1\u73af\u5883\u7684\u5feb\u901f\u53d8\u5316\u3002SWE-bench-Live\u5f00\u521b\u6027\u5730\u91c7\u7528\u4e86\u57fa\u4e8eAgent\u7684\u667a\u80fd\u5316\u6846\u67b6<strong>REPOLAUNCH<\/strong>\uff0c\u5f7b\u5e95\u89e3\u51b3\u4e86\u8fd9\u4e9b\u95ee\u9898\u3002<\/p><p>REPOLAUNCH\u53ef\u4ee5\u6839\u636eGitHub\u4e2d\u771f\u5b9e\u7684Issue\uff0c\u81ea\u52a8\u642d\u5efa\u5176Docker\u73af\u5883\u5e76\u6267\u884c\u6d4b\u8bd5\u9a8c\u8bc1\uff0c\u6574\u4e2a\u6d41\u7a0b\u5b8c\u5168\u65e0\u4eba\u5e72\u9884\uff0c\u5e76\u4e14\u6bcf\u6708\u81ea\u52a8\u66f4\u65b0\uff0c\u6301\u7eed\u63d0\u4f9b\u6700\u65b0\u9c9c\u3001\u6700\u5177\u4ee3\u8868\u6027\u7684\u8bc4\u6d4b\u6570\u636e\u3002\u8fd9\u79cd\u81ea\u52a8\u5316\u7684\u5b9e\u65f6\u66f4\u65b0\u6a21\u5f0f\uff0c\u6d88\u9664\u4e86\u6570\u636e\u6cc4\u9732\u4e0e\u6a21\u578b\u8fc7\u62df\u5408\u98ce\u9669\u3002<\/p><p><strong>\u25b3<\/strong>\u56fe2: 
\u81ea\u52a8\u5316\u6d41\u6c34\u7ebf\u6d41\u7a0b\u56fe<\/p><p><strong>REPOLAUNCH\u8be6\u7ec6\u6d41\u7a0b<\/strong><\/p><p>REPOLAUNCH\u7684\u6838\u5fc3\u539f\u7406\u662f\u5229\u7528\u667a\u80fdagent\u6280\u672f\u6a21\u4eff\u4eba\u7c7b\u5f00\u53d1\u8005\u7684\u73af\u5883\u6784\u5efa\u8fc7\u7a0b\u3002\u5177\u4f53\u6d41\u7a0b\u5305\u62ec\uff1a<\/p><ol><li><strong>\u76f8\u5173\u6587\u4ef6\u81ea\u52a8\u8bc6\u522b\uff1a<\/strong><p>\u667a\u80fd\u5730\u63d0\u53d6CI\/CD\u914d\u7f6e\u3001README\u6587\u4ef6\u7b49\u5173\u952e\u4fe1\u606f\u3002<\/p><\/li><li><strong>Docker\u73af\u5883\u81ea\u52a8\u9009\u62e9\u4e0e\u642d\u5efa\uff1a<\/strong><\/li><li><strong>\u667a\u80fdAgent\u4ea4\u4e92\u8fed\u4ee3\u8c03\u8bd5\uff1a<\/strong><p>agent\u4ee5ReAct\u6a21\u5f0f\uff08Reasoning+Action\uff09\u8fdb\u884c\u6301\u7eed\u8fed\u4ee3\u548c\u73af\u5883\u8c03\u8bd5\uff0c\u6a21\u62df\u5f00\u53d1\u8005\u884c\u4e3a\uff0c\u5feb\u901f\u5b9a\u4f4d\u5e76\u89e3\u51b3\u73af\u5883\u95ee\u9898\u3002<\/p><\/li><li><strong>\u73af\u5883\u56fa\u5316\u4e0e\u9a8c\u8bc1\uff1a<\/strong><p>\u6210\u529f\u642d\u5efa\u7684\u73af\u5883\u4ee5Docker\u955c\u50cf\u5f62\u5f0f\u56fa\u5316\uff0c\u786e\u4fdd\u4efb\u4f55\u4eba\u90fd\u80fd\u8f7b\u677e\u590d\u73b0\u548c\u4f7f\u7528\u3002<\/p><\/li><\/ol><p>\u4e0d\u4ec5\u5982\u6b64\uff0cREPOLAUNCH\u8fd8\u5177\u6709\u5e7f\u6cdb\u7684\u5e94\u7528\u6f5c\u529b\uff0c\u80fd\u591f\u652f\u6301\u66f4\u591a\u4e0b\u6e38\u4efb\u52a1\u3002\u4f8b\u5982\uff1a<\/p><ul><li><strong>\u81ea\u52a8\u5316\u65b0\u624b\u73af\u5883\u914d\u7f6e\uff1a<\/strong><p>\u5e2e\u52a9\u7f3a\u4e4f\u7ecf\u9a8c\u7684\u5f00\u53d1\u8005\u5feb\u901f\u642d\u5efa\u590d\u6742\u7684\u5f00\u53d1\u73af\u5883\u3002<\/p><\/li><li><strong>\u6784\u5efa\u5f3a\u5316\u5b66\u4e60\u53cd\u9988\u73af\u5883\uff1a<\/strong><p>\u4e3a\u5f3a\u5316\u5b66\u4e60\u6a21\u578b\u63d0\u4f9b\u81ea\u52a8\u5316\u7684\u4ee3\u7801\u4ea4\u4e92\u53cd\u9988\u73af\u5883\uff0c\u52a0\u901f\u6a21\u578b\u7684\u8fed\u4ee3\u4e0e\u4f18\u5316\u3002<\/p><\/li><li><stro
ng>\u9057\u7559\u9879\u76ee\u73af\u5883\u91cd\u5efa\uff1a<\/strong><p>\u5feb\u901f\u6062\u590d\u5386\u53f2\u6216\u5e9f\u5f03\u4ee3\u7801\u9879\u76ee\u7684\u73af\u5883\uff0c\u89e3\u51b3\u4f9d\u8d56\u7248\u672c\u51b2\u7a81\u7b49\u95ee\u9898\u3002<\/p><\/li><\/ul><p>\u5b9e\u9a8c\u53d1\u73b0<\/p><p>\u9996\u6b21\u57fa\u4e8eSWE-bench-Live\u7684\u5168\u9762\u8bc4\u6d4b\u7ed3\u679c\u663e\u793a\uff0c\u5f53\u524d\u9876\u5c16\u5927\u6a21\u578b\u548c\u4ee3\u7801Agent\u7684\u8868\u73b0\u5927\u5e45\u4e0b\u6ed1\u3002<\/p><p>\u5728\u5b8c\u5168\u76f8\u540c\u7684\u5b9e\u9a8c\u8bbe\u7f6e\u4e0b\uff0c\u5728\u4f20\u7edf\u8bc4\u6d4b\u57fa\u51c6SWE-bench Verified\u4e2d\u8fbe\u523043.2%\u51c6\u786e\u7387\u7684OpenHands + Claude 3.7 Sonnet\u7ec4\u5408\uff0c\u8f6c\u5230SWE-bench-Live\u540e\u4ec5\u8fbe\u5230\u4e8619.25%\u7684\u51c6\u786e\u7387\u3002\u8fd9\u4e00\u660e\u663e\u5dee\u8ddd\u63ed\u793a\u4e86\u4f20\u7edf\u9759\u6001\u57fa\u51c6\u4e2d\u6f5c\u5728\u7684\u8fc7\u62df\u5408\u95ee\u9898\uff0c\u8868\u660e\u5b9e\u65f6\u3001\u591a\u6837\u7684\u6570\u636e\u73af\u5883\u5bf9\u6a21\u578b\u80fd\u529b\u7684\u5ba2\u89c2\u8bc4\u6d4b\u81f3\u5173\u91cd\u8981\u3002<\/p><p><img decoding=\"async\" src=\"https:\/\/q1.itc.cn\/q_70\/images03\/20250619\/f4e0a9e2f8d04aa1bd1a0788099cd433.jpeg\" width=\"640\" height=\"486\" 
\/><\/p><p><strong>\u25b3<\/strong>\u56fe3\uff1a\u6a21\u578b\u5728\u4e0d\u540c\u57fa\u51c6\u4e0a\u7684\u8868\u73b0\u5bf9\u6bd4\u56fe<\/p><p>\u5982\u56fe\u8fdb\u4e00\u6b65\u6df1\u5165\u7684\u5b9e\u9a8c\u5206\u6790\u663e\u793a\uff0c\u5373\u4f7f\u5728SWE-bench-Live\u4e2d\uff0cLLM\u5728\u4fee\u590d\u6765\u81ea\u975e\u539f\u6709SWE-bench\u4ed3\u5e93\u7684\u65b0Issue\u65f6\uff0c\u5176\u6210\u529f\u7387\u4e5f\u663e\u8457\u4f4e\u4e8e\u4fee\u590d\u539f\u6709SWE-bench\u4ed3\u5e93\u7684Issue\u3002\u8fd9\u4e00\u73b0\u8c61\u8bf4\u660e\uff0c\u73b0\u6709\u5927\u6a21\u578b\u53ef\u80fd\u5df2\u5728\u4f20\u7edf\u9759\u6001\u8bc4\u6d4b\u4e2d\u5f62\u6210\u4e86\u4e00\u5b9a\u7684\u8fc7\u62df\u5408\uff0c\u5bf9\u4e8e\u672a\u89c1\u8fc7\u7684\u65b0\u4ed3\u5e93\u548c\u65b0\u95ee\u9898\u8868\u73b0\u660e\u663e\u4e0b\u964d\uff0c\u8fdb\u4e00\u6b65\u51f8\u663e\u4e86SWE-bench-Live\u5b9e\u65f6\u3001\u52a8\u6001\u3001\u591a\u6837\u6027\u8bc4\u6d4b\u7684\u91cd\u8981\u6027\u3002<\/p><p><img decoding=\"async\" src=\"https:\/\/q8.itc.cn\/q_70\/images03\/20250619\/5401808007b247a6a1894d8a42dcccb5.jpeg\" width=\"640\" height=\"233\" \/><\/p><p><strong>\u25b3<\/strong>\u56fe4\uff1aOpenHands+Claude 3.7 Sonnet\u5728SWE-bench-Live\u4e0d\u540c\u4ed3\u5e93\u6765\u6e90\u7684\u6027\u80fd\u5bf9\u6bd4<\/p><p>\u591a\u9886\u57df\u8986\u76d6\u4e0e\u591a\u6837\u5316\u6311\u6218<\/p><p>SWE-bench-Live\u7684\u9996\u6279\u4efb\u52a1\u6db5\u76d6\u4e861319\u4e2a\u771f\u5b9eIssue\uff0c\u6d89\u53ca93\u4e2a\u5f00\u6e90\u9879\u76ee\uff0c\u9886\u57df\u5305\u62ecAI\/ML\u3001DevOps\u3001Web\u5f00\u53d1\u3001\u6570\u636e\u5e93\u3001\u79d1\u5b66\u8ba1\u7b97\u7b49\u591a\u4e2a\u65b9\u5411\u3002\u8fd9\u79cd\u591a\u6837\u6027\u4e0e\u9ad8\u9891\u5b9e\u65f6\u66f4\u65b0\u4f7fSWE-bench-Live\u7684\u8bc4\u4f30\u66f4\u52a0\u51c6\u786e\uff0c\u66f4\u80fd\u53cd\u6620\u6a21\u578b\u80fd\u529b\u7684\u9ad8\u4f4e\u3002<\/p><p><img loading=\"lazy\" decoding=\"async\" src=\"https:\/\/q4.itc.cn\/q_70\/images03\/20250619\/66c242add68841738404a61d2fe4eeda.jpeg\" 
width=\"640\" height=\"290\" \/><\/p><p><strong>\u25b3<\/strong>\u56fe5\uff1a\u4ed3\u5e93\u5206\u5e03\u548c\u4efb\u52a1\u7edf\u8ba1\u56fe<\/p><p>\u8fdb\u4e00\u6b65\u5206\u6790\u53d1\u73b0\uff0c\u5f53\u524dAI\u6a21\u578b\u5728\u5904\u7406\u7b80\u5355\u3001\u5355\u6587\u4ef6\u4fee\u6539\u4efb\u52a1\u65f6\u8868\u73b0\u826f\u597d\uff0c\u4f46\u9762\u5bf9\u590d\u6742\u3001\u591a\u6587\u4ef6\u3001\u591a\u884c\u4fee\u6539\u4efb\u52a1\u65f6\u51c6\u786e\u7387\u6025\u5267\u4e0b\u964d\u3002\u5c24\u5176\u662f\u5728\u9762\u5bf9\u4ee3\u7801\u89c4\u6a21\u8d85\u8fc750\u4e07\u884c\u7684\u5927\u578b\u9879\u76ee\u65f6\uff0c\u6a21\u578b\u7684\u8868\u73b0\u74f6\u9888\u5c24\u4e3a\u660e\u663e\u3002<\/p><p><img loading=\"lazy\" decoding=\"async\" src=\"https:\/\/q5.itc.cn\/q_70\/images03\/20250619\/8f200666a0864ed296207821c348207e.png\" width=\"640\" height=\"547\" \/><\/p><p>\u76ee\u524d\uff0cSWE-bench-Live\u5df2\u5728GitHub\u548cHuggingFace\u5e73\u53f0\u5168\u9762\u5f00\u653e\uff0c\u9762\u5411\u5168\u7403\u5f00\u53d1\u8005\u548c\u7814\u7a76\u4eba\u5458\u514d\u8d39\u63d0\u4f9b\u3002\u6b22\u8fce\u793e\u533a\u6210\u5458\u79ef\u6781\u53c2\u4e0e\uff0c\u5171\u540c\u63a8\u52a8AI\u4ee3\u7801\u4fee\u590d\u6280\u672f\u7684\u8fdb\u6b65\u3002<\/p><p>\u672c\u6587\u4ec5\u4ee3\u8868\u5a92\u4f53\u89c6\u89d2\u8fdb\u884c\u5185\u5bb9\u6574\u7406\u4e0e\u53d1\u5e03\uff0c\u4e0d\u4ee3\u8868\u5fae\u8f6f\u5b98\u65b9\u7acb\u573a\uff0c\u5c24\u5176\u4e0d\u4ee3\u8868\u5176\u5bf9\u76f8\u5173\u57fa\u51c6\u6d4b\u8bd5\u7ed3\u679c\u7684\u4efb\u4f55\u6001\u5ea6\u6216\u610f\u56fe\u3002<\/p><p>\u5b98\u65b9\u4e3b\u9875\/Leaderboard\uff1ahttps:\/\/swe-bench-live.github.io<\/p><p>GitHub\uff1ahttps:\/\/github.com\/microsoft\/SWE-bench-Live<\/p><p>HuggingFace\uff1ahttps:\/\/huggingface.co\/SWE-bench-Live<\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t","protected":false},"excerpt":{"rendered":"<p>SWE-bench-Live\u56e2\u961f 
\u6295\u7a3f<\/p>\n","protected":false},"author":1,"featured_media":4028,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[],"class_list":["post-4026","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-uncategorized"],"aioseo_notices":[],"_links":{"self":[{"href":"https:\/\/www.ihub1.com\/index.php\/wp-json\/wp\/v2\/posts\/4026","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.ihub1.com\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.ihub1.com\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.ihub1.com\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.ihub1.com\/index.php\/wp-json\/wp\/v2\/comments?post=4026"}],"version-history":[{"count":7,"href":"https:\/\/www.ihub1.com\/index.php\/wp-json\/wp\/v2\/posts\/4026\/revisions"}],"predecessor-version":[{"id":4034,"href":"https:\/\/www.ihub1.com\/index.php\/wp-json\/wp\/v2\/posts\/4026\/revisions\/4034"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/www.ihub1.com\/index.php\/wp-json\/wp\/v2\/media\/4028"}],"wp:attachment":[{"href":"https:\/\/www.ihub1.com\/index.php\/wp-json\/wp\/v2\/media?parent=4026"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.ihub1.com\/index.php\/wp-json\/wp\/v2\/categories?post=4026"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.ihub1.com\/index.php\/wp-json\/wp\/v2\/tags?post=4026"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}