{"id":4610,"date":"2025-12-08T17:18:44","date_gmt":"2025-12-08T09:18:44","guid":{"rendered":"https:\/\/www.ihub1.com\/?p=4610"},"modified":"2025-12-08T17:20:02","modified_gmt":"2025-12-08T09:20:02","slug":"%e8%8b%b1%e4%bc%9f%e8%be%be%e8%87%aa%e6%af%81cuda%e9%97%a8%e6%a7%9b%ef%bc%8115%e8%a1%8cpython%e5%86%99gpu%e5%86%85%e6%a0%b8%ef%bc%8c%e6%80%a7%e8%83%bd%e5%8c%b9%e6%95%8c200%e8%a1%8cc","status":"publish","type":"post","link":"https:\/\/www.ihub1.com\/index.php\/2025\/12\/08\/%e8%8b%b1%e4%bc%9f%e8%be%be%e8%87%aa%e6%af%81cuda%e9%97%a8%e6%a7%9b%ef%bc%8115%e8%a1%8cpython%e5%86%99gpu%e5%86%85%e6%a0%b8%ef%bc%8c%e6%80%a7%e8%83%bd%e5%8c%b9%e6%95%8c200%e8%a1%8cc\/","title":{"rendered":"\u82f1\u4f1f\u8fbe\u81ea\u6bc1CUDA\u95e8\u69db\uff0115\u884cPython\u5199GPU\u5185\u6838\uff0c\u6027\u80fd\u5339\u654c200\u884cC++"},"content":{"rendered":"\t\t<div data-elementor-type=\"wp-post\" data-elementor-id=\"4610\" class=\"elementor elementor-4610\">\n\t\t\t\t<div class=\"elementor-element elementor-element-af2c98b e-flex e-con-boxed e-con e-parent\" data-id=\"af2c98b\" data-element_type=\"container\" data-e-type=\"container\">\n\t\t\t\t\t<div class=\"e-con-inner\">\n\t\t\t\t<div class=\"elementor-element elementor-element-04dbc4d elementor-widget elementor-widget-text-editor\" data-id=\"04dbc4d\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<p>GPU\u7f16\u7a0b\u53d8\u5929\u4e86\u3002<\/p><p>\u82f1\u4f1f\u8fbe\u53d1\u5e03\u6700\u65b0\u7248<strong>CUDA 13.1<\/strong>\uff0c\u5b98\u65b9\u76f4\u63a5\u5b9a\u6027\uff1a<strong>\u8fd9\u662f\u81ea2006\u5e74\u8bde\u751f\u4ee5\u6765\u6700\u5927\u7684\u8fdb\u6b65<\/strong>\u3002<\/p><p>\u6838\u5fc3\u53d8\u5316\u662f\u63a8\u51fa\u5168\u65b0\u7684<strong>CUDA 
Tile\u7f16\u7a0b\u6a21\u578b<\/strong>\uff0c\u8ba9\u5f00\u53d1\u8005\u53ef\u4ee5<strong>\u7528Python\u5199GPU\u5185\u6838<\/strong>\uff0c15\u884c\u4ee3\u7801\u5c31\u80fd\u8fbe\u5230200\u884cCUDA C++\u4ee3\u7801\u7684\u6027\u80fd\u3002<\/p><p><img fetchpriority=\"high\" decoding=\"async\" src=\"https:\/\/q5.itc.cn\/q_70\/images03\/20251208\/464306802e674cb3ad544fcb67359e90.jpeg\" width=\"640\" height=\"715\" \/><\/p><p>\u6d88\u606f\u4e00\u51fa\uff0c\u82af\u7247\u754c\u4f20\u5947\u4eba\u7269<strong>Jim Keller<\/strong>\u7acb\u5373\u53d1\u95ee\uff1a<\/p><blockquote><p>\u82f1\u4f1f\u8fbe\u662f\u4e0d\u662f\u4eb2\u624b\u7ec8\u7ed3\u4e86CUDA\u7684\u201c\u62a4\u57ce\u6cb3\u201d\uff1f\u5982\u679c\u82f1\u4f1f\u8fbe\u4e5f\u8f6c\u5411Tile\u6a21\u578b\uff0cAI\u5185\u6838\u5c06\u66f4\u5bb9\u6613\u79fb\u690d\u5230\u5176\u4ed6\u786c\u4ef6\u4e0a\u3002<\/p><\/blockquote><p><img decoding=\"async\" src=\"https:\/\/q1.itc.cn\/q_70\/images03\/20251208\/cdc6556bb2e048eb958927bf95e88bf2.jpeg\" width=\"640\" height=\"159\" \/><\/p><p>Jim Keller\u53c2\u4e0e\u8bbe\u8ba1\u8fc7AMD 
Zen\u67b6\u6784\u3001\u82f9\u679cA\u7cfb\u5217\u82af\u7247\u3001\u7279\u65af\u62c9\u81ea\u52a8\u9a7e\u9a76\u82af\u7247\u7684\u201c\u7845\u4ed9\u4eba\u201d\uff0c\u4ed6\u7684\u5224\u65ad\u5728\u884c\u4e1a\u91cc\u76f8\u5f53\u6709\u5206\u91cf\u3002<\/p><p>\u90a3\u4e48\u95ee\u9898\u6765\u4e86\uff1aCUDA\u8fd9\u6b21\u5230\u5e95\u6539\u4e86\u4ec0\u4e48\uff1f\u4e3a\u4ec0\u4e48\u4f1a\u88ab\u8ba4\u4e3a\u662f\u201c\u81ea\u6bc1\u957f\u57ce\u201d\uff1f<\/p><p>GPU\u7f16\u7a0b\u8303\u5f0f\u4ece\u201c\u7ebf\u7a0b\u201d\u5230\u201c\u74e6\u7247\u201d<\/p><p>\u8981\u7406\u89e3\u8fd9\u6b21\u66f4\u65b0\u7684\u610f\u4e49\uff0c\u5f97\u5148\u56de\u987e\u4e00\u4e0b\u4f20\u7edfCUDA\u7f16\u7a0b\u6709\u591a\u6298\u78e8\u4eba\u3002<\/p><p>\u8fc7\u53bb20\u5e74\uff0cCUDA\u4e00\u76f4\u91c7\u7528<strong>SIMT\uff08\u5355\u6307\u4ee4\u591a\u7ebf\u7a0b\uff09\u6a21\u578b<\/strong>\uff0c\u5f00\u53d1\u8005\u5199\u4ee3\u7801\u65f6\uff0c\u9700\u8981\u624b\u52a8\u7ba1\u7406\u7ebf\u7a0b\u7d22\u5f15\u3001\u7ebf\u7a0b\u5757\u3001\u5171\u4eab\u5185\u5b58\u5e03\u5c40\u3001\u7ebf\u7a0b\u540c\u6b65\uff0c\u6bcf\u4e00\u4e2a\u7ec6\u8282\u90fd\u8981\u81ea\u5df1\u64cd\u5fc3\u3002<\/p><p>\u60f3\u8981\u5145\u5206\u5229\u7528GPU\u6027\u80fd\uff0c\u7279\u522b\u662f\u7528\u4e0aTensor Core\u8fd9\u7c7b\u4e13\u7528\u6a21\u5757\uff0c\u66f4\u662f\u9700\u8981\u6df1\u539a\u7684\u7ecf\u9a8c\u79ef\u7d2f\u3002<\/p><p><strong>CUDA Tile\u5f7b\u5e95\u6539\u53d8\u4e86\u8fd9\u5957\u73a9\u6cd5\uff1a<\/strong><\/p><p>\u5f00\u53d1\u8005\u4e0d\u518d\u9700\u8981\u9010\u7ebf\u7a0b\u5730\u7f16\u5199\u6267\u884c\u8def\u5f84\uff0c\u800c\u662f\u628a\u6570\u636e\u7ec4\u7ec7\u6210Tile\uff08\u74e6\u7247\uff09\uff0c\u7136\u540e\u5b9a\u4e49\u5728\u8fd9\u4e9bTile\u4e0a\u6267\u884c\u4ec0\u4e48\u8fd0\u7b97\u3002\u81f3\u4e8e\u600e\u4e48\u628a\u8fd9\u4e9b\u8fd0\u7b97\u6620\u5c04\u5230GPU\u7684\u7ebf\u7a0b\u3001Warp\u548cTensor 
Core\u4e0a\uff0c\u7f16\u8bd1\u5668\u548c\u8fd0\u884c\u65f6\u4f1a\u81ea\u52a8\u641e\u5b9a\u3002<\/p><p><strong>\u5c31\u597d\u50cfNumPy\u4e4b\u4e8ePython\u3002<\/strong><\/p><p><img decoding=\"async\" src=\"https:\/\/q7.itc.cn\/q_70\/images03\/20251208\/c8317a82098b4774912dd7bbde20c45f.jpeg\" width=\"640\" height=\"524\" \/><\/p><p>\u82f1\u4f1f\u8fbe\u4e3a\u6b64\u6784\u5efa\u4e86\u4e24\u4e2a\u6838\u5fc3\u7ec4\u4ef6\uff1a<\/p><p><strong>CUDA Tile IR<\/strong>\u662f\u4e00\u5957\u5168\u65b0\u7684\u865a\u62df\u6307\u4ee4\u96c6\uff0c\u5b83\u5728\u9ad8\u7ea7\u8bed\u8a00\u548c\u786c\u4ef6\u4e4b\u95f4\u52a0\u4e86\u4e00\u5c42\u62bd\u8c61\uff0c\u786e\u4fdd\u57fa\u4e8eTile\u7f16\u5199\u7684\u4ee3\u7801\u80fd\u5728\u4e0d\u540c\u4ee3\u9645\u7684GPU\u4e0a\u8fd0\u884c\uff0c\u4ece\u5f53\u524d\u7684Blackwell\u5230\u672a\u6765\u7684\u67b6\u6784\u90fd\u80fd\u517c\u5bb9\u3002<\/p><p><strong>cuTile Python<\/strong>\u5219\u662f\u9762\u5411\u5f00\u53d1\u8005\u7684\u63a5\u53e3\uff0c\u76f4\u63a5\u7528Python\u5199GPU\u5185\u6838\uff0c\u95e8\u69db\u4e00\u4e0b\u5b50\u4ece\u201cHPC\u4e13\u5bb6\u201d\u964d\u5230\u4e86\u201c\u4f1a\u5199Python\u7684\u6570\u636e\u79d1\u5b66\u5bb6\u90fd\u53ef\u4ee5\u5e72\u201d\u3002<\/p><p><img loading=\"lazy\" decoding=\"async\" src=\"https:\/\/q0.itc.cn\/q_70\/images03\/20251208\/e3cf666a94b645feb36f4b57a13dd459.jpeg\" width=\"571\" height=\"970\" \/><\/p><p>\u53e6\u5916\uff0c\u8fd9\u6b21\u66f4\u65b0\u8fd8\u5e26\u6765\u4e86\u4e00\u7cfb\u5217\u9762\u5411Blackwell\u7684\u6027\u80fd\u4f18\u5316\uff1a<\/p><ul><li>cuBLAS\u5f15\u5165\u4e86FP64\u548cFP32\u7cbe\u5ea6\u5728Tensor Core\u4e0a\u7684\u4eff\u771f\u529f\u80fd<\/li><li>\u65b0\u589e\u7684Grouped GEMM API\u5728MoE\uff08\u6df7\u5408\u4e13\u5bb6\u6a21\u578b\uff09\u573a\u666f\u4e0b\u80fd\u5b9e\u73b0\u9ad8\u8fbe4\u500d\u52a0\u901f<\/li><li>cuSOLVER\u7684\u6279\u5904\u7406\u7279\u5f81\u5206\u89e3\u5728Blackwell RTX PRO 
6000\u4e0a\u76f8\u6bd4L40S\u5b9e\u73b0\u4e86\u7ea62\u500d\u7684\u6027\u80fd\u63d0\u5347<\/li><li>\u5f00\u53d1\u8005\u5de5\u5177Nsight Compute\u65b0\u589e\u4e86\u5bf9CUDA Tile\u5185\u6838\u7684\u6027\u80fd\u5206\u6790\u652f\u6301\uff0c\u53ef\u4ee5\u628a\u6027\u80fd\u6307\u6807\u76f4\u63a5\u6620\u5c04\u56decuTile Python\u6e90\u4ee3\u7801\u3002<\/li><\/ul><p>\u76ee\u524dCUDA Tile\u4ec5\u652f\u6301Blackwell\u67b6\u6784\uff08\u8ba1\u7b97\u80fd\u529b10.x\u548c12.x\uff09\uff0c\u5f00\u53d1\u91cd\u70b9\u96c6\u4e2d\u5728AI\u7b97\u6cd5\u4e0a\u3002\u82f1\u4f1f\u8fbe\u8868\u793a\u672a\u6765\u4f1a\u6269\u5c55\u5230\u66f4\u591a\u67b6\u6784\uff0c\u5e76\u63a8\u51faC++\u5b9e\u73b0\u3002<\/p><p>\u7845\u4ed9\u4eba\u7684\u8d28\u7591\uff1a\u964d\u4f4e\u95e8\u69db\u662f\u4e00\u628a\u53cc\u5203\u5251<\/p><p>\u90a3\u4e48Jim Keller\u4e3a\u4ec0\u4e48\u8bf4\u82f1\u4f1f\u8fbe\u53ef\u80fd\u201c\u7ec8\u7ed3\u4e86\u81ea\u5df1\u7684\u62a4\u57ce\u6cb3\u201d\uff1f<\/p><p>\u5173\u952e\u5c31\u5728\u4e8eTile\u7f16\u7a0b\u6a21\u578b\u4e0d\u662f\u82f1\u4f1f\u8fbe\u72ec\u6709\u7684\u3002<strong>AMD\u3001Intel\u4ee5\u53ca\u5176\u4ed6AI\u82af\u7247\u5382\u5546\u7684\u786c\u4ef6\uff0c\u5728\u5e95\u5c42\u67b6\u6784\u4e0a\u540c\u6837\u53ef\u4ee5\u652f\u6301\u57fa\u4e8eTile\u7684\u7f16\u7a0b\u62bd\u8c61\u3002<\/strong><\/p><p>\u8fc7\u53bbCUDA\u96be\u4ee5\u79fb\u690d\uff0c\u5f88\u5927\u7a0b\u5ea6\u4e0a\u662f\u56e0\u4e3a<strong>SIMT\u6a21\u578b\u4e0e\u82f1\u4f1f\u8fbe\u786c\u4ef6\u6df1\u5ea6\u7ed1\u5b9a<\/strong>\uff0c\u5f00\u53d1\u8005\u8981\u9488\u5bf9\u5177\u4f53\u7684GPU\u67b6\u6784\u624b\u5199\u4f18\u5316\u4ee3\u7801\u3002\u8fd9\u4e9b\u4ee3\u7801\u6362\u5230\u522b\u5bb6\u786c\u4ef6\u4e0a\uff0c\u8981\u4e48\u8dd1\u4e0d\u4e86\uff0c\u8981\u4e48\u6027\u80fd\u5927\u6253\u6298\u6263\u3002<\/p><p>\u4f46Tile\u6a21\u578b\u5929\u7136\u5177\u6709\u66f4\u9ad8\u7684\u62bd\u8c61\u5c42\u6b21\u3002\u5f53\u5f00\u53d1\u8005\u4e60\u60ef\u4e86\u201c\u53ea\u7ba1\u5b9a\u4e49Tile\u8fd0\u7b97\uff0c\u786c\u4ef6\u7ec6\u8282\u4ea4\u7ed9
\u7f16\u8bd1\u5668\u201d\u8fd9\u79cd\u601d\u7ef4\u65b9\u5f0f\u540e\uff0c\u7406\u8bba\u4e0a\u540c\u4e00\u5957\u7b97\u6cd5\u903b\u8f91\u66f4\u5bb9\u6613\u9002\u914d\u5230\u5176\u4ed6\u652f\u6301Tile\u7f16\u7a0b\u7684\u786c\u4ef6\u4e0a\u3002<\/p><p>\u6b63\u5982Jim Keller\u6240\u8bf4\uff1a\u201cAI\u5185\u6838\u5c06\u66f4\u5bb9\u6613\u79fb\u690d\u3002\u201d<\/p><p>\u4e0d\u8fc7\u82f1\u4f1f\u8fbe\u4e5f\u8003\u8651\u4e86\u540e\u624b\uff0cCUDA Tile IR\u63d0\u4f9b\u4e86\u8de8\u4ee3\u517c\u5bb9\u6027\uff0c\u4f46\u8fd9\u79cd\u517c\u5bb9\u6027\u662f\u5efa\u7acb\u5728CUDA\u5e73\u53f0\u4e4b\u4e0a\u7684\u3002<\/p><p>\u5f00\u53d1\u8005\u5199\u7684\u4ee3\u7801\u786e\u5b9e\u66f4\u5bb9\u6613\u79fb\u690d\u4e86\uff0c\u4f46\u79fb\u690d\u7684\u76ee\u6807\u662f\u82f1\u4f1f\u8fbe\u81ea\u5bb6\u7684\u4e0d\u540c\u4ee3GPU\uff0c\u800c\u975e\u7ade\u4e89\u5bf9\u624b\u7684\u786c\u4ef6\u3002<\/p><p>\u4ece\u8fd9\u4e2a\u89d2\u5ea6\u770b\uff0cCUDA\u4ee3\u7801\u53ef\u4ee5\u4eceBlackwell\u65e0\u7f1d\u8fc1\u79fb\u5230\u4e0b\u4e00\u4ee3\u82f1\u4f1f\u8fbeGPU\uff0c\u4f46\u8981\u8fc1\u79fb\u5230AMD\u6216Intel\u7684\u5e73\u53f0\u4e0a\uff0c\u4f9d\u7136\u9700\u8981\u91cd\u5199\u3002<\/p><p>\u4e0d\u7ba1\u62a4\u57ce\u6cb3\u662f\u52a0\u6df1\u8fd8\u662f\u524a\u5f31\uff0c\u6709\u4e00\u70b9\u662f\u786e\u5b9a\u7684\uff1aGPU\u7f16\u7a0b\u7684\u95e8\u69db\u786e\u5b9e\u5728\u5927\u5e45\u964d\u4f4e\u3002<\/p><p>\u8fc7\u53bb\u80fd\u719f\u7ec3\u9a7e\u9a6dCUDA\u7684\u5f00\u53d1\u8005\u662f\u7a00\u7f3a\u8d44\u6e90\uff0c\u4f1a\u5199Python\u7684\u4eba\u4e00\u6293\u4e00\u5927\u628a\uff0c\u4f46\u80fd\u628a\u4ee3\u7801\u4f18\u5316\u5230\u8dd1\u6ee1Tensor Core\u7684\u4e13\u5bb6\u5be5\u5be5\u65e0\u51e0\u3002<\/p><p>CUDA Tile\u548ccuTile Python\u6253\u901a\u4e86\u8fd9\u4e2a\u74f6\u9888\u3002\u82f1\u4f1f\u8fbe\u5728\u5f00\u53d1\u8005\u535a\u5ba2\u4e2d\u63d0\u5230\uff0c\u4e00\u4e2a15\u884c\u7684Python\u5185\u6838\u6027\u80fd\u53ef\u4ee5\u5ab2\u7f8e200\u884c\u624b\u52a8\u4f18\u5316\u7684CUDA 
C++\u4ee3\u7801\u3002<\/p><p>\u5927\u91cf\u6570\u636e\u79d1\u5b66\u5bb6\u548cAI\u7814\u7a76\u8005\u4ece\u6b64\u53ef\u4ee5\u76f4\u63a5\u4e0a\u624b\u5199\u9ad8\u6027\u80fdGPU\u4ee3\u7801\uff0c\u4e0d\u7528\u518d\u7b49HPC\u4e13\u5bb6\u6765\u5e2e\u5fd9\u4f18\u5316\u3002<\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t","protected":false},"excerpt":{"rendered":"<p>\u68a6\u6668 \u53d1\u81ea \u51f9\u975e\u5bfa<\/p>\n","protected":false},"author":1,"featured_media":4612,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[],"class_list":["post-4610","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-uncategorized"],"aioseo_notices":[],"_links":{"self":[{"href":"https:\/\/www.ihub1.com\/index.php\/wp-json\/wp\/v2\/posts\/4610","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.ihub1.com\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.ihub1.com\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.ihub1.com\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.ihub1.com\/index.php\/wp-json\/wp\/v2\/comments?post=4610"}],"version-history":[{"count":7,"href":"https:\/\/www.ihub1.com\/index.php\/wp-json\/wp\/v2\/posts\/4610\/revisions"}],"predecessor-version":[{"id":4618,"href":"https:\/\/www.ihub1.com\/index.php\/wp-json\/wp\/v2\/posts\/4610\/revisions\/4618"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/www.ihub1.com\/index.php\/wp-json\/wp\/v2\/media\/4612"}],"wp:attachment":[{"href":"https:\/\/www.ihub1.com\/index.php\/wp-json\/wp\/v2\/media?parent=4610"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.ihub1.com\/index.php\/wp-json\/wp\/v2\/categories?post=4610"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.ihu
b1.com\/index.php\/wp-json\/wp\/v2\/tags?post=4610"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}