{"id":1192,"date":"2024-02-26T10:42:03","date_gmt":"2024-02-26T02:42:03","guid":{"rendered":"https:\/\/xujiwei.com\/blog\/?p=1192"},"modified":"2024-02-26T10:42:03","modified_gmt":"2024-02-26T02:42:03","slug":"codegeex2-local-deploy","status":"publish","type":"post","link":"https:\/\/xujiwei.com\/blog\/2024\/02\/codegeex2-local-deploy\/","title":{"rendered":"\u7b14\u8bb0\u672c RTX 3070 + 8G \u663e\u5b58\u672c\u5730\u8fd0\u884c\u5927\u8bed\u8a00\u6a21\u578b CodeGeeX2 \u751f\u6210\u4ee3\u7801\u5b9e\u8df5"},"content":{"rendered":"<div id=\"ez-toc-container\" class=\"ez-toc-v2_0_65 ez-toc-wrap-right counter-hierarchy ez-toc-counter ez-toc-light-blue ez-toc-container-direction\">\n<p class=\"ez-toc-title\">\u6587\u7ae0\u76ee\u5f55<\/p>\n<label for=\"ez-toc-cssicon-toggle-item-69d06d34c7880\" class=\"ez-toc-cssicon-toggle-label\"><span class=\"\"><span class=\"eztoc-hide\" style=\"display:none;\">Toggle<\/span><span class=\"ez-toc-icon-toggle-span\"><svg style=\"fill: #999;color:#999\" xmlns=\"http:\/\/www.w3.org\/2000\/svg\" class=\"list-377408\" width=\"20px\" height=\"20px\" viewBox=\"0 0 24 24\" fill=\"none\"><path d=\"M6 6H4v2h2V6zm14 0H8v2h12V6zM4 11h2v2H4v-2zm16 0H8v2h12v-2zM4 16h2v2H4v-2zm16 0H8v2h12v-2z\" fill=\"currentColor\"><\/path><\/svg><svg style=\"fill: #999;color:#999\" class=\"arrow-unsorted-368013\" xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"10px\" height=\"10px\" viewBox=\"0 0 24 24\" version=\"1.2\" baseProfile=\"tiny\"><path d=\"M18.2 9.3l-6.2-6.3-6.2 6.3c-.2.2-.3.4-.3.7s.1.5.3.7c.2.2.4.3.7.3h11c.3 0 .5-.1.7-.3.2-.2.3-.5.3-.7s-.1-.5-.3-.7zM5.8 14.7l6.2 6.3 6.2-6.3c.2-.2.3-.5.3-.7s-.1-.5-.3-.7c-.2-.2-.4-.3-.7-.3h-11c-.3 0-.5.1-.7.3-.2.2-.3.5-.3.7s.1.5.3.7z\"\/><\/svg><\/span><\/span><\/label><input type=\"checkbox\"  id=\"ez-toc-cssicon-toggle-item-69d06d34c7880\"  aria-label=\"Toggle\" \/><nav><ul class='ez-toc-list ez-toc-list-level-1 ' ><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-1\" 
href=\"https:\/\/xujiwei.com\/blog\/2024\/02\/codegeex2-local-deploy\/#%E7%B3%BB%E7%BB%9F%E7%8E%AF%E5%A2%83\" title=\"\u7cfb\u7edf\u73af\u5883\">\u7cfb\u7edf\u73af\u5883<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-2\" href=\"https:\/\/xujiwei.com\/blog\/2024\/02\/codegeex2-local-deploy\/#%E5%AE%89%E8%A3%85_Python_%E7%8E%AF%E5%A2%83\" title=\"\u5b89\u88c5 Python \u73af\u5883\">\u5b89\u88c5 Python \u73af\u5883<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-3\" href=\"https:\/\/xujiwei.com\/blog\/2024\/02\/codegeex2-local-deploy\/#%E5%85%8B%E9%9A%86%E6%A8%A1%E5%9E%8B%E6%96%87%E4%BB%B6\" title=\"\u514b\u9686\u6a21\u578b\u6587\u4ef6\">\u514b\u9686\u6a21\u578b\u6587\u4ef6<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-4\" href=\"https:\/\/xujiwei.com\/blog\/2024\/02\/codegeex2-local-deploy\/#%E8%B8%A9%E5%9D%91%E8%AE%B0%E5%BD%95\" title=\"\u8e29\u5751\u8bb0\u5f55\">\u8e29\u5751\u8bb0\u5f55<\/a><ul class='ez-toc-list-level-3' ><li class='ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-5\" href=\"https:\/\/xujiwei.com\/blog\/2024\/02\/codegeex2-local-deploy\/#%E2%80%98ChatGLMTokenizer_object_has_no_attribute_%E2%80%98tokenizer\" title=\"&#8216;ChatGLMTokenizer&#8217; object has no attribute &#8216;tokenizer&#8217;\">&#8216;ChatGLMTokenizer&#8217; object has no attribute &#8216;tokenizer&#8217;<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-6\" href=\"https:\/\/xujiwei.com\/blog\/2024\/02\/codegeex2-local-deploy\/#Torch_not_compiled_with_CUDA_enabled\" title=\"Torch not compiled with CUDA enabled\">Torch not compiled with CUDA enabled<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-7\" href=\"https:\/\/xujiwei.com\/blog\/2024\/02\/codegeex2-local-deploy\/#RuntimeError_Library_cublasLt_is_not_initialized\" 
title=\"RuntimeError: Library cublasLt is not initialized\">RuntimeError: Library cublasLt is not initialized<\/a><\/li><\/ul><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-8\" href=\"https:\/\/xujiwei.com\/blog\/2024\/02\/codegeex2-local-deploy\/#%E8%BF%90%E8%A1%8C%E6%A8%A1%E5%9E%8B\" title=\"\u8fd0\u884c\u6a21\u578b\">\u8fd0\u884c\u6a21\u578b<\/a><ul class='ez-toc-list-level-3' ><li class='ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-9\" href=\"https:\/\/xujiwei.com\/blog\/2024\/02\/codegeex2-local-deploy\/#%E4%BD%BF%E7%94%A8%E4%BB%A3%E7%A0%81%E8%B0%83%E7%94%A8_CodeGeeX2_%E7%94%9F%E6%88%90%E4%BB%A3%E7%A0%81\" title=\"\u4f7f\u7528\u4ee3\u7801\u8c03\u7528 CodeGeeX2 \u751f\u6210\u4ee3\u7801\">\u4f7f\u7528\u4ee3\u7801\u8c03\u7528 CodeGeeX2 \u751f\u6210\u4ee3\u7801<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-10\" href=\"https:\/\/xujiwei.com\/blog\/2024\/02\/codegeex2-local-deploy\/#%E4%BD%BF%E7%94%A8_API_%E8%B0%83%E7%94%A8_CodeGeeX2_%E7%94%9F%E6%88%90%E4%BB%A3%E7%A0%81\" title=\"\u4f7f\u7528 API \u8c03\u7528 CodeGeeX2 \u751f\u6210\u4ee3\u7801\">\u4f7f\u7528 API \u8c03\u7528 CodeGeeX2 \u751f\u6210\u4ee3\u7801<\/a><\/li><\/ul><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-11\" href=\"https:\/\/xujiwei.com\/blog\/2024\/02\/codegeex2-local-deploy\/#%E5%B0%8F%E7%BB%93\" title=\"\u5c0f\u7ed3\">\u5c0f\u7ed3<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-12\" href=\"https:\/\/xujiwei.com\/blog\/2024\/02\/codegeex2-local-deploy\/#%E5%8F%82%E8%80%83%E8%B5%84%E6%96%99\" title=\"\u53c2\u8003\u8d44\u6599\">\u53c2\u8003\u8d44\u6599<\/a><\/li><\/ul><\/nav><\/div>\n<p>\u4e4b\u524d\u770b\u5230 CodeGeeX2 
\u5927\u8bed\u8a00\u6a21\u578b\u53d1\u5e03\u4e86\uff0c\u4ecb\u7ecd\u8bf4\u6bd4\u4e0a\u4e00\u4e2a\u7248\u672c\u6548\u679c\u8981\u597d\u4e0a\u4e0d\u5c11\uff0c\u56e0\u6b64\u4e5f\u60f3\u5728\u672c\u5730\u8dd1\u4e00\u4e2a\u770b\u770b\uff0c\u80fd\u4e0d\u80fd\u5728\u9879\u76ee\u5f00\u53d1\u7684\u6d41\u7a0b\u4e2d\u5e94\u7528\u4e00\u4e0b\u3002<\/p>\n<blockquote>\n<p>CodeGeeX2 \u662f\u591a\u8bed\u8a00\u4ee3\u7801\u751f\u6210\u6a21\u578b <a href=\"https:\/\/github.com\/THUDM\/CodeGeeX\">CodeGeeX<\/a> (<a href=\"https:\/\/arxiv.org\/abs\/2303.17568\">KDD\u201923<\/a>) \u7684\u7b2c\u4e8c\u4ee3\u6a21\u578b\u3002\u4e0d\u540c\u4e8e\u4e00\u4ee3 CodeGeeX\uff08\u5b8c\u5168\u5728\u56fd\u4ea7\u534e\u4e3a\u6607\u817e\u82af\u7247\u5e73\u53f0\u8bad\u7ec3\uff09 \uff0cCodeGeeX2 \u662f\u57fa\u4e8e <a href=\"https:\/\/github.com\/THUDM\/ChatGLM2-6B\">ChatGLM2<\/a> \u67b6\u6784\u52a0\u5165\u4ee3\u7801\u9884\u8bad\u7ec3\u5b9e\u73b0\uff0c\u5f97\u76ca\u4e8e ChatGLM2 \u7684\u66f4\u4f18\u6027\u80fd\uff0cCodeGeeX2 \u5728\u591a\u9879\u6307\u6807\u4e0a\u53d6\u5f97\u6027\u80fd\u63d0\u5347\uff08+107% &gt; CodeGeeX\uff1b\u4ec560\u4ebf\u53c2\u6570\u5373\u8d85\u8fc7150\u4ebf\u53c2\u6570\u7684 StarCoder-15B \u8fd110%\uff09<\/p>\n<\/blockquote>\n<p>\u4e0d\u8fc7\u56e0\u4e3a\u624b\u4e0a\u53ea\u6709\u4e00\u4e2a\u7b14\u8bb0\u672c\u7528\u7684 NVIDIA \u663e\u5361\uff0c\u578b\u53f7\u662f 3070\uff0c\u5e76\u4e14\u663e\u5b58\u8fd8\u53ea\u6709 8G\uff0c\u56e0\u6b64\u5728\u8fd0\u884c\u8fc7\u7a0b\u4e2d\u76f8\u5bf9\u66f4\u9ad8\u663e\u5b58\u7684\u663e\u5361\u4f1a\u78b0\u5230\u4e00\u4e9b\u5751\uff0c\u8fd9\u91cc\u5c31\u8bb0\u5f55\u4e00\u4e0b\u6574\u4e2a\u5b89\u88c5\u8fc7\u7a0b\u3001\u8e29\u7684\u5751\u548c\u89e3\u51b3\u65b9\u6cd5\u3002<\/p>\n<h2><span class=\"ez-toc-section\" id=\"%E7%B3%BB%E7%BB%9F%E7%8E%AF%E5%A2%83\"><\/span>\u7cfb\u7edf\u73af\u5883<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u6211\u7684\u7b14\u8bb0\u672c\u5b89\u88c5\u4e86 Windows 10 
\u7cfb\u7edf\uff0c\u5e76\u4e14\u8fd8\u6ca1\u6709\u5b89\u88c5 WSL\uff0c\u56e0\u6b64\u5c31\u76f4\u63a5\u5728 Windows \u7cfb\u7edf\u4e2d\u6765\u5b89\u88c5\u548c\u90e8\u7f72 CodeGeeX2 \u4e86\u3002<\/p>\n<p>\u8f6f\u4ef6\u548c\u786c\u4ef6\u73af\u5883\uff1a<\/p>\n<ul>\n<li>OS\uff1a Windows 10 22H2 (19045)<\/li>\n<li>Python\uff1a3.10.11<\/li>\n<li>CPU\uff1aAMD Ryzen 7 5800H<\/li>\n<li>\u5185\u5b58\uff1a32GB<\/li>\n<li>\u663e\u5361\uff1aRTX 3070 Laptop\uff0c8GB \u663e\u5b58<\/li>\n<li>\u663e\u5361\u9a71\u52a8\uff1aNVIDIA Studio 536.40<\/li>\n<\/ul>\n<p>\u5e76\u4e14\u540e\u7eed\u6240\u6709\u6a21\u578b\u548c\u6d4b\u8bd5\u76f8\u5173\u6587\u4ef6\uff0c\u90fd\u4f1a\u653e\u5230 <code>E:\\AI\\CodeGeeX2<\/code> \u76ee\u5f55\u4e2d\u3002<\/p>\n<h2><span class=\"ez-toc-section\" id=\"%E5%AE%89%E8%A3%85_Python_%E7%8E%AF%E5%A2%83\"><\/span>\u5b89\u88c5 Python \u73af\u5883<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<pre><code class=\"language-shell\">cd AI\\CodeGeeX2\n\n# \u521b\u5efa VirtualEnv\npython -m venv .venv\n\n# \u6fc0\u6d3b VirtualEnv\n.venv\\Scripts\\activate\n\n# \u5b89\u88c5\u4f9d\u8d56\npip install protobuf cpm_kernels &quot;torch&gt;=2.0&quot; gradio mdtex2html sentencepiece accelerate modelscope<\/code><\/pre>\n<h2><span class=\"ez-toc-section\" id=\"%E5%85%8B%E9%9A%86%E6%A8%A1%E5%9E%8B%E6%96%87%E4%BB%B6\"><\/span>\u514b\u9686\u6a21\u578b\u6587\u4ef6<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u8fd9\u91cc\u53ef\u4ee5\u76f4\u63a5\u4f7f\u7528 modelscope \u7684\u955c\u50cf\uff0c\u901f\u5ea6\u4f1a\u6bd4\u4ece GitHub \u514b\u9686\u8981\u5feb\u4e00\u4e9b\uff1a<\/p>\n<pre><code class=\"language-shell\"># \u6ce8\u610f\u8fd9\u91cc\u9700\u8981\u5728 E:\\AI\\CodeGeeX2 \u76ee\u5f55\u4e2d\ngit clone https:\/\/www.modelscope.cn\/ZhipuAI\/CodeGeeX2-6B.git<\/code><\/pre>\n<p>\u672c\u6765\u4ee5\u4e3a\u9700\u8981\u4f7f\u7528\u91cf\u5316\u7248\u672c 
codegeex2-6b-int4\uff0c\u4f46\u662f\u540e\u6765\u5728\u4f7f\u7528\u65f6\uff0c\u91cf\u5316\u7248\u672c\u53cd\u800c\u52a0\u8f7d\u4f1a\u51fa\u9519\uff0c\u4f7f\u7528\u539f\u59cb\u7248\u672c\u53cd\u800c\u6ca1\u95ee\u9898\u3002<\/p>\n<h2><span class=\"ez-toc-section\" id=\"%E8%B8%A9%E5%9D%91%E8%AE%B0%E5%BD%95\"><\/span>\u8e29\u5751\u8bb0\u5f55<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<h3>'ChatGLMTokenizer' object has no attribute 'tokenizer'<\/h3>\n<p>\u8fd9\u4e2a\u9700\u8981\u91cd\u65b0\u5b89\u88c5\u4e00\u4e2a\u7279\u5b9a\u7248\u672c\u7684 <code>transformers<\/code> \u5305\u6765\u89e3\u51b3<\/p>\n<pre><code class=\"language-shell\">pip uninstall transformers\npip install transformers==4.33.2<\/code><\/pre>\n<p>\u53c2\u8003 issue\uff1a<a href=\"https:\/\/github.com\/chatchat-space\/Langchain-Chatchat\/issues\/1835\">https:\/\/github.com\/chatchat-space\/Langchain-Chatchat\/issues\/1835<\/a><\/p>\n<h3><span class=\"ez-toc-section\" id=\"Torch_not_compiled_with_CUDA_enabled\"><\/span>Torch not compiled with CUDA enabled<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>\u8fd9\u662f\u9ed8\u8ba4\u4f7f\u7528 pip \u5b89\u88c5\u7684 torch \u5305\u5e76\u6ca1\u6709\u5305\u542b CUDA \u652f\u6301\uff0c\u9700\u8981\u6839\u636e\u8981\u5b89\u88c5\u7684\u64cd\u4f5c\u7cfb\u7edf\u548c CUDA \u7248\u672c\uff0c\u5728 PyTorch \u7f51\u7ad9\u83b7\u53d6\u6307\u5b9a\u7684\u5b89\u88c5\u547d\u4ee4\u6765\u89e3\u51b3\u3002<\/p>\n<p>\u5bf9\u4e8e Windows + Python + CUDA 11.8\uff0c\u53ef\u4ee5\u4f7f\u7528\u4ee5\u4e0b\u547d\u4ee4\u6765\u5b89\u88c5\uff1a<\/p>\n<pre><code class=\"language-bash\">pip3 install torch torchvision torchaudio --index-url https:\/\/download.pytorch.org\/whl\/cu118<\/code><\/pre>\n<h3><span class=\"ez-toc-section\" id=\"RuntimeError_Library_cublasLt_is_not_initialized\"><\/span>RuntimeError: Library cublasLt is not initialized<span 
class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>\u901a\u5e38\u8fd9\u4e2a\u9519\u8bef\u53d1\u751f\u65f6\uff0c\u6253\u5f00\u4efb\u52a1\u7ba1\u7406\u5668\u53ef\u4ee5\u770b\u5230\u663e\u5b58\u5df2\u7ecf\u7206\u4e86\uff0c\u4f1a\u76f4\u63a5\u51b2\u5230 8G \u7136\u540e\u6389\u56de\u53bb\u3002<\/p>\n<p>\u8fd9\u4e2a\u65f6\u5019\u9700\u8981\u4f7f\u7528 8bit \u91cf\u5316\u65b9\u5f0f\u8fd0\u884c\u6a21\u578b\uff0c\u5b89\u88c5 <code>accelerate<\/code> \u548c <code>bitsandbytes<\/code> \u8fd9\u4e24\u4e2a\u5305\u3002<\/p>\n<pre><code class=\"language-bash\">pip install accelerate\npip install bitsandbytes<\/code><\/pre>\n<p>\u5728\u5b89\u88c5\u5b8c\u540e\u8fd8\u662f\u4f1a\u63d0\u793a\u627e\u4e0d\u5230\u76f8\u5173\u7684 dll\uff0c\u9700\u8981\u4e0b\u8f7d\u9884\u7f16\u8bd1 dll \u548c\u4fee\u6539\u4e00\u4e0b <code>bitsandbytes<\/code> \u7684\u6e90\u4ee3\u7801\u3002<\/p>\n<p><strong>\u4e0b\u8f7d libbitsandbytes_cuda116.dll<\/strong><\/p>\n<p>\u5728 <a href=\"https:\/\/github.com\/DeXtmL\/bitsandbytes-win-prebuilt\">https:\/\/github.com\/DeXtmL\/bitsandbytes-win-prebuilt<\/a> \u4e0b\u8f7d\u9884\u7f16\u8bd1\u7684 libbitsandbytes_cuda116.dll\uff0c\u5e76\u5c06\u5b83\u653e\u5230\u4ee5\u4e0b\u6587\u4ef6\u5939\u4e2d\uff1a<\/p>\n<pre><code class=\"language-bash\">.venv\\Lib\\site-packages\\bitsandbytes<\/code><\/pre>\n<p>\u5728\u8fd9\u4e2a\u76ee\u5f55\u4e2d\u5df2\u7ecf\u6709\u5176\u4ed6\u7248\u672c\u7684 so \u7b49\u6587\u4ef6\uff0c\u6ce8\u610f\u8fd9\u4e2a dll \u6587\u4ef6\u7684\u6587\u4ef6\u5939\u548c \u8981\u4fee\u6539\u7684\u6e90\u4ee3\u7801\u5e76\u4e0d\u5728\u540c\u4e00\u4e2a\u76ee\u5f55\u3002<\/p>\n<p><strong>\u4fee\u6539 evaluate_cuda_setup<\/strong><\/p>\n<p>\u6587\u4ef6\u8def\u5f84\uff1a<code>.venv\\Lib\\site-packages\\bitsandbytes\\cuda_setup\\main.py<\/code><\/p>\n<p><img src=\"https:\/\/xujiwei.com\/blog\/wp-content\/uploads\/2024\/02\/run-codegeex2-modify-bitsandbytes1.png\" alt=\"image-20240215155258262\" style=\"max-width:500px;\" \/><\/p>\n<p>\u5148\u627e\u5230 <code>return 
libbitsandbytes_cpu.so<\/code>\uff0c\u628a\u8fd9\u4e00\u884c\u6ce8\u91ca\u6389\uff0c\u5e76\u66ff\u6362\u4e3a\u4ee5\u4e0b\u5185\u5bb9\uff1a<\/p>\n<pre><code class=\"language-python\">if torch.cuda.is_available(): return &#039;libbitsandbytes_cuda116.dll&#039;, None, None, None, None<\/code><\/pre>\n<p>\u9700\u8981\u6ce8\u610f Python \u4ee3\u7801\u7684\u7f29\u8fdb\u8981\u548c\u4e0a\u4e00\u884c\u5bf9\u9f50\u3002<\/p>\n<p><strong>\u4fee\u6539 run_cuda_setup<\/strong><\/p>\n<p>\u6587\u4ef6\u8def\u5f84\uff1a<code>.venv\\Lib\\site-packages\\bitsandbytes\\cuda_setup\\main.py<\/code><\/p>\n<p>\u8fd9\u4e2a\u4fee\u6539\u548c\u4e0a\u9762\u662f\u5728\u540c\u4e00\u4e2a\u6587\u4ef6\u4e2d\u3002<\/p>\n<p><img src=\"https:\/\/xujiwei.com\/blog\/wp-content\/uploads\/2024\/02\/run-codegeex2-modify-bitsandbytes2.png\" alt=\"image-20240215155533484\" style=\"max-width:500px;\" \/><\/p>\n<p>\u5728\u6587\u4ef6\u91cc\u641c\u7d22 <code>ct.cdll.LoadLibrary(binary_path)<\/code> \uff0c\u8fd9\u4e2a\u6709\u4e24\u5904\uff0c\u90fd\u5c06\u5b83\u4eec\u66ff\u6362\u4e3a\u4ee5\u4e0b\u5185\u5bb9\uff1a<\/p>\n<pre><code class=\"language-python\">self.lib = ct.cdll.LoadLibrary(str(binary_path))<\/code><\/pre>\n<p>\u540c\u6837\u9700\u8981\u6ce8\u610f\u5bf9\u9f50\u7f29\u8fdb\u3002<\/p>\n<p>\u53c2\u8003\u6587\u7ae0\uff1a<\/p>\n<ul>\n<li><a href=\"https:\/\/github.com\/THUDM\/ChatGLM-6B\/issues\/347\">https:\/\/github.com\/THUDM\/ChatGLM-6B\/issues\/347<\/a><\/li>\n<li><a href=\"https:\/\/github.com\/oobabooga\/text-generation-webui\/issues\/147\">https:\/\/github.com\/oobabooga\/text-generation-webui\/issues\/147<\/a><\/li>\n<\/ul>\n<h2><span class=\"ez-toc-section\" id=\"%E8%BF%90%E8%A1%8C%E6%A8%A1%E5%9E%8B\"><\/span>\u8fd0\u884c\u6a21\u578b<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<h3><span class=\"ez-toc-section\" id=\"%E4%BD%BF%E7%94%A8%E4%BB%A3%E7%A0%81%E8%B0%83%E7%94%A8_CodeGeeX2_%E7%94%9F%E6%88%90%E4%BB%A3%E7%A0%81\"><\/span>\u4f7f\u7528\u4ee3\u7801\u8c03\u7528 CodeGeeX2 
\u751f\u6210\u4ee3\u7801<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p><strong>\u6d4b\u8bd5\u4ee3\u7801 test.py<\/strong><\/p>\n<pre><code class=\"language-python\">from modelscope import AutoTokenizer, AutoModel\ntokenizer = AutoTokenizer.from_pretrained(&quot;E:\\AI\\CodeGeeX2\\CodeGeeX2-6B&quot;, trust_remote_code=True)\nmodel = AutoModel.from_pretrained(&quot;E:\\AI\\CodeGeeX2\\CodeGeeX2-6B&quot;, trust_remote_code=True).quantize(8).cuda()\nmodel = model.eval()\n\n# remember adding a language tag for better performance\nprompt = &quot;# language: Python\\n# \u7528python\u5199\u4e00\u4e2a\u5192\u6ce1\u6392\u5e8f\u7b97\u6cd5\uff0c\u5e76\u7528\u4e2d\u6587\u9010\u884c\u6ce8\u91ca\\n&quot;\ninputs = tokenizer.encode(prompt, return_tensors=&quot;pt&quot;, padding=True, truncation=True).to(model.device)\noutputs = model.generate(inputs, max_length=256, top_k=1)\nresponse = tokenizer.decode(outputs[0])\n\nprint(response)<\/code><\/pre>\n<p><strong>\u8fd0\u884c\u6d4b\u8bd5\u4ee3\u7801<\/strong><\/p>\n<p>\u5728\u547d\u4ee4\u884c\u4e2d\u901a\u8fc7 <code>python test.py<\/code> \u5c31\u53ef\u4ee5\u8fd0\u884c\u6d4b\u8bd5\u4ee3\u7801\uff1a<\/p>\n<p><img src=\"https:\/\/xujiwei.com\/blog\/wp-content\/uploads\/2024\/02\/run-codegeex2-test1.png\" alt=\"image-20240215201241226\" style=\"max-width:500px;\" \/><\/p>\n<p>\u540c\u65f6\u53ef\u4ee5\u5728\u4efb\u52a1\u7ba1\u7406\u5668\u770b\u5230 GPU \u663e\u5b58\u5360\u7528\u9010\u6e10\u5c31\u51e0\u4e4e\u5230\u8fbe 100%\uff1a<\/p>\n<p><img src=\"https:\/\/xujiwei.com\/blog\/wp-content\/uploads\/2024\/02\/run-codegeex2-test-vram.png\" alt=\"image-20240215201627298\" style=\"max-width:500px;\" \/><\/p>\n<p><strong>\u8f93\u51fa\u7ed3\u679c\u4ee3\u7801<\/strong><\/p>\n<pre><code class=\"language-python\"># language: Python\n# \u7528python\u5199\u4e00\u4e2a\u5192\u6ce1\u6392\u5e8f\u7b97\u6cd5\uff0c\u5e76\u7528\u4e2d\u6587\u9010\u884c\u6ce8\u91ca\n\ndef bubble_sort(alist):\n    &quot;&quot;&quot;\n    \u5192\u6ce1\u6392\u5e8f\n    
:param alist: \u9700\u8981\u6392\u5e8f\u7684\u5217\u8868\n    :return: \u6392\u5e8f\u540e\u7684\u5217\u8868\n    &quot;&quot;&quot;\n    for i in range(len(alist) - 1, 0, -1):\n        for j in range(i):\n            if alist[j] &gt; alist[j + 1]:\n                alist[j], alist[j + 1] = alist[j + 1], alist[j]\n    return alist\n\nif __name__ == &quot;__main__&quot;:\n    print(bubble_sort([5, 2, 4, 6, 1, 3]))<\/code><\/pre>\n<h3><span class=\"ez-toc-section\" id=\"%E4%BD%BF%E7%94%A8_API_%E8%B0%83%E7%94%A8_CodeGeeX2_%E7%94%9F%E6%88%90%E4%BB%A3%E7%A0%81\"><\/span>\u4f7f\u7528 API \u8c03\u7528 CodeGeeX2 \u751f\u6210\u4ee3\u7801<span class=\"ez-toc-section-end\"><\/span><\/h3>\n<p>\u4f7f\u7528\u6d4b\u8bd5\u6587\u4ef6\u6765\u6d4b\u8bd5\u5c31\u662f\u4f1a\u5bfc\u81f4\u6bcf\u6b21\u8fd0\u884c\u7684\u65f6\u5019\u90fd\u9700\u8981\u91cd\u65b0\u52a0\u8f7d\u4e00\u6b21\u6a21\u578b\uff0c\u8fd9\u4e2a\u8fc7\u7a0b\u8fd8\u662f\u6bd4\u8f83\u8d39\u65f6\u95f4\u7684\uff0c\u56e0\u6b64\u53ef\u4ee5\u901a\u8fc7 FastAPI \u6765\u8fd0\u884c\u4e00\u4e2a API \u670d\u52a1\u5668\uff0c\u8fd9\u6837\u5c31\u53ef\u4ee5\u901a\u8fc7 HTTP \u8bf7\u6c42\u7684\u65b9\u5f0f\u6765\u751f\u6210\u4ee3\u7801\uff0c\u907f\u514d\u6bcf\u6b21\u90fd\u8981\u91cd\u65b0\u52a0\u8f7d\u6a21\u578b\u6587\u4ef6\u3002<\/p>\n<p><strong>API \u670d\u52a1\u5668\u6d4b\u8bd5\u6587\u4ef6 api_test.py<\/strong><\/p>\n<pre><code class=\"language-python\">from fastapi import FastAPI, Request\nfrom modelscope import AutoTokenizer, AutoModel\nimport uvicorn, json, datetime\nimport torch\n\napp = FastAPI()\n\n@app.post(&quot;\/&quot;)\nasync def create_item(request: Request):\n    global model, tokenizer\n    json_post_raw = await request.json()\n    json_post = json.dumps(json_post_raw)\n    json_post_list = json.loads(json_post)\n    prompt = json_post_list.get(&#039;prompt&#039;)\n    inputs = tokenizer.encode(prompt, return_tensors=&quot;pt&quot;, padding=True, truncation=True).to(model.device)\n    outputs = model.generate(inputs, 
max_length=256, top_k=1)\n    response = tokenizer.decode(outputs[0])\n    now = datetime.datetime.now()\n    time = now.strftime(&quot;%Y-%m-%d %H:%M:%S&quot;)\n    answer = {\n        &quot;response&quot;: response,\n        &quot;status&quot;: 200,\n        &quot;time&quot;: time\n    }\n    log = &quot;[&quot; + time + &quot;] &quot; + &#039;&quot;, prompt:&quot;&#039; + prompt + &#039;&quot;, response:&quot;&#039; + repr(response) + &#039;&quot;&#039;\n    print(log)\n\n    return answer\n\nif __name__ == &#039;__main__&#039;:\n    tokenizer = AutoTokenizer.from_pretrained(&quot;E:\\AI\\CodeGeeX2\\CodeGeeX2-6B&quot;, trust_remote_code=True)\n    model = AutoModel.from_pretrained(&quot;E:\\AI\\CodeGeeX2\\CodeGeeX2-6B&quot;, trust_remote_code=True).quantize(8).cuda()\n    model = model.eval()\n    uvicorn.run(app, host=&#039;127.0.0.1&#039;, port=7860, workers=1)<\/code><\/pre>\n<p><strong>\u8fd0\u884c API \u670d\u52a1\u5668<\/strong><\/p>\n<p>\u5728\u547d\u4ee4\u884c\u4e2d\u901a\u8fc7 <code>python api_test.py<\/code> \u6765\u8fd0\u884c API \u670d\u52a1\u5668\uff0c\u4f1a\u63d0\u793a\u5df2\u7ecf \u5728 <code>http:\/\/127.0.0.1:7860<\/code> \u8fd9\u4e2a\u5730\u5740\u4e0a\u8fd0\u884c\u8d77\u6765\uff1a<\/p>\n<p><img src=\"https:\/\/xujiwei.com\/blog\/wp-content\/uploads\/2024\/02\/run-codegeex2-api-server.png\" alt=\"image-20240215203144795\" style=\"zoom:50%;\" \/><\/p>\n<p><strong>\u901a\u8fc7 API \u8bf7\u6c42\u83b7\u53d6\u7ed3\u679c<\/strong><\/p>\n<p>\u8fd9\u4e2a\u65f6\u5019\u4f7f\u7528\u50cf Reqable \u7684 API \u6d4b\u8bd5\u5de5\u5177\uff0c\u5c31\u53ef\u4ee5\u53d1\u9001\u4e00\u4e2a\u8bf7\u6c42\u7ed9\u5230\u670d\u52a1\u5668\uff0c\u6765\u83b7\u53d6\u5bf9\u5e94\u7684\u4ee3\u7801\u751f\u6210\u7ed3\u679c\u4e86\u3002<\/p>\n<p><img src=\"https:\/\/xujiwei.com\/blog\/wp-content\/uploads\/2024\/02\/run-codegeex2-api-test.png\" alt=\"image-20240215202846636\" style=\"max-width:500px;\" \/><\/p>\n<p>\u4e0a\u9762\u8fd9\u4e2a\u8fd4\u56de\u7684 API 
\u54cd\u5e94\u5c31\u8ddf\u4e4b\u524d\u4f7f\u7528\u5355\u6b21\u6d4b\u8bd5\u751f\u6210\u7684\u7ed3\u679c\u4e00\u6837\u4e86\u3002<\/p>\n<h2><span class=\"ez-toc-section\" id=\"%E5%B0%8F%E7%BB%93\"><\/span>\u5c0f\u7ed3<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<p>\u5c0f\u663e\u5b58\u8dd1\u5927\u8bed\u8a00\u6a21\u578b\u8fd8\u662f\u6bd4\u8f83\u591a\u5751\u7684\uff0c\u914d\u7f6e\u8dd1\u6a21\u578b\u7684 Python \u73af\u5883\u4e5f\u6bd4\u8f83\u590d\u6742\uff0c\u611f\u8c22\u7f51\u4e0a\u8bf8\u591a\u5df2\u7ecf\u8e29\u8fc7\u5751\u7684\u524d\u8f88\u7ed9\u51fa\u7684\u5404\u79cd\u89e3\u51b3\u65b9\u6848\u3002<\/p>\n<p>\u53e6\u5916\u4f7f\u7528\u4e86 GPU \u52a0\u901f\u7684\u5927\u8bed\u8a00\u6a21\u578b\uff0c\u6bd4\u5355\u7eaf\u7528 CPU \u8dd1\u8fd8\u662f\u5feb\u5f88\u591a\u7684\uff0c\u8fd9\u6b21\u53ea\u662f\u4f7f\u7528 CodeGeeX2 \u7b80\u5355\u6d4b\u8bd5\u4e00\u4e0b\u4ee3\u7801\u751f\u6210\u80fd\u529b\uff0c\u8fd8\u6ca1\u6709\u6df1\u5165\u4f7f\u7528\uff0c\u540e\u7eed\u518d\u6df1\u5165\u4f7f\u7528\u4e86\u6709\u7ecf\u9a8c\u4e4b\u540e\u518d\u5206\u4eab\u7ed9\u5927\u5bb6\u3002<\/p>\n<h2><span class=\"ez-toc-section\" id=\"%E5%8F%82%E8%80%83%E8%B5%84%E6%96%99\"><\/span>\u53c2\u8003\u8d44\u6599<span class=\"ez-toc-section-end\"><\/span><\/h2>\n<ul>\n<li><a href=\"https:\/\/blog.csdn.net\/weixin_43734080\/article\/details\/133776347\">https:\/\/blog.csdn.net\/weixin_43734080\/article\/details\/133776347<\/a><\/li>\n<li><a href=\"https:\/\/www.modelscope.cn\/models\/ZhipuAI\/CodeGeeX2-6B\/files\">https:\/\/www.modelscope.cn\/models\/ZhipuAI\/CodeGeeX2-6B\/files<\/a><\/li>\n<li><a href=\"https:\/\/github.com\/chatchat-space\/Langchain-Chatchat\/issues\/1835\">https:\/\/github.com\/chatchat-space\/Langchain-Chatchat\/issues\/1835<\/a><\/li>\n<li><a href=\"https:\/\/github.com\/pytorch\/pytorch\/issues\/30664\">https:\/\/github.com\/pytorch\/pytorch\/issues\/30664<\/a><\/li>\n<li><a href=\"https:\/\/pytorch.org\/\">https:\/\/pytorch.org\/<\/a><\/li>\n<li><a 
href=\"https:\/\/github.com\/THUDM\/ChatGLM-6B\/issues\/88\">https:\/\/github.com\/THUDM\/ChatGLM-6B\/issues\/88<\/a><\/li>\n<li><a href=\"https:\/\/github.com\/DeXtmL\/bitsandbytes-win-prebuilt\">https:\/\/github.com\/DeXtmL\/bitsandbytes-win-prebuilt<\/a><\/li>\n<li><a href=\"https:\/\/github.com\/THUDM\/CodeGeeX2\">https:\/\/github.com\/THUDM\/CodeGeeX2<\/a><\/li>\n<li><a href=\"https:\/\/codegeex.cn\/\">https:\/\/codegeex.cn\/<\/a><\/li>\n<\/ul>\n","protected":false},"excerpt":{"rendered":"<p>\u4e4b\u524d\u770b\u5230 CodeGeeX2 \u5927\u8bed\u8a00\u6a21\u578b\u53d1\u5e03\u4e86\uff0c\u4ecb\u7ecd\u8bf4\u6bd4\u4e0a\u4e00\u4e2a\u7248\u672c\u6548\u679c\u8981\u597d\u4e0a\u4e0d\u5c11 &hellip;<\/p>\n<p class=\"read-more\"><a href=\"https:\/\/xujiwei.com\/blog\/2024\/02\/codegeex2-local-deploy\/\">\u7ee7\u7eed\u9605\u8bfb &raquo;<\/a><\/p>\n","protected":false},"author":2,"featured_media":1193,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":[],"categories":[167],"tags":[375,376,380,377,381,378,379,36],"_links":{"self":[{"href":"https:\/\/xujiwei.com\/blog\/wp-json\/wp\/v2\/posts\/1192"}],"collection":[{"href":"https:\/\/xujiwei.com\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/xujiwei.com\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/xujiwei.com\/blog\/wp-json\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"https:\/\/xujiwei.com\/blog\/wp-json\/wp\/v2\/comments?post=1192"}],"version-history":[{"count":0,"href":"https:\/\/xujiwei.com\/blog\/wp-json\/wp\/v2\/posts\/1192\/revisions"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/xujiwei.com\/blog\/wp-json\/wp\/v2\/media\/1193"}],"wp:attachment":[{"href":"https:\/\/xujiwei.com\/blog\/wp-json\/wp\/v2\/media?parent=1192"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/xujiwei.com\/blog\/wp-json\/wp\/v2\/categories?post=1192"},{"taxonomy":"post_tag","embeddable":true,"href":"ht
tps:\/\/xujiwei.com\/blog\/wp-json\/wp\/v2\/tags?post=1192"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}