大改

2026-02-20 19:50:15 +00:00 · 2026-02-10 23:08:39 +08:00
parent 1baa36026c
commit 6680585975
172 changed files with 52867 additions and 892 deletions
--- a/vllm_npu/models/__init__.py
+++ b/vllm_npu/models/__init__.py
@@ -0,0 +1,48 @@
+from vllm import ModelRegistry
+
+import vllm_npu.envs as envs_ascend
+
+
+def register_model():
+    """Register the vllm_npu model implementations with vLLM's ModelRegistry.
+
+    Each architecture name is bound to a "module.path:ClassName" target string;
+    ``envs_ascend.USE_OPTIMIZED_MODEL`` picks the Qwen2.5 implementations.
+    """
+    register = ModelRegistry.register_model
+
+    for arch, target in (
+        ("Qwen2VLForConditionalGeneration",
+         "vllm_npu.models.qwen2_vl:AscendQwen2VLForConditionalGeneration"),
+        ("Qwen3VLMoeForConditionalGeneration",
+         "vllm_npu.models.qwen2_5_vl_without_padding:AscendQwen3VLMoeForConditionalGeneration"),
+        ("Qwen3VLForConditionalGeneration",
+         "vllm_npu.models.qwen2_5_vl_without_padding:AscendQwen3VLForConditionalGeneration"),
+    ):
+        register(arch, target)
+
+    # Qwen2_5OmniModel is only registered when USE_OPTIMIZED_MODEL is truthy.
+    if not envs_ascend.USE_OPTIMIZED_MODEL:
+        register(
+            "Qwen2_5_VLForConditionalGeneration",
+            "vllm_npu.models.qwen2_5_vl_without_padding:AscendQwen2_5_VLForConditionalGeneration_Without_Padding")
+    else:
+        register(
+            "Qwen2_5_VLForConditionalGeneration",
+            "vllm_npu.models.qwen2_5_vl:AscendQwen2_5_VLForConditionalGeneration")
+        register(
+            "Qwen2_5OmniModel",
+            "vllm_npu.models.qwen2_5_omni_thinker:AscendQwen2_5OmniThinkerForConditionalGeneration")
+
+    # vLLM has no PanguProMoEForCausalLM of its own, so it has to be registered
+    # before vLLM config initialization for the model to load correctly. That
+    # entry can be dropped once vLLM supports PanguProMoEForCausalLM.
+    for arch, target in (
+        ("DeepseekV32ForCausalLM",
+         "vllm_npu.models.deepseek_v3_2:CustomDeepseekV3ForCausalLM"),
+        ("PanguProMoEForCausalLM",
+         "vllm_npu.torchair.models.torchair_pangu_moe:PanguProMoEForCausalLM"),
+        ("Qwen3NextForCausalLM",
+         "vllm_npu.models.qwen3_next:CustomQwen3NextForCausalLM"),
+    ):
+        register(arch, target)