mirror of
https://github.com/handsomezhuzhu/vllm-npu-plugin.git
synced 2026-02-20 11:42:30 +00:00
49 lines
1.8 KiB
Python
49 lines
1.8 KiB
Python
from vllm import ModelRegistry
|
|
|
|
import vllm_npu.envs as envs_ascend
|
|
|
|
|
|
def register_model():
    """Register the vllm_npu model implementations with vLLM's ModelRegistry.

    Every entry is a ``(model name, "module.path:ClassName")`` pair that is
    handed to ``ModelRegistry.register_model`` in a fixed order. Which
    Qwen2.5 implementations are registered depends on
    ``envs_ascend.USE_OPTIMIZED_MODEL``.
    """
    # Qwen VL variants that are registered regardless of any flag.
    qwen_vl_entries = (
        ("Qwen2VLForConditionalGeneration",
         "vllm_npu.models.qwen2_vl:AscendQwen2VLForConditionalGeneration"),
        ("Qwen3VLMoeForConditionalGeneration",
         "vllm_npu.models.qwen2_5_vl_without_padding:AscendQwen3VLMoeForConditionalGeneration"
         ),
        ("Qwen3VLForConditionalGeneration",
         "vllm_npu.models.qwen2_5_vl_without_padding:AscendQwen3VLForConditionalGeneration"
         ),
    )
    for arch_name, class_path in qwen_vl_entries:
        ModelRegistry.register_model(arch_name, class_path)

    # The flag is read only after the entries above have been registered.
    if envs_ascend.USE_OPTIMIZED_MODEL:
        qwen2_5_entries = (
            ("Qwen2_5_VLForConditionalGeneration",
             "vllm_npu.models.qwen2_5_vl:AscendQwen2_5_VLForConditionalGeneration"
             ),
            ("Qwen2_5OmniModel",
             "vllm_npu.models.qwen2_5_omni_thinker:AscendQwen2_5OmniThinkerForConditionalGeneration"
             ),
        )
    else:
        qwen2_5_entries = (
            ("Qwen2_5_VLForConditionalGeneration",
             "vllm_npu.models.qwen2_5_vl_without_padding:AscendQwen2_5_VLForConditionalGeneration_Without_Padding"
             ),
        )
    for arch_name, class_path in qwen2_5_entries:
        ModelRegistry.register_model(arch_name, class_path)

    # NOTE(review): the remaining registrations are treated as unconditional
    # (outside the USE_OPTIMIZED_MODEL branch) - confirm against the
    # original file's indentation.
    remaining_entries = (
        ("DeepseekV32ForCausalLM",
         "vllm_npu.models.deepseek_v3_2:CustomDeepseekV3ForCausalLM"),
        # vLLM has no PanguProMoEForCausalLM of its own, so it has to be
        # registered before vLLM config initialization for the model to load
        # correctly. This entry can be dropped once vLLM supports
        # PanguProMoEForCausalLM.
        ("PanguProMoEForCausalLM",
         "vllm_npu.torchair.models.torchair_pangu_moe:PanguProMoEForCausalLM"
         ),
        ("Qwen3NextForCausalLM",
         "vllm_npu.models.qwen3_next:CustomQwen3NextForCausalLM"),
    )
    for arch_name, class_path in remaining_entries:
        ModelRegistry.register_model(arch_name, class_path)
|