from vllm import ModelRegistry

import vllm_npu.envs as envs_ascend


def register_model():
    """Register the vllm_npu model classes with vLLM's ``ModelRegistry``.

    Each entry pairs a model name with a ``"module:ClassName"`` string and
    is passed positionally to ``ModelRegistry.register_model``. Entries are
    registered in three groups, in this order:

    1. Qwen2-VL, Qwen3-VL-MoE and Qwen3-VL.
    2. The Qwen2.5 entries, chosen by ``envs_ascend.USE_OPTIMIZED_MODEL``:
       when it is truthy, Qwen2.5-VL and Qwen2.5-Omni are registered from
       their dedicated modules; otherwise only Qwen2.5-VL is registered,
       using the class from the ``qwen2_5_vl_without_padding`` module.
    3. DeepSeek V3.2, PanguProMoE and Qwen3-Next.
    """

    def _register_all(entries):
        # Register every (name, "module:ClassName") pair in the given order.
        for arch_name, class_path in entries:
            ModelRegistry.register_model(arch_name, class_path)

    _register_all((
        (
            "Qwen2VLForConditionalGeneration",
            "vllm_npu.models.qwen2_vl:AscendQwen2VLForConditionalGeneration",
        ),
        (
            "Qwen3VLMoeForConditionalGeneration",
            "vllm_npu.models.qwen2_5_vl_without_padding:AscendQwen3VLMoeForConditionalGeneration",
        ),
        (
            "Qwen3VLForConditionalGeneration",
            "vllm_npu.models.qwen2_5_vl_without_padding:AscendQwen3VLForConditionalGeneration",
        ),
    ))

    # The flag is read only after the entries above have been registered,
    # and it decides which Qwen2.5 implementations get registered.
    if envs_ascend.USE_OPTIMIZED_MODEL:
        qwen2_5_entries = (
            (
                "Qwen2_5_VLForConditionalGeneration",
                "vllm_npu.models.qwen2_5_vl:AscendQwen2_5_VLForConditionalGeneration",
            ),
            (
                "Qwen2_5OmniModel",
                "vllm_npu.models.qwen2_5_omni_thinker:AscendQwen2_5OmniThinkerForConditionalGeneration",
            ),
        )
    else:
        qwen2_5_entries = (
            (
                "Qwen2_5_VLForConditionalGeneration",
                "vllm_npu.models.qwen2_5_vl_without_padding:AscendQwen2_5_VLForConditionalGeneration_Without_Padding",
            ),
        )
    _register_all(qwen2_5_entries)

    _register_all((
        (
            "DeepseekV32ForCausalLM",
            "vllm_npu.models.deepseek_v3_2:CustomDeepseekV3ForCausalLM",
        ),
        # vLLM has no PanguProMoEForCausalLM of its own, so it must be
        # registered before vLLM config initialization for the model to load
        # correctly. This entry can be dropped once vLLM supports
        # PanguProMoEForCausalLM.
        (
            "PanguProMoEForCausalLM",
            "vllm_npu.torchair.models.torchair_pangu_moe:PanguProMoEForCausalLM",
        ),
        (
            "Qwen3NextForCausalLM",
            "vllm_npu.models.qwen3_next:CustomQwen3NextForCausalLM",
        ),
    ))