From 5bef2da1f1e22b19ea6fde4c8e4b172841e51cd6 Mon Sep 17 00:00:00 2001 From: handsomezhuzhu <2658601135@qq.com> Date: Tue, 10 Feb 2026 22:05:06 +0800 Subject: [PATCH] feat: Implement the NPU platform plugin for vLLM, including platform registration, device management, custom operations, and configuration adaptation. --- vllm_npu/__init__.py | 13 ++++++------- vllm_npu/platform.py | 4 ++++ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/vllm_npu/__init__.py b/vllm_npu/__init__.py index fa5886f..b03577b 100644 --- a/vllm_npu/__init__.py +++ b/vllm_npu/__init__.py @@ -15,16 +15,15 @@ def register(): from vllm_npu.cuda_compat import _patch_cuda_to_npu _patch_cuda_to_npu() - # Register NPU custom ops with vLLM's CustomOp dispatch so that - # ops like SiluAndMul, RMSNorm, RotaryEmbedding use NPU kernels - # instead of falling back to CUDA (which would produce garbage). - _register_npu_ops() - return "vllm_npu.platform.NPUPlatform" -def _register_npu_ops(): - """Register Ascend NPU op overrides with vLLM's CustomOp system.""" +def register_npu_ops(): + """Register Ascend NPU op overrides with vLLM's CustomOp system. + + Must be called AFTER the platform is established (e.g., during + worker init or check_and_update_config), NOT during register(). + """ from vllm.model_executor.custom_op import CustomOp from vllm_npu.ops.activation import AscendSiluAndMul diff --git a/vllm_npu/platform.py b/vllm_npu/platform.py index e5e128e..0c980df 100644 --- a/vllm_npu/platform.py +++ b/vllm_npu/platform.py @@ -180,6 +180,10 @@ class NPUPlatform(Platform): """Adapt vLLM configuration for NPU hardware.""" from vllm.config import CompilationLevel + # Register NPU custom ops (must happen after platform is detected) + from vllm_npu import register_npu_ops + register_npu_ops() + parallel_config = vllm_config.parallel_config cache_config = vllm_config.cache_config compilation_config = vllm_config.compilation_config