feat: Implement the NPU platform plugin for vLLM, including platform registration, device management, custom operations, and configuration adaptation.

This commit is contained in:
2026-02-10 22:05:06 +08:00
parent 4ca9d52cf2
commit 5bef2da1f1
2 changed files with 10 additions and 7 deletions

View File

@@ -15,16 +15,15 @@ def register():
from vllm_npu.cuda_compat import _patch_cuda_to_npu
_patch_cuda_to_npu()
# Register NPU custom ops with vLLM's CustomOp dispatch so that
# ops like SiluAndMul, RMSNorm, RotaryEmbedding use NPU kernels
# instead of falling back to CUDA (which would produce garbage).
_register_npu_ops()
return "vllm_npu.platform.NPUPlatform"
def _register_npu_ops(): def register_npu_ops():
"""Register Ascend NPU op overrides with vLLM's CustomOp system.""" """Register Ascend NPU op overrides with vLLM's CustomOp system.
Must be called AFTER the platform is established (e.g., during
worker init or check_and_update_config), NOT during register().
"""
from vllm.model_executor.custom_op import CustomOp
from vllm_npu.ops.activation import AscendSiluAndMul

View File

@@ -180,6 +180,10 @@ class NPUPlatform(Platform):
"""Adapt vLLM configuration for NPU hardware.""" """Adapt vLLM configuration for NPU hardware."""
from vllm.config import CompilationLevel
# Register NPU custom ops (must happen after platform is detected)
from vllm_npu import register_npu_ops
register_npu_ops()
parallel_config = vllm_config.parallel_config
cache_config = vllm_config.cache_config
compilation_config = vllm_config.compilation_config