feat: Implement the NPU platform plugin for vLLM, including platform registration, device management, custom operations, and configuration adaptation.

2026-02-20 19:50:15 +00:00 · 2026-02-10 22:05:06 +08:00
parent 4ca9d52cf2
commit 5bef2da1f1
2 changed files with 10 additions and 7 deletions
--- a/vllm_npu/init.py
+++ b/vllm_npu/init.py
@@ -15,16 +15,15 @@ def register():
    from vllm_npu.cuda_compat import _patch_cuda_to_npu
    _patch_cuda_to_npu()
    # Register NPU custom ops with vLLM's CustomOp dispatch so that
    # ops like SiluAndMul, RMSNorm, RotaryEmbedding use NPU kernels
    # instead of falling back to CUDA (which would produce garbage).
    _register_npu_ops()
    return "vllm_npu.platform.NPUPlatform"
-def _register_npu_ops():
+def register_npu_ops():
-    """Register Ascend NPU op overrides with vLLM's CustomOp system."""
+    """Register Ascend NPU op overrides with vLLM's CustomOp system.
    Must be called AFTER the platform is established (e.g., during
    worker init or check_and_update_config), NOT during register().
    """
    from vllm.model_executor.custom_op import CustomOp
    from vllm_npu.ops.activation import AscendSiluAndMul
--- a/vllm_npu/platform.py
+++ b/vllm_npu/platform.py
@@ -180,6 +180,10 @@ class NPUPlatform(Platform):
        """Adapt vLLM configuration for NPU hardware."""
        from vllm.config import CompilationLevel
        # Register NPU custom ops (must happen after platform is detected)
        from vllm_npu import register_npu_ops
        register_npu_ops()
        parallel_config = vllm_config.parallel_config
        cache_config = vllm_config.cache_config
        compilation_config = vllm_config.compilation_config