mirror of
https://github.com/handsomezhuzhu/vllm-npu-plugin.git
synced 2026-02-20 11:42:30 +00:00
feat: Implement the NPU platform plugin for vLLM, including platform registration, device management, custom operations, and configuration adaptation.
This commit is contained in:
@@ -15,16 +15,15 @@ def register():
|
|||||||
from vllm_npu.cuda_compat import _patch_cuda_to_npu
|
from vllm_npu.cuda_compat import _patch_cuda_to_npu
|
||||||
_patch_cuda_to_npu()
|
_patch_cuda_to_npu()
|
||||||
|
|
||||||
# Register NPU custom ops with vLLM's CustomOp dispatch so that
|
|
||||||
# ops like SiluAndMul, RMSNorm, RotaryEmbedding use NPU kernels
|
|
||||||
# instead of falling back to CUDA (which would produce garbage).
|
|
||||||
_register_npu_ops()
|
|
||||||
|
|
||||||
return "vllm_npu.platform.NPUPlatform"
|
return "vllm_npu.platform.NPUPlatform"
|
||||||
|
|
||||||
|
|
||||||
def _register_npu_ops():
|
def register_npu_ops():
|
||||||
"""Register Ascend NPU op overrides with vLLM's CustomOp system."""
|
"""Register Ascend NPU op overrides with vLLM's CustomOp system.
|
||||||
|
|
||||||
|
Must be called AFTER the platform is established (e.g., during
|
||||||
|
worker init or check_and_update_config), NOT during register().
|
||||||
|
"""
|
||||||
from vllm.model_executor.custom_op import CustomOp
|
from vllm.model_executor.custom_op import CustomOp
|
||||||
|
|
||||||
from vllm_npu.ops.activation import AscendSiluAndMul
|
from vllm_npu.ops.activation import AscendSiluAndMul
|
||||||
|
|||||||
@@ -180,6 +180,10 @@ class NPUPlatform(Platform):
|
|||||||
"""Adapt vLLM configuration for NPU hardware."""
|
"""Adapt vLLM configuration for NPU hardware."""
|
||||||
from vllm.config import CompilationLevel
|
from vllm.config import CompilationLevel
|
||||||
|
|
||||||
|
# Register NPU custom ops (must happen after platform is detected)
|
||||||
|
from vllm_npu import register_npu_ops
|
||||||
|
register_npu_ops()
|
||||||
|
|
||||||
parallel_config = vllm_config.parallel_config
|
parallel_config = vllm_config.parallel_config
|
||||||
cache_config = vllm_config.cache_config
|
cache_config = vllm_config.cache_config
|
||||||
compilation_config = vllm_config.compilation_config
|
compilation_config = vllm_config.compilation_config
|
||||||
|
|||||||
Reference in New Issue
Block a user