mirror of
https://github.com/handsomezhuzhu/vllm-npu-plugin.git
synced 2026-02-20 19:50:15 +00:00
39 lines
1.3 KiB
Python
39 lines
1.3 KiB
Python
"""
vllm_npu — Ascend NPU platform plugin for vLLM.

The ``register()`` function is discovered by vLLM through the
``vllm.platform_plugins`` entry-point and returns the fully-qualified
class name of the platform implementation.
"""


def register():
    """Return the fully-qualified name of the NPU platform class.

    Discovered by vLLM via the ``vllm.platform_plugins`` entry-point.
    """
    # Install the CUDA→NPU compatibility shim up front: once patched,
    # any reference to torch.cuda.Stream / Event / etc. is transparently
    # redirected to the corresponding torch.npu object.
    from vllm_npu.cuda_compat import _patch_cuda_to_npu

    _patch_cuda_to_npu()

    return "vllm_npu.platform.NPUPlatform"
|
|
|
|
|
|
def register_npu_ops():
    """Register Ascend NPU op overrides with vLLM's CustomOp system.

    Must be called AFTER the platform is established (e.g., during
    worker init or check_and_update_config), NOT during register().
    """
    from vllm.model_executor.custom_op import CustomOp

    from vllm_npu.ops.activation import AscendSiluAndMul
    from vllm_npu.ops.layernorm import AscendRMSNorm
    from vllm_npu.ops.rotary_embedding import AscendRotaryEmbedding

    # Map each vLLM op name to its Ascend replacement and register them
    # one by one as out-of-tree (oot) overrides.
    overrides = (
        ("SiluAndMul", AscendSiluAndMul),
        ("RMSNorm", AscendRMSNorm),
        ("RotaryEmbedding", AscendRotaryEmbedding),
    )
    for op_name, override_cls in overrides:
        CustomOp.register_oot(_decorated_op_cls=override_cls, name=op_name)