"""vllm_npu — Ascend NPU platform plugin for vLLM.

vLLM discovers :func:`register` through the ``vllm.platform_plugins``
entry-point; it returns the fully-qualified class name of the platform
implementation.
"""


def register():
    """Return the fully-qualified name of the NPU platform class."""
    # Install the CUDA→NPU shims before anything else so that later code
    # touching torch.cuda.Stream / Event / etc. is transparently routed
    # to the torch.npu equivalents.
    from vllm_npu.cuda_compat import _patch_cuda_to_npu

    _patch_cuda_to_npu()

    # Hook the Ascend kernels into vLLM's CustomOp dispatch so ops such
    # as SiluAndMul, RMSNorm, and RotaryEmbedding resolve to NPU
    # implementations rather than falling back to CUDA (which would
    # produce garbage on this hardware).
    _register_npu_ops()

    return "vllm_npu.platform.NPUPlatform"


def _register_npu_ops():
    """Register Ascend NPU op overrides with vLLM's CustomOp system."""
    from vllm.model_executor.custom_op import CustomOp

    from vllm_npu.ops.activation import AscendSiluAndMul
    from vllm_npu.ops.layernorm import AscendRMSNorm
    from vllm_npu.ops.rotary_embedding import AscendRotaryEmbedding

    # (vLLM op name, Ascend replacement class) pairs to install.
    overrides = (
        ("SiluAndMul", AscendSiluAndMul),
        ("RMSNorm", AscendRMSNorm),
        ("RotaryEmbedding", AscendRotaryEmbedding),
    )
    for op_name, replacement in overrides:
        CustomOp.register_oot(_decorated_op_cls=replacement, name=op_name)