vllm-npu-plugin/vllm_npu/__init__.py

"""
vllm_npu — Ascend NPU platform plugin for vLLM.

The ``register()`` function is discovered by vLLM through the
``vllm.platform_plugins`` entry-point and returns the fully-qualified
class name of the platform implementation.
"""


def register():
    """Return the fully-qualified name of the NPU platform class."""
    # Apply CUDA→NPU compatibility patches early so that any code
    # referencing torch.cuda.Stream / Event / etc. will transparently
    # be redirected to the torch.npu equivalents.
    from vllm_npu.cuda_compat import _patch_cuda_to_npu
    _patch_cuda_to_npu()

    # Register NPU custom ops with vLLM's CustomOp dispatch so that
    # ops like SiluAndMul, RMSNorm, RotaryEmbedding use NPU kernels
    # instead of falling back to CUDA (which would produce garbage).
    _register_npu_ops()

    return "vllm_npu.platform.NPUPlatform"


def _register_npu_ops():
    """Register Ascend NPU op overrides with vLLM's CustomOp system."""
    from vllm.model_executor.custom_op import CustomOp

    from vllm_npu.ops.activation import AscendSiluAndMul
    from vllm_npu.ops.layernorm import AscendRMSNorm
    from vllm_npu.ops.rotary_embedding import AscendRotaryEmbedding

    for name, op_cls in {
        "SiluAndMul": AscendSiluAndMul,
        "RMSNorm": AscendRMSNorm,
        "RotaryEmbedding": AscendRotaryEmbedding,
    }.items():
        CustomOp.register_oot(_decorated_op_cls=op_cls, name=name)
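
For context on the entry-point mechanism mentioned in the module docstring, here is a minimal sketch of how register() could be exposed to vLLM, assuming a setuptools-based build. The plugin's real packaging metadata is not shown above, so the project name and version below are placeholders.

# setup.py: illustrative packaging sketch, not the plugin's actual build file.
# vLLM scans the "vllm.platform_plugins" entry-point group and calls each
# registered function; returning a platform class path activates the plugin.
from setuptools import find_packages, setup

setup(
    name="vllm-npu",      # assumed project name
    version="0.0.1",      # placeholder version
    packages=find_packages(),
    entry_points={
        "vllm.platform_plugins": [
            # "<plugin name> = <module>:<register function>"
            "npu = vllm_npu:register",
        ],
    },
)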
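
The cuda_compat module referenced in register() is not shown here. Purely as a hypothetical illustration of the kind of aliasing that _patch_cuda_to_npu() performs, a patch along these lines would redirect the torch.cuda symbols named in the comment to their torch.npu counterparts, which become available once torch_npu is imported.

# Hypothetical sketch only; not the actual contents of vllm_npu/cuda_compat.py.
import torch


def _patch_cuda_to_npu():
    import torch_npu  # noqa: F401  # importing attaches the torch.npu namespace

    # Alias commonly used torch.cuda symbols to their NPU equivalents so that
    # third-party code hard-coded against torch.cuda keeps working on Ascend.
    torch.cuda.Stream = torch.npu.Stream
    torch.cuda.Event = torch.npu.Event
    torch.cuda.current_stream = torch.npu.current_stream
    torch.cuda.synchronize = torch.npu.synchronize
    torch.cuda.is_available = torch.npu.is_available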
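
The Ascend op classes imported in _register_npu_ops() live elsewhere in the package and are not shown above. As a rough sketch of the override pattern (assuming each class subclasses the corresponding vLLM op and implements the out-of-tree forward_oot() hook), AscendSiluAndMul might look something like this; the fused-kernel call is illustrative.

# Rough sketch of the override pattern; not the actual vllm_npu/ops/activation.py.
import torch

from vllm.model_executor.layers.activation import SiluAndMul


class AscendSiluAndMul(SiluAndMul):
    """SiluAndMul variant that dispatches to a fused Ascend kernel."""

    def forward_oot(self, x: torch.Tensor) -> torch.Tensor:
        import torch_npu

        # npu_swiglu fuses silu(x[..., :d]) * x[..., d:] into one kernel,
        # where d is half of the last dimension.
        return torch_npu.npu_swiglu(x)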