mirror of https://github.com/handsomezhuzhu/vllm-npu-plugin.git
feat: Add Ascend NPU attention backend with NPU-specific FlashAttention, LayerNorm, and Rotary Embedding implementations.
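For context on where the register() function patched below fits: vLLM discovers out-of-tree platforms through the vllm.platform_plugins entry-point group, and the entry point's callable must return the fully qualified platform class path (here "vllm_npu.platform.NPUPlatform"). A minimal sketch of that wiring, assuming this package exposes register() from vllm_npu/__init__.py and picking "vllm_npu" as a hypothetical plugin key; neither detail is taken from this commit:

# setup.py -- sketch of the entry-point wiring, not the commit's actual file.
from setuptools import find_packages, setup

setup(
    name="vllm-npu-plugin",
    packages=find_packages(),
    entry_points={
        # vLLM scans this group at startup and calls each entry's target;
        # the returned string names the Platform subclass to load.
        "vllm.platform_plugins": ["vllm_npu = vllm_npu:register"],
    },
)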
@@ -15,4 +15,25 @@ def register():
     from vllm_npu.cuda_compat import _patch_cuda_to_npu
 
     _patch_cuda_to_npu()
 
+    # Register NPU custom ops with vLLM's CustomOp dispatch so that
+    # ops like SiluAndMul, RMSNorm, RotaryEmbedding use NPU kernels
+    # instead of falling back to CUDA (which would produce garbage).
+    _register_npu_ops()
+
     return "vllm_npu.platform.NPUPlatform"
+
+
+def _register_npu_ops():
+    """Register Ascend NPU op overrides with vLLM's CustomOp system."""
+    from vllm.model_executor.custom_op import CustomOp
+
+    from vllm_npu.ops.activation import AscendSiluAndMul
+    from vllm_npu.ops.layernorm import AscendRMSNorm
+    from vllm_npu.ops.rotary_embedding import AscendRotaryEmbedding
+
+    for name, op_cls in {
+        "SiluAndMul": AscendSiluAndMul,
+        "RMSNorm": AscendRMSNorm,
+        "RotaryEmbedding": AscendRotaryEmbedding,
+    }.items():
+        CustomOp.register_oot(_decorated_op_cls=op_cls, name=name)
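To make the registration concrete, here is a minimal sketch of what one of the overrides named above could look like. It is an illustration, not the commit's implementation: it assumes vLLM's CustomOp dispatches out-of-tree platforms to a forward_oot() method and that torch_npu exposes npu_rms_norm returning an (output, inv_rms) pair.

# ops/layernorm.py -- sketch of the AscendRMSNorm override named in the diff.
# Assumptions: (a) vLLM routes out-of-tree platforms to forward_oot(), and
# (b) torch_npu.npu_rms_norm exists with this calling convention.
from typing import Optional, Tuple, Union

import torch
import torch_npu
from vllm.model_executor.layers.layernorm import RMSNorm


class AscendRMSNorm(RMSNorm):

    def forward_oot(
        self,
        x: torch.Tensor,
        residual: Optional[torch.Tensor] = None,
    ) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
        if residual is not None:
            # Sketch only: reuse the reference path for the residual-add
            # case; a real kernel would fuse the add on the NPU.
            return self.forward_native(x, residual)
        # Assumed torch_npu API: npu_rms_norm returns an (output, inv_rms)
        # pair; the normalized activations are the first element.
        return torch_npu.npu_rms_norm(x, self.weight, self.variance_epsilon)[0]

Once CustomOp.register_oot() has run, every layer that vLLM constructs as RMSNorm should resolve to the Ascend class on this platform, which is why the diff needs no per-model changes.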