mirror of
https://github.com/handsomezhuzhu/vllm-npu-plugin.git
synced 2026-02-20 11:42:30 +00:00
- NPUPlatform: device management, HCCL process group, config adaptation
- AscendAttentionBackend: npu_fusion_attention (prefill) + npu_incre_flash_attention (decode)
- NPUCommunicator: HCCL-based distributed communication
- NPUWorker: NPU device init, memory profiling
- Custom ops: SiluAndMul, RMS norm, rotary embedding
- Plugin registered via the vllm.platform_plugins entry point

Based on the official vllm-ascend pattern, targeting Ascend 910B.
25 lines
586 B
Python
25 lines
586 B
Python
"""
vllm-npu-plugin: Ascend NPU platform plugin for vLLM v0.11.0.

Registers as an out-of-tree platform plugin via the
``vllm.platform_plugins`` entry-point group.
"""

from setuptools import find_packages, setup

# Package metadata and plugin registration for the Ascend NPU platform plugin.
setup(
    name="vllm-npu-plugin",
    version="0.1.0",
    description="Ascend NPU platform plugin for vLLM v0.11.0",
    # Pick up every package found under the directory containing this script.
    packages=find_packages(),
    python_requires=">=3.9",
    install_requires=[
        # Intentionally empty: vllm must already be installed (v0.11.0).
    ],
    entry_points={
        # Entry-point group under which the plugin is registered; the "npu"
        # entry points at the ``register`` callable in the ``vllm_npu`` package.
        "vllm.platform_plugins": [
            "npu = vllm_npu:register",
        ],
    },
)