Mirror of https://github.com/handsomezhuzhu/vllm-npu-plugin.git, synced 2026-02-20 19:50:15 +00:00
Major overhaul
@@ -1,38 +1,33 @@
 """
 vllm_npu — Ascend NPU platform plugin for vLLM.
 
 The ``register()`` function is discovered by vLLM through the
 ``vllm.platform_plugins`` entry-point and returns the fully-qualified
 class name of the platform implementation.
 """
 #
 # Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # This file is a part of the vllm-ascend project.
 #


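Editor's note: the module docstring above refers to vLLM's vllm.platform_plugins entry-point group. For context, a plugin distribution normally advertises its register() function through that group in its packaging metadata; the sketch below is illustrative only, and the distribution name and entry name are assumptions, not taken from this repository.

# Illustrative packaging sketch -- not a file in this commit.
from setuptools import setup

setup(
    name="vllm-npu",                   # assumed distribution name
    packages=["vllm_npu"],
    entry_points={
        "vllm.platform_plugins": [
            # vLLM calls this function at startup and imports the
            # platform class named by the string it returns.
            "npu = vllm_npu:register",  # assumed entry name
        ],
    },
)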
 def register():
-    """Register the NPU platform."""
+    """Return the fully-qualified name of the NPU platform class."""
+    # Apply CUDA→NPU compatibility patches early so that any code
+    # referencing torch.cuda.Stream / Event / etc. will transparently
+    # be redirected to the torch.npu equivalents.
+    from vllm_npu.cuda_compat import _patch_cuda_to_npu
+    _patch_cuda_to_npu()
 
     return "vllm_npu.platform.NPUPlatform"


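Editor's note: the vllm_npu.cuda_compat module imported in register() is not part of this hunk. Going only by the comment above, it aliases the torch.cuda stream/event APIs to their torch.npu counterparts. A rough sketch of that kind of patch follows; the exact set of attributes it rewrites is an assumption.

# Illustrative sketch only; the real vllm_npu/cuda_compat.py may differ.
import torch


def _patch_cuda_to_npu() -> None:
    """Alias the torch.cuda symbols vLLM touches to their torch.npu equivalents."""
    import torch_npu  # noqa: F401  (importing it registers the torch.npu namespace)

    torch.cuda.Stream = torch.npu.Stream
    torch.cuda.Event = torch.npu.Event
    torch.cuda.current_stream = torch.npu.current_stream
    torch.cuda.synchronize = torch.npu.synchronize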
-def register_model():
-    from .models import register_model
-    register_model()
-
-
-def register_connector():
-    from vllm_npu.distributed import register_connector
-    register_connector()
+def register_npu_ops():
+    """Register Ascend NPU op overrides with vLLM's CustomOp system.
+
+    Must be called AFTER the platform is established (e.g., during
+    worker init or check_and_update_config), NOT during register().
+    """
+    from vllm.model_executor.custom_op import CustomOp
+
+    from vllm_npu.ops.activation import AscendSiluAndMul
+    from vllm_npu.ops.layernorm import AscendRMSNorm
+    from vllm_npu.ops.rotary_embedding import AscendRotaryEmbedding
+
+    for name, op_cls in {
+        "SiluAndMul": AscendSiluAndMul,
+        "RMSNorm": AscendRMSNorm,
+        "RotaryEmbedding": AscendRotaryEmbedding,
+    }.items():
+        CustomOp.register_oot(_decorated_op_cls=op_cls, name=name)
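Editor's note: per the docstring of register_npu_ops(), the op overrides must be installed only after vLLM has committed to the NPU platform, for example from the platform's check_and_update_config hook. A minimal sketch of that wiring is shown below; the actual NPUPlatform in vllm_npu/platform.py is not part of this diff, so the override here is illustrative.

# Illustrative sketch only; vllm_npu/platform.py is not shown in this commit.
from vllm.platforms.interface import Platform


class NPUPlatform(Platform):

    @classmethod
    def check_and_update_config(cls, vllm_config) -> None:
        # By this point vLLM has already selected the NPU platform, so it
        # is safe to swap in the Ascend SiluAndMul / RMSNorm /
        # RotaryEmbedding implementations via CustomOp.register_oot().
        from vllm_npu import register_npu_ops
        register_npu_ops()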