大改

2026-02-21 04:00:15 +00:00 · 2026-02-10 23:08:39 +08:00
parent 1baa36026c
commit 6680585975
172 changed files with 52867 additions and 892 deletions
--- a/vllm_npu/torchair/ops/torchair_layernorm.py
+++ b/vllm_npu/torchair/ops/torchair_layernorm.py
@@ -0,0 +1,78 @@
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file is a part of the vllm-ascend project.
+#
+
+from typing import Optional, Tuple, Union
+
+import torch
+from vllm.config import get_current_vllm_config
+from vllm.model_executor.layers.layernorm import RMSNorm
+
+_original_re_init = RMSNorm.__init__
+
+
+def torchair_rmsnorm_init_(
+    self,
+    hidden_size: int,
+    eps: float = 1e-6,
+    var_hidden_size: Optional[int] = None,
+    has_weight: bool = True,
+    dtype: Optional[torch.dtype] = None,
+) -> None:
+    _original_re_init(self, hidden_size, eps, var_hidden_size, has_weight,
+                      dtype)
+    vllm_config = get_current_vllm_config()
+    self.bias = None
+    # quantization with anti_method m4 will generate none-zero norm bias
+    if vllm_config.quant_config is not None and \
+            any("norm.bias" in name for name in vllm_config.quant_config.quant_description.keys()):
+        self.bias = torch.nn.Parameter(torch.zeros(hidden_size),
+                                       requires_grad=False)
+
+
+def torchair_rmsnorm_forward_oot(
+    self,
+    x: torch.Tensor,
+    residual: Optional[torch.Tensor] = None,
+) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
+    """AscendRMSNorm forward in torchair mode.
+
+    The key difference from the original implementation is the removal of operators
+    from the torch.ops.vllm class, as these operators only function in non-torchair
+    modes. Adding them back would cause the graph compilation to fail.
+    """
+
+    import torch_npu
+
+    from vllm_npu.utils import is_310p
+    if residual is not None:
+        if is_310p():
+            orig_dtype = residual.dtype
+            x = x + residual.to(x.dtype)
+            residual = x.to(orig_dtype)
+            x, _ = torch_npu.npu_rms_norm(x, self.weight,
+                                          self.variance_epsilon)
+        else:
+            x, _, residual = torch_npu.npu_add_rms_norm(
+                x, residual, self.weight, self.variance_epsilon)
+        if self.bias is not None:
+            x.add_(self.bias)
+        return x, residual
+
+    x, residual = torch_npu.npu_rms_norm(x, self.weight, self.variance_epsilon)
+    if self.bias is not None:
+        x.add_(self.bias)
+    return x