From a274fd82ad856756b3759b06b423dfc19de82978 Mon Sep 17 00:00:00 2001
From: handsomezhuzhu <2658601135@qq.com>
Date: Tue, 10 Feb 2026 19:23:20 +0800
Subject: [PATCH] fix: accept cache_dtype_str in get_kv_cache_shape

---
 vllm_npu/attention/attention_v1.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/vllm_npu/attention/attention_v1.py b/vllm_npu/attention/attention_v1.py
index 859df09..3c0d687 100644
--- a/vllm_npu/attention/attention_v1.py
+++ b/vllm_npu/attention/attention_v1.py
@@ -83,6 +83,7 @@ class AscendAttentionBackend(AttentionBackend):
         block_size: int,
         num_kv_heads: int,
         head_size: int,
+        **kwargs,
     ) -> Tuple[int, int, int, int]:
         """KV cache shape: (num_blocks, block_size, num_kv_heads, head_size).
 
@@ -91,6 +92,7 @@
 
         """
         return (num_blocks, block_size, num_kv_heads, head_size)
+
     @staticmethod
     def swap_blocks(
         src_kv_cache: List[torch.Tensor],
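
Note for reviewers (illustrative, not part of the diff): newer vLLM core passes an extra cache_dtype_str keyword when it calls get_kv_cache_shape, so a fixed four-parameter signature fails with a TypeError. The minimal sketch below shows how the **kwargs catch-all absorbs that keyword; the stand-in class, the caller, and the concrete argument values are assumptions for illustration, not the actual vLLM call site.

    from typing import Tuple


    class AscendAttentionBackend:
        """Trimmed stand-in for the backend in vllm_npu/attention/attention_v1.py (example only)."""

        @staticmethod
        def get_kv_cache_shape(
            num_blocks: int,
            block_size: int,
            num_kv_heads: int,
            head_size: int,
            **kwargs,  # absorbs extra keywords such as cache_dtype_str from newer vLLM
        ) -> Tuple[int, int, int, int]:
            # KV cache shape: (num_blocks, block_size, num_kv_heads, head_size).
            return (num_blocks, block_size, num_kv_heads, head_size)


    # Hypothetical caller mimicking a newer vLLM core; the values are made up.
    shape = AscendAttentionBackend.get_kv_cache_shape(
        num_blocks=1024,
        block_size=128,
        num_kv_heads=8,
        head_size=128,
        cache_dtype_str="auto",  # ignored by the backend; without **kwargs this raises TypeError
    )
    assert shape == (1024, 128, 8, 128)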