fix: accept cache_dtype_str in get_kv_cache_shape

2026-02-20 19:50:15 +00:00 · 2026-02-10 19:23:20 +08:00
parent c3631d65c2
commit a274fd82ad
1 changed file with 2 additions and 0 deletions
--- a/vllm_npu/attention/attention_v1.py
+++ b/vllm_npu/attention/attention_v1.py
@@ -83,6 +83,7 @@ class AscendAttentionBackend(AttentionBackend):
        block_size: int,
        num_kv_heads: int,
        head_size: int,
        **kwargs,
    ) -> Tuple[int, int, int, int]:
        """KV cache shape: (num_blocks, block_size, num_kv_heads, head_size).
@@ -91,6 +92,7 @@ class AscendAttentionBackend(AttentionBackend):
        """
        return (num_blocks, block_size, num_kv_heads, head_size)
    @staticmethod
    def swap_blocks(
        src_kv_cache: List[torch.Tensor],