Mirror of https://github.com/handsomezhuzhu/vllm-npu-plugin.git (synced 2026-02-20 19:50:15 +00:00)
fix: accept cache_dtype_str in get_kv_cache_shape
@@ -83,6 +83,7 @@ class AscendAttentionBackend(AttentionBackend):
         block_size: int,
         num_kv_heads: int,
         head_size: int,
+        **kwargs,
     ) -> Tuple[int, int, int, int]:
         """KV cache shape: (num_blocks, block_size, num_kv_heads, head_size).
@@ -91,6 +92,7 @@ class AscendAttentionBackend(AttentionBackend):
         """
         return (num_blocks, block_size, num_kv_heads, head_size)


     @staticmethod
     def swap_blocks(
         src_kv_cache: List[torch.Tensor],
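For context, a minimal sketch of the patched method. It assumes the added `**kwargs` is what absorbs the `cache_dtype_str` keyword named in the commit title; the standalone class, the numeric arguments, and the `"auto"` value in the call below are illustrative assumptions, not taken from the diff (the real class subclasses vLLM's `AttentionBackend`).

```python
from typing import Tuple


class AscendAttentionBackend:
    """Sketch only; the real class subclasses vLLM's AttentionBackend."""

    @staticmethod
    def get_kv_cache_shape(
        num_blocks: int,
        block_size: int,
        num_kv_heads: int,
        head_size: int,
        **kwargs,  # absorbs extra keywords, e.g. cache_dtype_str
    ) -> Tuple[int, int, int, int]:
        """KV cache shape: (num_blocks, block_size, num_kv_heads, head_size)."""
        # The shape does not depend on the cache dtype, so a
        # cache_dtype_str keyword is accepted and ignored.
        return (num_blocks, block_size, num_kv_heads, head_size)


# Without **kwargs, a call that passes the extra keyword would raise:
#   TypeError: get_kv_cache_shape() got an unexpected keyword argument 'cache_dtype_str'
shape = AscendAttentionBackend.get_kv_cache_shape(
    num_blocks=128, block_size=16, num_kv_heads=8, head_size=64,
    cache_dtype_str="auto",  # hypothetical value; the name comes from the commit title
)
print(shape)  # -> (128, 16, 8, 64)
```

Accepting the keyword via `**kwargs` rather than a named parameter keeps the backend compatible with vLLM versions that do not pass `cache_dtype_str` at all.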