mirror of
https://github.com/handsomezhuzhu/vllm-npu-plugin.git
synced 2026-02-20 19:50:15 +00:00
fix: add initialize_cache method to NPU worker
This commit is contained in:
@@ -184,6 +184,12 @@ class NPUWorker(WorkerBase):
|
||||
def get_kv_cache_spec(self) -> KVCacheSpec:
    """Return the KV cache spec reported by the model runner.

    Pure delegation: whatever ``self.model_runner.get_kv_cache_spec()``
    returns is handed back to the caller unchanged.
    """
    runner = self.model_runner
    kv_cache_spec = runner.get_kv_cache_spec()
    return kv_cache_spec
|
||||
|
||||
def initialize_cache(
    self,
    num_gpu_blocks: int,
    num_cpu_blocks: int,
) -> None:
    """Record the KV cache block counts on ``self.cache_config``.

    Only the two config fields are written here; this method performs no
    other work.

    Args:
        num_gpu_blocks: Value stored as ``cache_config.num_gpu_blocks``.
            NOTE(review): presumably the device-side (NPU) block count
            despite the "gpu" name -- confirm against the caller.
        num_cpu_blocks: Value stored as ``cache_config.num_cpu_blocks``.
    """
    cache_cfg = self.cache_config
    cache_cfg.num_gpu_blocks = num_gpu_blocks
    cache_cfg.num_cpu_blocks = num_cpu_blocks
|
||||
|
||||
def initialize_from_config(self, kv_cache_config: KVCacheConfig) -> None:
    """Allocate the NPU KV caches described by ``kv_cache_config``.

    The work is delegated entirely to the model runner: the config is
    forwarded as-is to ``self.model_runner.initialize_kv_cache`` and its
    result is discarded.

    Args:
        kv_cache_config: Configuration passed through to the model runner.
    """
    runner = self.model_runner
    runner.initialize_kv_cache(kv_cache_config)
|
||||
|
||||
Reference in New Issue
Block a user