mirror of
https://github.com/handsomezhuzhu/vllm-npu-plugin.git
synced 2026-02-20 19:50:15 +00:00
fix: add initialize_cache method to NPU worker
This commit is contained in:
@@ -184,6 +184,12 @@ class NPUWorker(WorkerBase):
|
|||||||
def get_kv_cache_spec(self) -> KVCacheSpec:
    """Return the KV cache spec reported by the model runner.

    Delegates directly to ``self.model_runner.get_kv_cache_spec()`` and
    passes its result through unchanged.
    """
    return self.model_runner.get_kv_cache_spec()
|
||||||
|
|
||||||
|
def initialize_cache(self, num_gpu_blocks: int,
                     num_cpu_blocks: int) -> None:
    """Store the number of KV cache blocks.

    Records both counts on ``self.cache_config``; nothing is allocated here.

    Args:
        num_gpu_blocks: Value stored as ``cache_config.num_gpu_blocks``.
            NOTE(review): presumably the device (NPU) block count, with the
            "gpu" name kept to match the caller's interface -- confirm.
        num_cpu_blocks: Value stored as ``cache_config.num_cpu_blocks``.
    """
    self.cache_config.num_gpu_blocks = num_gpu_blocks
    self.cache_config.num_cpu_blocks = num_cpu_blocks
|
||||||
|
|
||||||
def initialize_from_config(self, kv_cache_config: KVCacheConfig) -> None:
|
def initialize_from_config(self, kv_cache_config: KVCacheConfig) -> None:
|
||||||
"""Allocate KV caches on NPU."""
|
"""Allocate KV caches on NPU."""
|
||||||
self.model_runner.initialize_kv_cache(kv_cache_config)
|
self.model_runner.initialize_kv_cache(kv_cache_config)
|
||||||
|
|||||||
Reference in New Issue
Block a user