mirror of
https://github.com/handsomezhuzhu/vllm-npu-plugin.git
synced 2026-02-20 19:50:15 +00:00
fix: Defer SOC version initialization to avoid NPU lazy init issues
This commit is contained in:
@@ -87,9 +87,8 @@ class NPUWorker(WorkerBase):
|
||||
ops.register_dummy_fusion_op()
|
||||
_register_atb_extensions()
|
||||
register_ascend_customop(vllm_config)
|
||||
- # init ascend config and soc version
|
||||
+ # init ascend config (soc version deferred to init_device)
|
||||
init_ascend_config(vllm_config)
|
||||
- init_ascend_soc_version()
|
||||
use_sparse = False
|
||||
if vllm_config.model_config is not None:
|
||||
use_sparse = hasattr(vllm_config.model_config.hf_config,
|
||||
@@ -213,6 +212,8 @@ class NPUWorker(WorkerBase):
|
||||
|
||||
def init_device(self):
|
||||
device = self._init_device()
|
||||
+ # Now that NPU device is initialized, get soc version
|
||||
+ init_ascend_soc_version()
|
||||
# Init ModelRunner here, so that we have access to self.device.
|
||||
self.model_runner = NPUModelRunner(self.vllm_config, device)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user