mirror of
https://github.com/handsomezhuzhu/vllm-npu-plugin.git
synced 2026-02-20 11:42:30 +00:00
fix: Defer SOC version initialization to avoid NPU lazy init issues
This commit is contained in:
@@ -67,11 +67,13 @@ _IS_EAGLE_MODE = None
|
|||||||
def is_310p():
|
def is_310p():
|
||||||
global _IS_310P
|
global _IS_310P
|
||||||
if _IS_310P is None:
|
if _IS_310P is None:
|
||||||
try:
|
# Check if SOC version is already known from init_ascend_soc_version()
|
||||||
soc_version = torch_npu.npu.get_soc_version()
|
if _ascend_soc_version is not None:
|
||||||
# 310P soc_version range: 200-209
|
_IS_310P = False # 310P is not A2 or A3
|
||||||
_IS_310P = 200 <= soc_version <= 209
|
else:
|
||||||
except Exception:
|
# Avoid calling torch_npu.npu.get_soc_version() here as it
|
||||||
|
# triggers NPU lazy init which breaks forked subprocesses.
|
||||||
|
# Default to False; will be updated after init_device().
|
||||||
_IS_310P = False
|
_IS_310P = False
|
||||||
return _IS_310P
|
return _IS_310P
|
||||||
|
|
||||||
|
|||||||
@@ -87,9 +87,8 @@ class NPUWorker(WorkerBase):
|
|||||||
ops.register_dummy_fusion_op()
|
ops.register_dummy_fusion_op()
|
||||||
_register_atb_extensions()
|
_register_atb_extensions()
|
||||||
register_ascend_customop(vllm_config)
|
register_ascend_customop(vllm_config)
|
||||||
# init ascend config and soc version
|
# init ascend config (soc version deferred to init_device)
|
||||||
init_ascend_config(vllm_config)
|
init_ascend_config(vllm_config)
|
||||||
init_ascend_soc_version()
|
|
||||||
use_sparse = False
|
use_sparse = False
|
||||||
if vllm_config.model_config is not None:
|
if vllm_config.model_config is not None:
|
||||||
use_sparse = hasattr(vllm_config.model_config.hf_config,
|
use_sparse = hasattr(vllm_config.model_config.hf_config,
|
||||||
@@ -213,6 +212,8 @@ class NPUWorker(WorkerBase):
|
|||||||
|
|
||||||
def init_device(self):
|
def init_device(self):
|
||||||
device = self._init_device()
|
device = self._init_device()
|
||||||
|
# Now that NPU device is initialized, get soc version
|
||||||
|
init_ascend_soc_version()
|
||||||
# Init ModelRunner here, so that we have access to self.device.
|
# Init ModelRunner here, so that we have access to self.device.
|
||||||
self.model_runner = NPUModelRunner(self.vllm_config, device)
|
self.model_runner = NPUModelRunner(self.vllm_config, device)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user