diff --git a/vllm_npu/utils.py b/vllm_npu/utils.py
index 37b8e4a..9b20407 100644
--- a/vllm_npu/utils.py
+++ b/vllm_npu/utils.py
@@ -67,6 +67,13 @@ _IS_EAGLE_MODE = None
 def is_310p():
     global _IS_310P
     if _IS_310P is None:
+        # torch_npu.npu.get_soc_version() triggers NPU lazy init, which
+        # breaks forked subprocesses. Until init_ascend_soc_version() has
+        # run (it is called from NPUWorker.init_device()), report False
+        # WITHOUT caching it, so the real SoC version is still detected on
+        # the first call made after the device has been initialized.
+        if _ascend_soc_version is None:
+            return False
         try:
             soc_version = torch_npu.npu.get_soc_version()
             # 310P soc_version range: 200-209
diff --git a/vllm_npu/worker/worker_v1.py b/vllm_npu/worker/worker_v1.py
index 0281488..72419e5 100644
--- a/vllm_npu/worker/worker_v1.py
+++ b/vllm_npu/worker/worker_v1.py
@@ -87,9 +87,8 @@ class NPUWorker(WorkerBase):
         ops.register_dummy_fusion_op()
         _register_atb_extensions()
         register_ascend_customop(vllm_config)
-        # init ascend config and soc version
+        # init ascend config (soc version deferred to init_device)
         init_ascend_config(vllm_config)
-        init_ascend_soc_version()
         use_sparse = False
         if vllm_config.model_config is not None:
             use_sparse = hasattr(vllm_config.model_config.hf_config,
@@ -213,6 +212,8 @@ class NPUWorker(WorkerBase):
 
     def init_device(self):
         device = self._init_device()
+        # Now that NPU device is initialized, get soc version
+        init_ascend_soc_version()
         # Init ModelRunner here, so that we have access to self.device.
         self.model_runner = NPUModelRunner(self.vllm_config, device)
 