diff --git a/vllm_npu/utils.py b/vllm_npu/utils.py
index 37b8e4a..9b20407 100644
--- a/vllm_npu/utils.py
+++ b/vllm_npu/utils.py
@@ -67,11 +67,24 @@ _IS_EAGLE_MODE = None
 def is_310p():
+    """Return True when the current NPU is a 310P part.
+
+    The SOC is queried only after ``init_ascend_soc_version()`` has set
+    ``_ascend_soc_version``: calling ``torch_npu.npu.get_soc_version()``
+    earlier triggers NPU lazy init, which breaks forked subprocesses.
+    Until then the function returns False without caching, so the real
+    answer is computed on the first call made after device init.
+    """
     global _IS_310P
     if _IS_310P is None:
+        if _ascend_soc_version is None:
+            # Too early to touch the NPU; leave the cache empty so this
+            # provisional answer is re-evaluated once the SOC is known.
+            return False
         try:
             soc_version = torch_npu.npu.get_soc_version()
             # 310P soc_version range: 200-209
             _IS_310P = 200 <= soc_version <= 209
         except Exception:
+            # Best-effort probe: treat a failed query as "not 310P".
             _IS_310P = False
     return _IS_310P
 
diff --git a/vllm_npu/worker/worker_v1.py b/vllm_npu/worker/worker_v1.py
index 0281488..72419e5 100644
--- a/vllm_npu/worker/worker_v1.py
+++ b/vllm_npu/worker/worker_v1.py
@@ -87,9 +87,8 @@ class NPUWorker(WorkerBase):
         ops.register_dummy_fusion_op()
         _register_atb_extensions()
         register_ascend_customop(vllm_config)
-        # init ascend config and soc version
+        # init ascend config (soc version deferred to init_device)
         init_ascend_config(vllm_config)
-        init_ascend_soc_version()
         use_sparse = False
         if vllm_config.model_config is not None:
             use_sparse = hasattr(vllm_config.model_config.hf_config,
@@ -213,6 +212,8 @@ class NPUWorker(WorkerBase):
 
     def init_device(self):
         device = self._init_device()
+        # Now that NPU device is initialized, get soc version
+        init_ascend_soc_version()
         # Init ModelRunner here, so that we have access to self.device.
         self.model_runner = NPUModelRunner(self.vllm_config, device)