diff --git a/vllm_npu/utils.py b/vllm_npu/utils.py
index 9b20407..a2d7909 100644
--- a/vllm_npu/utils.py
+++ b/vllm_npu/utils.py
@@ -47,7 +47,15 @@ REGISTERED_ASCEND_OPS = {}
 ACL_FORMAT_FRACTAL_ND = 2
 ACL_FORMAT_FRACTAL_NZ = 29
 
-_CUSTOM_OP_ENABLED = None
+# Pre-check for vllm_npu_C at module load time so that torch.compile/dynamo
+# never encounters a failing import during tracing.
+try:
+    import vllm_npu.vllm_npu_C  # type: ignore # noqa: F401
+    import vllm_npu.meta_registration  # type: ignore # noqa: F401
+    _CUSTOM_OP_ENABLED = True
+except (ImportError, ModuleNotFoundError):
+    _CUSTOM_OP_ENABLED = False
+
 _IS_310P = None
 _SLEEP_MODE_ENABLED = None
 _CURRENT_STREAM = None
@@ -219,25 +227,10 @@ def try_register_lib(lib_name: str, lib_info: str = ""):
 
 def enable_custom_op():
     """
-    Enable lazy init for vllm_npu_C to avoid early initialization of CANN's RTS component.
-    Ensure that ASCEND_RT_VISIBLE_DEVICES can be dynamically modified before torch.npu.set_device().
+    Check if vllm_npu_C custom ops are available.
+    The import check is done at module load time to avoid torch.compile/dynamo
+    tracing failures.
     """
-    global _CUSTOM_OP_ENABLED
-    if _CUSTOM_OP_ENABLED is not None:
-        return _CUSTOM_OP_ENABLED
-    try:
-        # isort: off
-        # register custom ops into torch_library here
-        import vllm_npu.vllm_npu_C  # type: ignore # noqa: F401
-        # register the meta implementation for custom kernel if necessary
-        import vllm_npu.meta_registration  # type: ignore # noqa: F401
-        # isort: on
-        _CUSTOM_OP_ENABLED = True
-    except ImportError:
-        _CUSTOM_OP_ENABLED = False
-        logger.warning(
-            "Warning: Failed to register custom ops, all custom ops will be disabled"
-        )
     return _CUSTOM_OP_ENABLED
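
For context, here is a minimal self-contained sketch of the pattern this patch adopts: probe an optional compiled extension once at module load time and expose the cached result through a trivial accessor. All names below (some_optional_ext, ext_available, fused_add) are hypothetical, chosen only to illustrate the idea; they are not part of vllm_npu.

# Probe the extension before any tracing can start. Because the try/except
# around the import runs at module import, torch.compile/Dynamo only ever
# sees a function returning an already-fixed Python bool.
import torch

try:
    import some_optional_ext as _ext  # hypothetical compiled extension
    _EXT_AVAILABLE = True
except ImportError:  # also covers ModuleNotFoundError, its subclass
    _ext = None
    _EXT_AVAILABLE = False


def ext_available() -> bool:
    # Constant by the time any compiled region runs, so Dynamo can
    # constant-fold the branch below instead of tracing an import failure.
    return _EXT_AVAILABLE


@torch.compile
def fused_add(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
    if ext_available():
        return _ext.add(a, b)  # hypothetical custom kernel
    return a + b

One trade-off, visible in the removed docstring: the old lazy check deferred the import precisely so CANN's RTS component would not initialize before torch.npu.set_device() (keeping ASCEND_RT_VISIBLE_DEVICES modifiable). Importing vllm_npu_C eagerly gives that up in exchange for a trace-safe constant.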