feat: add CUDA-to-NPU monkey patches for GPUModelRunner compatibility

2026-02-20 19:50:15 +00:00 · 2026-02-10 19:09:14 +08:00
parent 0765fc9fd3
commit 693e0a1d89
2 changed files with 95 additions and 0 deletions
--- a/vllm_npu/init.py
+++ b/vllm_npu/init.py
@@ -9,4 +9,10 @@ class name of the platform implementation.

 def register():
     """Return the fully-qualified name of the NPU platform class."""
+    # Side effect: apply the CUDA→NPU compatibility patches early so that
+    # code referencing torch.cuda.Stream / Event / etc. is redirected to
+    # torch.npu. Imported locally, so it only loads when register() runs.
+    from vllm_npu.cuda_compat import _patch_cuda_to_npu
+    _patch_cuda_to_npu()
+
     return "vllm_npu.platform.NPUPlatform"