大改

2026-02-20 19:50:15 +00:00 · 2026-02-10 23:08:39 +08:00
parent 1baa36026c
commit 6680585975
172 changed files with 52867 additions and 892 deletions
--- a/vllm_npu/multistream/base.py
+++ b/vllm_npu/multistream/base.py
@@ -0,0 +1,29 @@
+from dataclasses import dataclass
+from enum import Enum
+
+
+class MSEventKey(Enum):
+    ATTN_COM_FINISH = 0
+    ATTN_AR_FINISH = 1
+    FFN_COM_FINISH = 2
+    FFN_AR_FINISH = 3
+    # events for MOE dispatch and combine
+    MOE_BEFORE_COMM = 4
+    MOE_AFTER_COMM = 5
+    # events for shared expert
+    MOE_SE_COMM_FINISH = 6
+    MOE_SE_COMP_FINISH = 7
+    MOE_GATE_FINISH = 8
+
+
+@dataclass
+class MSAttentionMetadataSplitConfig:
+    """
+    micro batch split config for split attention metadata
+    """
+    # micro batch num
+    num_micro_batches: int = 2
+    # split micro batches only when total tokens >= min_total_tokens_to_split
+    min_total_tokens_to_split: int = 256
+    # split micro batches only when prefill tokens >= min_prefill_tokens_to_split
+    min_prefill_tokens_to_split: int = 64