fix: initialize TP/PP parallel groups after distributed environment

This commit is contained in:
2026-02-10 19:14:29 +08:00
parent 693e0a1d89
commit c3631d65c2

View File

@@ -13,7 +13,10 @@ from typing import TYPE_CHECKING, Any, Optional
 import torch
 from vllm.config import VllmConfig
-from vllm.distributed import init_distributed_environment
+from vllm.distributed import (
+    ensure_model_parallel_initialized,
+    init_distributed_environment,
+)
 from vllm.logger import init_logger
 from vllm.lora.request import LoRARequest
 from vllm.platforms import current_platform
@@ -92,6 +95,14 @@ class NPUWorker(WorkerBase):
             backend="hccl",
         )
+        # Initialize TP / PP parallel groups
+        ensure_model_parallel_initialized(
+            tensor_model_parallel_size=(
+                self.parallel_config.tensor_parallel_size),
+            pipeline_model_parallel_size=(
+                self.parallel_config.pipeline_parallel_size),
+        )
         # Set random seed
         current_platform.seed_everything(self.model_config.seed)