Files
QQuiz/backend/services/progress_service.py

150 lines
4.6 KiB
Python

"""
Progress Service - Manages document parsing progress for real-time updates
"""
import asyncio
from typing import Dict, Optional, AsyncGenerator
from datetime import datetime
from enum import Enum
class ProgressStatus(str, Enum):
"""Progress status types"""
PENDING = "pending"
PARSING = "parsing"
SPLITTING = "splitting"
PROCESSING_CHUNK = "processing_chunk"
DEDUPLICATING = "deduplicating"
SAVING = "saving"
COMPLETED = "completed"
FAILED = "failed"
class ProgressUpdate:
"""Progress update data structure"""
def __init__(
self,
exam_id: int,
status: ProgressStatus,
message: str,
progress: float = 0.0,
total_chunks: int = 0,
current_chunk: int = 0,
questions_extracted: int = 0,
questions_added: int = 0,
duplicates_removed: int = 0
):
self.exam_id = exam_id
self.status = status
self.message = message
self.progress = progress # 0-100
self.total_chunks = total_chunks
self.current_chunk = current_chunk
self.questions_extracted = questions_extracted
self.questions_added = questions_added
self.duplicates_removed = duplicates_removed
self.timestamp = datetime.now().isoformat()
def to_dict(self) -> dict:
"""Convert to dictionary for JSON serialization"""
return {
"exam_id": self.exam_id,
"status": self.status.value,
"message": self.message,
"progress": round(self.progress, 1),
"total_chunks": self.total_chunks,
"current_chunk": self.current_chunk,
"questions_extracted": self.questions_extracted,
"questions_added": self.questions_added,
"duplicates_removed": self.duplicates_removed,
"timestamp": self.timestamp
}
class ProgressService:
"""Service for managing parsing progress"""
def __init__(self):
# Store progress updates for each exam
self._progress: Dict[int, ProgressUpdate] = {}
# Store queues for SSE connections
self._queues: Dict[int, list] = {}
async def update_progress(self, update: ProgressUpdate):
"""
Update progress for an exam and notify all listeners
Args:
update: Progress update object
"""
exam_id = update.exam_id
self._progress[exam_id] = update
# Send to all connected SSE clients for this exam
if exam_id in self._queues:
dead_queues = []
for queue in self._queues[exam_id]:
try:
await queue.put(update)
except Exception as e:
print(f"[Progress] Failed to send update to queue: {e}")
dead_queues.append(queue)
# Clean up dead queues
for dead_queue in dead_queues:
self._queues[exam_id].remove(dead_queue)
def get_progress(self, exam_id: int) -> Optional[ProgressUpdate]:
"""Get current progress for an exam"""
return self._progress.get(exam_id)
async def subscribe(self, exam_id: int) -> AsyncGenerator[ProgressUpdate, None]:
"""
Subscribe to progress updates for an exam (SSE stream)
Args:
exam_id: Exam ID to subscribe to
Yields:
Progress updates as they occur
"""
# Create a queue for this connection
queue = asyncio.Queue()
# Register the queue
if exam_id not in self._queues:
self._queues[exam_id] = []
self._queues[exam_id].append(queue)
try:
# Send current progress if exists
current_progress = self.get_progress(exam_id)
if current_progress:
yield current_progress
# Stream updates
while True:
update = await queue.get()
yield update
# Stop streaming if completed or failed
if update.status in [ProgressStatus.COMPLETED, ProgressStatus.FAILED]:
break
finally:
# Cleanup
if exam_id in self._queues and queue in self._queues[exam_id]:
self._queues[exam_id].remove(queue)
if not self._queues[exam_id]:
del self._queues[exam_id]
def clear_progress(self, exam_id: int):
"""Clear progress data for an exam"""
if exam_id in self._progress:
del self._progress[exam_id]
if exam_id in self._queues:
del self._queues[exam_id]
# Singleton instance
progress_service = ProgressService()