mirror of
https://github.com/handsomezhuzhu/QQuiz.git
synced 2026-02-20 12:00:14 +00:00
长文本拆分,前端反馈还未成功
This commit is contained in:
@@ -21,15 +21,10 @@ FROM python:3.11-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# 安装系统依赖
|
||||
RUN apt-get update && apt-get install -y \
|
||||
build-essential \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# 复制后端依赖文件
|
||||
COPY backend/requirements.txt ./
|
||||
|
||||
# 安装 Python 依赖
|
||||
# 安装 Python 依赖(使用预编译wheel包,无需gcc)
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# 复制后端代码
|
||||
|
||||
@@ -2,12 +2,14 @@
|
||||
Exam Router - Handles exam creation, file upload, and deduplication
|
||||
"""
|
||||
from fastapi import APIRouter, Depends, HTTPException, status, UploadFile, File, Form, BackgroundTasks
|
||||
from fastapi.responses import StreamingResponse
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select, func, and_
|
||||
from typing import List, Optional
|
||||
from datetime import datetime, timedelta
|
||||
import os
|
||||
import aiofiles
|
||||
import json
|
||||
|
||||
from database import get_db
|
||||
from models import User, Exam, Question, ExamStatus, SystemConfig
|
||||
@@ -19,6 +21,7 @@ from services.auth_service import get_current_user
|
||||
from services.document_parser import document_parser
|
||||
from services.llm_service import LLMService
|
||||
from services.config_service import load_llm_config
|
||||
from services.progress_service import progress_service
|
||||
from utils import is_allowed_file, calculate_content_hash
|
||||
from dedup_utils import is_duplicate_question
|
||||
|
||||
@@ -264,9 +267,11 @@ async def async_parse_and_save(
|
||||
):
|
||||
"""
|
||||
Background task to parse document and save questions with deduplication.
|
||||
Sends real-time progress updates via SSE.
|
||||
"""
|
||||
from database import AsyncSessionLocal
|
||||
from sqlalchemy import select
|
||||
from services.progress_service import ProgressUpdate, ProgressStatus
|
||||
|
||||
async with AsyncSessionLocal() as db:
|
||||
try:
|
||||
@@ -276,6 +281,14 @@ async def async_parse_and_save(
|
||||
exam.status = ExamStatus.PROCESSING
|
||||
await db.commit()
|
||||
|
||||
# Send initial progress
|
||||
await progress_service.update_progress(ProgressUpdate(
|
||||
exam_id=exam_id,
|
||||
status=ProgressStatus.PARSING,
|
||||
message="开始解析文档...",
|
||||
progress=5.0
|
||||
))
|
||||
|
||||
# Load LLM configuration from database
|
||||
llm_config = await load_llm_config(db)
|
||||
llm_service = LLMService(config=llm_config)
|
||||
@@ -293,12 +306,27 @@ async def async_parse_and_save(
|
||||
# Use Gemini's native PDF processing
|
||||
print(f"[Exam {exam_id}] Using Gemini native PDF processing", flush=True)
|
||||
print(f"[Exam {exam_id}] PDF file size: {len(file_content)} bytes", flush=True)
|
||||
questions_data = await llm_service.parse_document_with_pdf(file_content, filename)
|
||||
|
||||
await progress_service.update_progress(ProgressUpdate(
|
||||
exam_id=exam_id,
|
||||
status=ProgressStatus.PARSING,
|
||||
message="使用Gemini解析PDF文档...",
|
||||
progress=10.0
|
||||
))
|
||||
|
||||
questions_data = await llm_service.parse_document_with_pdf(file_content, filename, exam_id)
|
||||
else:
|
||||
# Extract text first, then parse
|
||||
if is_pdf:
|
||||
print(f"[Exam {exam_id}] ⚠️ Warning: Using text extraction for PDF (provider does not support native PDF)", flush=True)
|
||||
|
||||
await progress_service.update_progress(ProgressUpdate(
|
||||
exam_id=exam_id,
|
||||
status=ProgressStatus.PARSING,
|
||||
message="提取文档文本内容...",
|
||||
progress=10.0
|
||||
))
|
||||
|
||||
print(f"[Exam {exam_id}] Extracting text from document...", flush=True)
|
||||
text_content = await document_parser.parse_file(file_content, filename)
|
||||
|
||||
@@ -309,17 +337,40 @@ async def async_parse_and_save(
|
||||
|
||||
# Check if document is too long and needs splitting
|
||||
if len(text_content) > 5000:
|
||||
print(f"[Exam {exam_id}] Document is long, splitting into chunks...", flush=True)
|
||||
text_chunks = document_parser.split_text_with_overlap(text_content, chunk_size=3000, overlap=1000)
|
||||
print(f"[Exam {exam_id}] Split into {len(text_chunks)} chunks", flush=True)
|
||||
total_chunks = len(text_chunks)
|
||||
|
||||
print(f"[Exam {exam_id}] Document is long, splitting into chunks...", flush=True)
|
||||
print(f"[Exam {exam_id}] Split into {total_chunks} chunks", flush=True)
|
||||
|
||||
await progress_service.update_progress(ProgressUpdate(
|
||||
exam_id=exam_id,
|
||||
status=ProgressStatus.SPLITTING,
|
||||
message=f"文档已拆分为 {total_chunks} 个部分",
|
||||
progress=15.0,
|
||||
total_chunks=total_chunks
|
||||
))
|
||||
|
||||
all_questions = []
|
||||
|
||||
for chunk_idx, chunk in enumerate(text_chunks):
|
||||
print(f"[Exam {exam_id}] Processing chunk {chunk_idx + 1}/{len(text_chunks)}...", flush=True)
|
||||
current_chunk = chunk_idx + 1
|
||||
chunk_progress = 15.0 + (60.0 * current_chunk / total_chunks)
|
||||
|
||||
await progress_service.update_progress(ProgressUpdate(
|
||||
exam_id=exam_id,
|
||||
status=ProgressStatus.PROCESSING_CHUNK,
|
||||
message=f"正在处理第 {current_chunk}/{total_chunks} 部分...",
|
||||
progress=chunk_progress,
|
||||
total_chunks=total_chunks,
|
||||
current_chunk=current_chunk,
|
||||
questions_extracted=len(all_questions)
|
||||
))
|
||||
|
||||
print(f"[Exam {exam_id}] Processing chunk {current_chunk}/{total_chunks}...", flush=True)
|
||||
try:
|
||||
chunk_questions = await llm_service.parse_document(chunk)
|
||||
print(f"[Exam {exam_id}] Chunk {chunk_idx + 1} extracted {len(chunk_questions)} questions", flush=True)
|
||||
print(f"[Exam {exam_id}] Chunk {current_chunk} extracted {len(chunk_questions)} questions", flush=True)
|
||||
|
||||
# Fuzzy deduplicate across chunks
|
||||
for q in chunk_questions:
|
||||
@@ -327,7 +378,7 @@ async def async_parse_and_save(
|
||||
if not is_duplicate_question(q, all_questions, threshold=0.85):
|
||||
all_questions.append(q)
|
||||
else:
|
||||
print(f"[Exam {exam_id}] Skipped fuzzy duplicate from chunk {chunk_idx + 1}", flush=True)
|
||||
print(f"[Exam {exam_id}] Skipped fuzzy duplicate from chunk {current_chunk}", flush=True)
|
||||
|
||||
except Exception as chunk_error:
|
||||
print(f"[Exam {exam_id}] Chunk {chunk_idx + 1} failed: {str(chunk_error)}", flush=True)
|
||||
@@ -335,11 +386,37 @@ async def async_parse_and_save(
|
||||
|
||||
questions_data = all_questions
|
||||
print(f"[Exam {exam_id}] Total questions after fuzzy deduplication: {len(questions_data)}", flush=True)
|
||||
|
||||
await progress_service.update_progress(ProgressUpdate(
|
||||
exam_id=exam_id,
|
||||
status=ProgressStatus.DEDUPLICATING,
|
||||
message=f"所有部分处理完成,提取了 {len(questions_data)} 个题目",
|
||||
progress=75.0,
|
||||
total_chunks=total_chunks,
|
||||
current_chunk=total_chunks,
|
||||
questions_extracted=len(questions_data)
|
||||
))
|
||||
else:
|
||||
print(f"[Exam {exam_id}] Document content preview:\n{text_content[:500]}\n{'...' if len(text_content) > 500 else ''}", flush=True)
|
||||
print(f"[Exam {exam_id}] Calling LLM to extract questions...", flush=True)
|
||||
|
||||
await progress_service.update_progress(ProgressUpdate(
|
||||
exam_id=exam_id,
|
||||
status=ProgressStatus.PARSING,
|
||||
message="正在提取题目...",
|
||||
progress=30.0
|
||||
))
|
||||
|
||||
questions_data = await llm_service.parse_document(text_content)
|
||||
|
||||
await progress_service.update_progress(ProgressUpdate(
|
||||
exam_id=exam_id,
|
||||
status=ProgressStatus.DEDUPLICATING,
|
||||
message=f"提取了 {len(questions_data)} 个题目",
|
||||
progress=60.0,
|
||||
questions_extracted=len(questions_data)
|
||||
))
|
||||
|
||||
except Exception as parse_error:
|
||||
print(f"[Exam {exam_id}] ⚠️ Parse error details: {type(parse_error).__name__}", flush=True)
|
||||
print(f"[Exam {exam_id}] ⚠️ Parse error message: {str(parse_error)}", flush=True)
|
||||
@@ -351,6 +428,14 @@ async def async_parse_and_save(
|
||||
raise Exception("No questions found in document")
|
||||
|
||||
# Process questions with deduplication and AI answer generation
|
||||
await progress_service.update_progress(ProgressUpdate(
|
||||
exam_id=exam_id,
|
||||
status=ProgressStatus.SAVING,
|
||||
message="正在去重并保存题目到数据库...",
|
||||
progress=80.0,
|
||||
questions_extracted=len(questions_data)
|
||||
))
|
||||
|
||||
print(f"[Exam {exam_id}] Processing questions with deduplication...")
|
||||
parse_result = await process_questions_with_dedup(exam_id, questions_data, db, llm_service)
|
||||
|
||||
@@ -370,9 +455,28 @@ async def async_parse_and_save(
|
||||
|
||||
print(f"[Exam {exam_id}] ✅ {parse_result.message}")
|
||||
|
||||
# Send completion progress
|
||||
await progress_service.update_progress(ProgressUpdate(
|
||||
exam_id=exam_id,
|
||||
status=ProgressStatus.COMPLETED,
|
||||
message=f"完成!添加了 {parse_result.new_added} 个题目(去重 {parse_result.duplicates_removed} 个)",
|
||||
progress=100.0,
|
||||
questions_extracted=parse_result.total_parsed,
|
||||
questions_added=parse_result.new_added,
|
||||
duplicates_removed=parse_result.duplicates_removed
|
||||
))
|
||||
|
||||
except Exception as e:
|
||||
print(f"[Exam {exam_id}] ❌ Error: {str(e)}")
|
||||
|
||||
# Send error progress
|
||||
await progress_service.update_progress(ProgressUpdate(
|
||||
exam_id=exam_id,
|
||||
status=ProgressStatus.FAILED,
|
||||
message=f"处理失败:{str(e)}",
|
||||
progress=0.0
|
||||
))
|
||||
|
||||
# Update exam status to failed
|
||||
result = await db.execute(select(Exam).where(Exam.id == exam_id))
|
||||
exam = result.scalar_one()
|
||||
@@ -549,6 +653,70 @@ async def get_exam_detail(
|
||||
return exam
|
||||
|
||||
|
||||
@router.get("/{exam_id}/progress")
async def get_exam_progress(
    exam_id: int,
    token: Optional[str] = None,
    db: AsyncSession = Depends(get_db)
):
    """
    Stream parsing progress for an exam as Server-Sent Events.

    The JWT is read from the ``token`` query parameter because the browser
    EventSource API cannot attach custom headers.

    Args:
        exam_id: ID of the exam whose progress is requested.
        token: JWT string identifying the caller.
        db: Database session.

    Returns:
        A ``text/event-stream`` StreamingResponse. Each event is one JSON
        document produced by ``update.to_dict()``; the stream ends after an
        update whose status is "completed" or "failed".

    Raises:
        HTTPException: 401 when the token is missing or rejected, 404 when
            the exam does not exist or is not owned by the caller.
    """
    # Function-scope import, kept local as in the original code.
    from services.auth_service import get_current_user_from_token

    if not token:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Token required"
        )

    try:
        current_user = await get_current_user_from_token(token, db)
    except Exception as auth_error:
        # Any failure while resolving the token is reported as 401; the
        # original error stays attached as the cause for debugging.
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid token"
        ) from auth_error

    # Only the owner of the exam may watch its progress.
    ownership_query = select(Exam).where(
        and_(Exam.id == exam_id, Exam.user_id == current_user.id)
    )
    exam = (await db.execute(ownership_query)).scalar_one_or_none()

    if not exam:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Exam not found"
        )

    async def event_generator():
        """Yield each progress update formatted as one SSE ``data:`` event."""
        async for update in progress_service.subscribe(exam_id):
            payload = json.dumps(update.to_dict())
            yield f"data: {payload}\n\n"

            # Terminal states end the stream.
            if update.status in ("completed", "failed"):
                break

    return StreamingResponse(
        event_generator(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no"  # Disable nginx buffering
        }
    )
|
||||
|
||||
|
||||
@router.delete("/{exam_id}", status_code=status.HTTP_204_NO_CONTENT)
|
||||
async def delete_exam(
|
||||
exam_id: int,
|
||||
|
||||
@@ -82,3 +82,42 @@ async def get_optional_user(
|
||||
return await get_current_user(credentials, db)
|
||||
except HTTPException:
|
||||
return None
|
||||
|
||||
|
||||
async def get_current_user_from_token(token: str, db: AsyncSession) -> User:
    """
    Get the current user from a raw JWT string (for SSE with query params).

    Args:
        token: JWT token string
        db: Database session

    Returns:
        User object

    Raises:
        ValueError: If the token cannot be decoded, carries no "sub" claim,
            or the claim is not convertible to an integer user ID.
        LookupError: If no user with that ID exists.

    Both exception types derive from ``Exception``, so callers that catch
    ``Exception`` keep working unchanged.
    """
    # decode_access_token signals failure by returning None.
    payload = decode_access_token(token)
    if payload is None:
        raise ValueError("Invalid token")

    raw_user_id = payload.get("sub")
    if raw_user_id is None:
        raise ValueError("Invalid token payload")

    # The "sub" claim may arrive as a string; normalise it to int.
    try:
        user_id = int(raw_user_id)
    except (ValueError, TypeError) as conversion_error:
        raise ValueError("Invalid user ID") from conversion_error

    result = await db.execute(select(User).where(User.id == user_id))
    user = result.scalar_one_or_none()
    if user is None:
        raise LookupError("User not found")

    return user
|
||||
|
||||
@@ -453,7 +453,7 @@ class LLMService:
|
||||
|
||||
return chunks
|
||||
|
||||
async def parse_document_with_pdf(self, pdf_bytes: bytes, filename: str) -> List[Dict[str, Any]]:
|
||||
async def parse_document_with_pdf(self, pdf_bytes: bytes, filename: str, exam_id: int = None) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Parse PDF document using Gemini's native PDF understanding.
|
||||
Automatically splits large PDFs into overlapping chunks.
|
||||
@@ -462,6 +462,7 @@ class LLMService:
|
||||
Args:
|
||||
pdf_bytes: PDF file content as bytes
|
||||
filename: Original filename for logging
|
||||
exam_id: Optional exam ID for progress updates
|
||||
|
||||
Returns:
|
||||
List of question dictionaries
|
||||
@@ -471,17 +472,44 @@ class LLMService:
|
||||
|
||||
# Split PDF into chunks
|
||||
pdf_chunks = self.split_pdf_pages(pdf_bytes, pages_per_chunk=4, overlap=1)
|
||||
total_chunks = len(pdf_chunks)
|
||||
|
||||
print(f"[Gemini PDF] Processing {len(pdf_chunks)} chunk(s) for {filename}")
|
||||
print(f"[Gemini PDF] Processing {total_chunks} chunk(s) for {filename}")
|
||||
|
||||
# Send progress update if exam_id provided
|
||||
if exam_id:
|
||||
from services.progress_service import progress_service, ProgressUpdate, ProgressStatus
|
||||
await progress_service.update_progress(ProgressUpdate(
|
||||
exam_id=exam_id,
|
||||
status=ProgressStatus.SPLITTING,
|
||||
message=f"PDF已拆分为 {total_chunks} 个部分",
|
||||
progress=15.0,
|
||||
total_chunks=total_chunks
|
||||
))
|
||||
|
||||
all_questions = []
|
||||
# Process each chunk with fuzzy deduplication
|
||||
for chunk_idx, chunk_bytes in enumerate(pdf_chunks):
|
||||
print(f"[Gemini PDF] Processing chunk {chunk_idx + 1}/{len(pdf_chunks)}")
|
||||
current_chunk = chunk_idx + 1
|
||||
chunk_progress = 15.0 + (60.0 * current_chunk / total_chunks)
|
||||
|
||||
print(f"[Gemini PDF] Processing chunk {current_chunk}/{total_chunks}")
|
||||
|
||||
# Send progress update
|
||||
if exam_id:
|
||||
await progress_service.update_progress(ProgressUpdate(
|
||||
exam_id=exam_id,
|
||||
status=ProgressStatus.PROCESSING_CHUNK,
|
||||
message=f"正在处理第 {current_chunk}/{total_chunks} 部分...",
|
||||
progress=chunk_progress,
|
||||
total_chunks=total_chunks,
|
||||
current_chunk=current_chunk,
|
||||
questions_extracted=len(all_questions)
|
||||
))
|
||||
|
||||
try:
|
||||
questions = await self._parse_pdf_chunk(chunk_bytes, f"{filename}_chunk_{chunk_idx + 1}")
|
||||
print(f"[Gemini PDF] Chunk {chunk_idx + 1} extracted {len(questions)} questions")
|
||||
questions = await self._parse_pdf_chunk(chunk_bytes, f"{filename}_chunk_{current_chunk}")
|
||||
print(f"[Gemini PDF] Chunk {current_chunk} extracted {len(questions)} questions")
|
||||
|
||||
# Fuzzy deduplicate across chunks
|
||||
from dedup_utils import is_duplicate_question
|
||||
@@ -490,15 +518,27 @@ class LLMService:
|
||||
if not is_duplicate_question(q, all_questions, threshold=0.85):
|
||||
all_questions.append(q)
|
||||
else:
|
||||
print(f"[PDF Split] Skipped fuzzy duplicate from chunk {chunk_idx + 1}")
|
||||
print(f"[PDF Split] Skipped fuzzy duplicate from chunk {current_chunk}")
|
||||
|
||||
except Exception as e:
|
||||
print(f"[Gemini PDF] Chunk {chunk_idx + 1} failed: {str(e)}")
|
||||
print(f"[Gemini PDF] Chunk {current_chunk} failed: {str(e)}")
|
||||
# Continue with other chunks
|
||||
continue
|
||||
|
||||
print(f"[Gemini PDF] Total questions extracted: {len(all_questions)} (after deduplication)")
|
||||
|
||||
# Send final progress for PDF processing
|
||||
if exam_id:
|
||||
await progress_service.update_progress(ProgressUpdate(
|
||||
exam_id=exam_id,
|
||||
status=ProgressStatus.DEDUPLICATING,
|
||||
message=f"PDF处理完成,提取了 {len(all_questions)} 个题目",
|
||||
progress=75.0,
|
||||
total_chunks=total_chunks,
|
||||
current_chunk=total_chunks,
|
||||
questions_extracted=len(all_questions)
|
||||
))
|
||||
|
||||
return all_questions
|
||||
|
||||
async def _parse_pdf_chunk(self, pdf_bytes: bytes, chunk_name: str) -> List[Dict[str, Any]]:
|
||||
|
||||
149
backend/services/progress_service.py
Normal file
149
backend/services/progress_service.py
Normal file
@@ -0,0 +1,149 @@
|
||||
"""
|
||||
Progress Service - Manages document parsing progress for real-time updates
|
||||
"""
|
||||
import asyncio
|
||||
from typing import Dict, Optional, AsyncGenerator
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class ProgressStatus(str, Enum):
    """Stages reported while a document is being parsed.

    Members are also ``str`` instances, so each one compares equal to its
    raw string value.
    """

    # Before and during initial parsing
    PENDING = "pending"
    PARSING = "parsing"

    # Chunked handling of long documents
    SPLITTING = "splitting"
    PROCESSING_CHUNK = "processing_chunk"

    # Post-processing of the extracted questions
    DEDUPLICATING = "deduplicating"
    SAVING = "saving"

    # Final outcomes
    COMPLETED = "completed"
    FAILED = "failed"
|
||||
|
||||
|
||||
class ProgressUpdate:
    """Snapshot of the parsing progress of one exam at one point in time.

    Args:
        exam_id: ID of the exam the update belongs to.
        status: Current stage, as a ``ProgressStatus`` member or its raw
            string value.
        message: Human-readable description of the current step.
        progress: Completion percentage (0-100).
        total_chunks: Number of parts the document was split into.
        current_chunk: 1-based index of the part being processed.
        questions_extracted: Questions extracted so far.
        questions_added: Questions added after deduplication.
        duplicates_removed: Questions dropped as duplicates.
    """

    def __init__(
        self,
        exam_id: int,
        status: "ProgressStatus | str",
        message: str,
        progress: float = 0.0,
        total_chunks: int = 0,
        current_chunk: int = 0,
        questions_extracted: int = 0,
        questions_added: int = 0,
        duplicates_removed: int = 0
    ):
        self.exam_id = exam_id
        self.status = status
        self.message = message
        self.progress = progress  # 0-100
        self.total_chunks = total_chunks
        self.current_chunk = current_chunk
        self.questions_extracted = questions_extracted
        self.questions_added = questions_added
        self.duplicates_removed = duplicates_removed
        # Creation time as an ISO-8601 string.
        self.timestamp = datetime.now().isoformat()

    def to_dict(self) -> dict:
        """Convert to a dictionary for JSON serialization."""
        # Enum members expose ``.value``; a raw string status is emitted
        # as-is instead of raising AttributeError.
        status_value = getattr(self.status, "value", self.status)
        return {
            "exam_id": self.exam_id,
            "status": status_value,
            "message": self.message,
            "progress": round(self.progress, 1),
            "total_chunks": self.total_chunks,
            "current_chunk": self.current_chunk,
            "questions_extracted": self.questions_extracted,
            "questions_added": self.questions_added,
            "duplicates_removed": self.duplicates_removed,
            "timestamp": self.timestamp
        }
|
||||
|
||||
|
||||
class ProgressService:
    """In-memory store of the latest progress per exam, with fan-out to subscribers.

    The most recent update for each exam is kept in ``_progress``; every open
    subscription owns one ``asyncio.Queue`` registered in ``_queues``.
    """

    def __init__(self):
        # Latest progress update for each exam ID
        self._progress: Dict[int, "ProgressUpdate"] = {}
        # Queues of the open subscriptions, grouped by exam ID
        self._queues: Dict[int, list] = {}

    async def update_progress(self, update: "ProgressUpdate"):
        """
        Record an update for its exam and push it to every subscriber.

        Args:
            update: Progress update object
        """
        exam_id = update.exam_id
        self._progress[exam_id] = update

        listeners = self._queues.get(exam_id)
        if not listeners:
            return

        dead_queues = []
        # Iterate over a copy so that removing entries from the registry
        # cannot disturb the loop.
        for queue in list(listeners):
            try:
                await queue.put(update)
            except Exception as e:
                print(f"[Progress] Failed to send update to queue: {e}")
                dead_queues.append(queue)

        # Drop queues that could not accept the update
        for dead_queue in dead_queues:
            if dead_queue in listeners:
                listeners.remove(dead_queue)

    def get_progress(self, exam_id: int) -> Optional["ProgressUpdate"]:
        """Return the latest stored update for an exam, or None."""
        return self._progress.get(exam_id)

    async def subscribe(self, exam_id: int) -> AsyncGenerator["ProgressUpdate", None]:
        """
        Subscribe to progress updates for an exam (SSE stream).

        The stored snapshot, if any, is yielded first, followed by live
        updates. The generator finishes after yielding an update whose
        status is COMPLETED or FAILED.

        Args:
            exam_id: Exam ID to subscribe to

        Yields:
            Progress updates as they occur
        """
        terminal_statuses = (ProgressStatus.COMPLETED, ProgressStatus.FAILED)

        # One private queue per subscription
        queue = asyncio.Queue()
        self._queues.setdefault(exam_id, []).append(queue)

        try:
            current_progress = self.get_progress(exam_id)
            if current_progress:
                yield current_progress
                # If the stored snapshot is already terminal, stop here
                # rather than block forever on queue.get().
                if current_progress.status in terminal_statuses:
                    return

            while True:
                update = await queue.get()
                yield update

                if update.status in terminal_statuses:
                    break

        finally:
            # Unregister this subscription; drop the exam entry once empty
            if exam_id in self._queues and queue in self._queues[exam_id]:
                self._queues[exam_id].remove(queue)
                if not self._queues[exam_id]:
                    del self._queues[exam_id]

    def clear_progress(self, exam_id: int):
        """Forget the stored progress and the subscriber registry for an exam."""
        if exam_id in self._progress:
            del self._progress[exam_id]
        if exam_id in self._queues:
            del self._queues[exam_id]


# Singleton instance
progress_service = ProgressService()
|
||||
121
frontend/src/components/ParsingProgress.jsx
Normal file
121
frontend/src/components/ParsingProgress.jsx
Normal file
@@ -0,0 +1,121 @@
|
||||
/**
|
||||
* Parsing Progress Component
|
||||
* Displays real-time progress for document parsing
|
||||
*/
|
||||
import React from 'react'
|
||||
import { Loader, CheckCircle, XCircle, FileText, Layers } from 'lucide-react'
|
||||
|
||||
export const ParsingProgress = ({ progress }) => {
|
||||
if (!progress) return null
|
||||
|
||||
const { status, message, progress: percentage, total_chunks, current_chunk, questions_extracted, questions_added, duplicates_removed } = progress
|
||||
|
||||
const getStatusIcon = () => {
|
||||
switch (status) {
|
||||
case 'completed':
|
||||
return <CheckCircle className="h-6 w-6 text-green-500" />
|
||||
case 'failed':
|
||||
return <XCircle className="h-6 w-6 text-red-500" />
|
||||
default:
|
||||
return <Loader className="h-6 w-6 text-primary-500 animate-spin" />
|
||||
}
|
||||
}
|
||||
|
||||
const getStatusColor = () => {
|
||||
switch (status) {
|
||||
case 'completed':
|
||||
return 'bg-green-500'
|
||||
case 'failed':
|
||||
return 'bg-red-500'
|
||||
case 'processing_chunk':
|
||||
return 'bg-blue-500'
|
||||
default:
|
||||
return 'bg-primary-500'
|
||||
}
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="bg-white rounded-xl shadow-sm p-6 mb-6">
|
||||
<div className="flex items-start gap-4">
|
||||
<div className="flex-shrink-0">
|
||||
{getStatusIcon()}
|
||||
</div>
|
||||
|
||||
<div className="flex-1">
|
||||
{/* Status Message */}
|
||||
<h3 className="text-lg font-semibold text-gray-900 mb-2">
|
||||
{status === 'completed' ? '解析完成' : status === 'failed' ? '解析失败' : '正在解析文档'}
|
||||
</h3>
|
||||
<p className="text-gray-600 mb-4">{message}</p>
|
||||
|
||||
{/* Progress Bar */}
|
||||
{status !== 'completed' && status !== 'failed' && (
|
||||
<div className="mb-4">
|
||||
<div className="flex justify-between text-sm text-gray-600 mb-2">
|
||||
<span>进度</span>
|
||||
<span>{percentage.toFixed(0)}%</span>
|
||||
</div>
|
||||
<div className="w-full bg-gray-200 rounded-full h-3 overflow-hidden">
|
||||
<div
|
||||
className={`h-3 ${getStatusColor()} transition-all duration-300 ease-out`}
|
||||
style={{ width: `${percentage}%` }}
|
||||
></div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Details Grid */}
|
||||
<div className="grid grid-cols-2 md:grid-cols-4 gap-4 mt-4">
|
||||
{total_chunks > 0 && (
|
||||
<div className="bg-blue-50 rounded-lg p-3">
|
||||
<div className="flex items-center gap-2 mb-1">
|
||||
<Layers className="h-4 w-4 text-blue-600" />
|
||||
<span className="text-xs text-blue-600 font-medium">文档拆分</span>
|
||||
</div>
|
||||
<p className="text-lg font-bold text-blue-900">
|
||||
{current_chunk}/{total_chunks}
|
||||
</p>
|
||||
<p className="text-xs text-blue-600">部分</p>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{questions_extracted > 0 && (
|
||||
<div className="bg-purple-50 rounded-lg p-3">
|
||||
<div className="flex items-center gap-2 mb-1">
|
||||
<FileText className="h-4 w-4 text-purple-600" />
|
||||
<span className="text-xs text-purple-600 font-medium">已提取</span>
|
||||
</div>
|
||||
<p className="text-lg font-bold text-purple-900">{questions_extracted}</p>
|
||||
<p className="text-xs text-purple-600">题目</p>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{questions_added > 0 && (
|
||||
<div className="bg-green-50 rounded-lg p-3">
|
||||
<div className="flex items-center gap-2 mb-1">
|
||||
<CheckCircle className="h-4 w-4 text-green-600" />
|
||||
<span className="text-xs text-green-600 font-medium">已添加</span>
|
||||
</div>
|
||||
<p className="text-lg font-bold text-green-900">{questions_added}</p>
|
||||
<p className="text-xs text-green-600">题目</p>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{duplicates_removed > 0 && (
|
||||
<div className="bg-orange-50 rounded-lg p-3">
|
||||
<div className="flex items-center gap-2 mb-1">
|
||||
<XCircle className="h-4 w-4 text-orange-600" />
|
||||
<span className="text-xs text-orange-600 font-medium">已去重</span>
|
||||
</div>
|
||||
<p className="text-lg font-bold text-orange-900">{duplicates_removed}</p>
|
||||
<p className="text-xs text-orange-600">题目</p>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
export default ParsingProgress
|
||||
@@ -1,10 +1,11 @@
|
||||
/**
|
||||
* Exam Detail Page - with append upload and status polling
|
||||
* Exam Detail Page - with real-time parsing progress via SSE
|
||||
*/
|
||||
import React, { useState, useEffect } from 'react'
|
||||
import React, { useState, useEffect, useRef } from 'react'
|
||||
import { useParams, useNavigate } from 'react-router-dom'
|
||||
import { examAPI, questionAPI } from '../api/client'
|
||||
import Layout from '../components/Layout'
|
||||
import ParsingProgress from '../components/ParsingProgress'
|
||||
import {
|
||||
ArrowLeft, Upload, Play, Loader, FileText, AlertCircle, RefreshCw
|
||||
} from 'lucide-react'
|
||||
@@ -28,16 +29,20 @@ export const ExamDetail = () => {
|
||||
const [uploading, setUploading] = useState(false)
|
||||
const [showUploadModal, setShowUploadModal] = useState(false)
|
||||
const [uploadFile, setUploadFile] = useState(null)
|
||||
const [progress, setProgress] = useState(null)
|
||||
|
||||
const eventSourceRef = useRef(null)
|
||||
|
||||
useEffect(() => {
|
||||
loadExamDetail()
|
||||
|
||||
// Start polling if status is processing
|
||||
const interval = setInterval(() => {
|
||||
pollExamStatus()
|
||||
}, 3000)
|
||||
|
||||
return () => clearInterval(interval)
|
||||
// Cleanup on unmount
|
||||
return () => {
|
||||
if (eventSourceRef.current) {
|
||||
eventSourceRef.current.close()
|
||||
eventSourceRef.current = null
|
||||
}
|
||||
}
|
||||
}, [examId])
|
||||
|
||||
const loadExamDetail = async () => {
|
||||
@@ -49,6 +54,11 @@ export const ExamDetail = () => {
|
||||
|
||||
setExam(examRes.data)
|
||||
setQuestions(questionsRes.data.questions)
|
||||
|
||||
// Connect to SSE if exam is processing
|
||||
if (examRes.data.status === 'processing') {
|
||||
connectSSE()
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Failed to load exam:', error)
|
||||
toast.error('加载题库失败')
|
||||
@@ -57,22 +67,53 @@ export const ExamDetail = () => {
|
||||
}
|
||||
}
|
||||
|
||||
const pollExamStatus = async () => {
|
||||
try {
|
||||
const response = await examAPI.getDetail(examId)
|
||||
const newExam = response.data
|
||||
|
||||
// If status changed from processing to ready
|
||||
if (exam?.status === 'processing' && newExam.status === 'ready') {
|
||||
toast.success('文档解析完成!')
|
||||
await loadExamDetail() // Reload to get updated questions
|
||||
} else if (exam?.status === 'processing' && newExam.status === 'failed') {
|
||||
toast.error('文档解析失败')
|
||||
const connectSSE = () => {
|
||||
// Close existing connection if any
|
||||
if (eventSourceRef.current) {
|
||||
eventSourceRef.current.close()
|
||||
}
|
||||
|
||||
setExam(newExam)
|
||||
console.log('[SSE] Connecting to progress stream for exam', examId)
|
||||
|
||||
const token = localStorage.getItem('token')
|
||||
const url = `/api/exams/${examId}/progress?token=${encodeURIComponent(token)}`
|
||||
|
||||
const eventSource = new EventSource(url)
|
||||
eventSourceRef.current = eventSource
|
||||
|
||||
eventSource.onmessage = (event) => {
|
||||
try {
|
||||
const progressData = JSON.parse(event.data)
|
||||
console.log('[SSE] Progress update:', progressData)
|
||||
|
||||
setProgress(progressData)
|
||||
|
||||
// Update exam status if completed or failed
|
||||
if (progressData.status === 'completed') {
|
||||
toast.success(progressData.message)
|
||||
setExam(prev => ({ ...prev, status: 'ready' }))
|
||||
loadExamDetail() // Reload to get updated questions
|
||||
eventSource.close()
|
||||
eventSourceRef.current = null
|
||||
} else if (progressData.status === 'failed') {
|
||||
toast.error(progressData.message)
|
||||
setExam(prev => ({ ...prev, status: 'failed' }))
|
||||
eventSource.close()
|
||||
eventSourceRef.current = null
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Failed to poll exam:', error)
|
||||
console.error('[SSE] Failed to parse progress data:', error)
|
||||
}
|
||||
}
|
||||
|
||||
eventSource.onerror = (error) => {
|
||||
console.error('[SSE] Connection error:', error)
|
||||
eventSource.close()
|
||||
eventSourceRef.current = null
|
||||
}
|
||||
|
||||
eventSource.onopen = () => {
|
||||
console.log('[SSE] Connection established')
|
||||
}
|
||||
}
|
||||
|
||||
@@ -96,9 +137,13 @@ export const ExamDetail = () => {
|
||||
toast.success('文档上传成功,正在解析并去重...')
|
||||
setShowUploadModal(false)
|
||||
setUploadFile(null)
|
||||
await loadExamDetail()
|
||||
setExam(prev => ({ ...prev, status: 'processing' }))
|
||||
|
||||
// Connect to SSE for real-time progress
|
||||
connectSSE()
|
||||
} catch (error) {
|
||||
console.error('Failed to append document:', error)
|
||||
toast.error('文档上传失败')
|
||||
} finally {
|
||||
setUploading(false)
|
||||
}
|
||||
@@ -138,7 +183,7 @@ export const ExamDetail = () => {
|
||||
const isProcessing = exam.status === 'processing'
|
||||
const isReady = exam.status === 'ready'
|
||||
const isFailed = exam.status === 'failed'
|
||||
const progress = calculateProgress(exam.current_index, exam.total_questions)
|
||||
const quizProgress = calculateProgress(exam.current_index, exam.total_questions)
|
||||
|
||||
return (
|
||||
<Layout>
|
||||
@@ -152,6 +197,11 @@ export const ExamDetail = () => {
|
||||
返回题库列表
|
||||
</button>
|
||||
|
||||
{/* Parsing Progress (only shown when processing) */}
|
||||
{isProcessing && progress && (
|
||||
<ParsingProgress progress={progress} />
|
||||
)}
|
||||
|
||||
{/* Header */}
|
||||
<div className="bg-white rounded-xl shadow-sm p-6 mb-6">
|
||||
<div className="flex flex-col md:flex-row md:items-start md:justify-between mb-4">
|
||||
@@ -223,7 +273,7 @@ export const ExamDetail = () => {
|
||||
<div className="w-full bg-gray-200 rounded-full h-3">
|
||||
<div
|
||||
className="bg-primary-600 h-3 rounded-full transition-all"
|
||||
style={{ width: `${progress}%` }}
|
||||
style={{ width: `${quizProgress}%` }}
|
||||
></div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
Reference in New Issue
Block a user