Split long documents into chunks; frontend progress feedback not working yet

2025-12-12 23:59:28 +08:00
parent a39f2d9e33
commit f5dd3bfc6c
7 changed files with 605 additions and 43 deletions

View File

@@ -21,15 +21,10 @@ FROM python:3.11-slim
 WORKDIR /app
-# 安装系统依赖
-RUN apt-get update && apt-get install -y \
-    build-essential \
-    && rm -rf /var/lib/apt/lists/*
 # 复制后端依赖文件
 COPY backend/requirements.txt ./
-# 安装 Python 依赖
+# 安装 Python 依赖(使用预编译wheel包,无需gcc)
 RUN pip install --no-cache-dir -r requirements.txt
 # 复制后端代码

View File

@@ -2,12 +2,14 @@
 Exam Router - Handles exam creation, file upload, and deduplication
 """
 from fastapi import APIRouter, Depends, HTTPException, status, UploadFile, File, Form, BackgroundTasks
+from fastapi.responses import StreamingResponse
 from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy import select, func, and_
 from typing import List, Optional
 from datetime import datetime, timedelta
 import os
 import aiofiles
+import json
 from database import get_db
 from models import User, Exam, Question, ExamStatus, SystemConfig
@@ -19,6 +21,7 @@ from services.auth_service import get_current_user
 from services.document_parser import document_parser
 from services.llm_service import LLMService
 from services.config_service import load_llm_config
+from services.progress_service import progress_service
 from utils import is_allowed_file, calculate_content_hash
 from dedup_utils import is_duplicate_question
@@ -264,9 +267,11 @@ async def async_parse_and_save(
 ):
     """
     Background task to parse document and save questions with deduplication.
+    Sends real-time progress updates via SSE.
     """
     from database import AsyncSessionLocal
     from sqlalchemy import select
+    from services.progress_service import ProgressUpdate, ProgressStatus
 
     async with AsyncSessionLocal() as db:
         try:
@@ -276,6 +281,14 @@ async def async_parse_and_save(
             exam.status = ExamStatus.PROCESSING
             await db.commit()
 
+            # Send initial progress
+            await progress_service.update_progress(ProgressUpdate(
+                exam_id=exam_id,
+                status=ProgressStatus.PARSING,
+                message="开始解析文档...",
+                progress=5.0
+            ))
+
             # Load LLM configuration from database
             llm_config = await load_llm_config(db)
             llm_service = LLMService(config=llm_config)
@@ -293,12 +306,27 @@ async def async_parse_and_save(
                 if use_pdf:
                     # Use Gemini's native PDF processing
                     print(f"[Exam {exam_id}] Using Gemini native PDF processing", flush=True)
                     print(f"[Exam {exam_id}] PDF file size: {len(file_content)} bytes", flush=True)
-                    questions_data = await llm_service.parse_document_with_pdf(file_content, filename)
+
+                    await progress_service.update_progress(ProgressUpdate(
+                        exam_id=exam_id,
+                        status=ProgressStatus.PARSING,
+                        message="使用Gemini解析PDF文档...",
+                        progress=10.0
+                    ))
+
+                    questions_data = await llm_service.parse_document_with_pdf(file_content, filename, exam_id)
                 else:
                     # Extract text first, then parse
                     if is_pdf:
                         print(f"[Exam {exam_id}] ⚠️ Warning: Using text extraction for PDF (provider does not support native PDF)", flush=True)
+
+                    await progress_service.update_progress(ProgressUpdate(
+                        exam_id=exam_id,
+                        status=ProgressStatus.PARSING,
+                        message="提取文档文本内容...",
+                        progress=10.0
+                    ))
+
                     print(f"[Exam {exam_id}] Extracting text from document...", flush=True)
                     text_content = await document_parser.parse_file(file_content, filename)
@@ -309,17 +337,40 @@ async def async_parse_and_save(
                     # Check if document is too long and needs splitting
                     if len(text_content) > 5000:
-                        print(f"[Exam {exam_id}] Document is long, splitting into chunks...", flush=True)
                         text_chunks = document_parser.split_text_with_overlap(text_content, chunk_size=3000, overlap=1000)
-                        print(f"[Exam {exam_id}] Split into {len(text_chunks)} chunks", flush=True)
+                        total_chunks = len(text_chunks)
+                        print(f"[Exam {exam_id}] Document is long, splitting into chunks...", flush=True)
+                        print(f"[Exam {exam_id}] Split into {total_chunks} chunks", flush=True)
+
+                        await progress_service.update_progress(ProgressUpdate(
+                            exam_id=exam_id,
+                            status=ProgressStatus.SPLITTING,
+                            message=f"文档已拆分为 {total_chunks} 个部分",
+                            progress=15.0,
+                            total_chunks=total_chunks
+                        ))
 
                         all_questions = []
                         for chunk_idx, chunk in enumerate(text_chunks):
-                            print(f"[Exam {exam_id}] Processing chunk {chunk_idx + 1}/{len(text_chunks)}...", flush=True)
+                            current_chunk = chunk_idx + 1
+                            chunk_progress = 15.0 + (60.0 * current_chunk / total_chunks)
+
+                            await progress_service.update_progress(ProgressUpdate(
+                                exam_id=exam_id,
+                                status=ProgressStatus.PROCESSING_CHUNK,
+                                message=f"正在处理第 {current_chunk}/{total_chunks} 部分...",
+                                progress=chunk_progress,
+                                total_chunks=total_chunks,
+                                current_chunk=current_chunk,
+                                questions_extracted=len(all_questions)
+                            ))
+
+                            print(f"[Exam {exam_id}] Processing chunk {current_chunk}/{total_chunks}...", flush=True)
 
                             try:
                                 chunk_questions = await llm_service.parse_document(chunk)
-                                print(f"[Exam {exam_id}] Chunk {chunk_idx + 1} extracted {len(chunk_questions)} questions", flush=True)
+                                print(f"[Exam {exam_id}] Chunk {current_chunk} extracted {len(chunk_questions)} questions", flush=True)
 
                                 # Fuzzy deduplicate across chunks
                                 for q in chunk_questions:
@@ -327,7 +378,7 @@ async def async_parse_and_save(
                                     if not is_duplicate_question(q, all_questions, threshold=0.85):
                                         all_questions.append(q)
                                     else:
-                                        print(f"[Exam {exam_id}] Skipped fuzzy duplicate from chunk {chunk_idx + 1}", flush=True)
+                                        print(f"[Exam {exam_id}] Skipped fuzzy duplicate from chunk {current_chunk}", flush=True)
                             except Exception as chunk_error:
                                 print(f"[Exam {exam_id}] Chunk {chunk_idx + 1} failed: {str(chunk_error)}", flush=True)
@@ -335,11 +386,37 @@ async def async_parse_and_save(
                         questions_data = all_questions
                         print(f"[Exam {exam_id}] Total questions after fuzzy deduplication: {len(questions_data)}", flush=True)
+
+                        await progress_service.update_progress(ProgressUpdate(
+                            exam_id=exam_id,
+                            status=ProgressStatus.DEDUPLICATING,
+                            message=f"所有部分处理完成,提取了 {len(questions_data)} 个题目",
+                            progress=75.0,
+                            total_chunks=total_chunks,
+                            current_chunk=total_chunks,
+                            questions_extracted=len(questions_data)
+                        ))
                     else:
                         print(f"[Exam {exam_id}] Document content preview:\n{text_content[:500]}\n{'...' if len(text_content) > 500 else ''}", flush=True)
                         print(f"[Exam {exam_id}] Calling LLM to extract questions...", flush=True)
+
+                        await progress_service.update_progress(ProgressUpdate(
+                            exam_id=exam_id,
+                            status=ProgressStatus.PARSING,
+                            message="正在提取题目...",
+                            progress=30.0
+                        ))
+
                         questions_data = await llm_service.parse_document(text_content)
+
+                        await progress_service.update_progress(ProgressUpdate(
+                            exam_id=exam_id,
+                            status=ProgressStatus.DEDUPLICATING,
+                            message=f"提取了 {len(questions_data)} 个题目",
+                            progress=60.0,
+                            questions_extracted=len(questions_data)
+                        ))
             except Exception as parse_error:
                 print(f"[Exam {exam_id}] ⚠️ Parse error details: {type(parse_error).__name__}", flush=True)
                 print(f"[Exam {exam_id}] ⚠️ Parse error message: {str(parse_error)}", flush=True)
@@ -351,6 +428,14 @@ async def async_parse_and_save(
                 raise Exception("No questions found in document")
 
             # Process questions with deduplication and AI answer generation
+            await progress_service.update_progress(ProgressUpdate(
+                exam_id=exam_id,
+                status=ProgressStatus.SAVING,
+                message="正在去重并保存题目到数据库...",
+                progress=80.0,
+                questions_extracted=len(questions_data)
+            ))
+
             print(f"[Exam {exam_id}] Processing questions with deduplication...")
             parse_result = await process_questions_with_dedup(exam_id, questions_data, db, llm_service)
@@ -370,9 +455,28 @@ async def async_parse_and_save(
print(f"[Exam {exam_id}] ✅ {parse_result.message}") print(f"[Exam {exam_id}] ✅ {parse_result.message}")
# Send completion progress
await progress_service.update_progress(ProgressUpdate(
exam_id=exam_id,
status=ProgressStatus.COMPLETED,
message=f"完成!添加了 {parse_result.new_added} 个题目(去重 {parse_result.duplicates_removed} 个)",
progress=100.0,
questions_extracted=parse_result.total_parsed,
questions_added=parse_result.new_added,
duplicates_removed=parse_result.duplicates_removed
))
except Exception as e: except Exception as e:
print(f"[Exam {exam_id}] ❌ Error: {str(e)}") print(f"[Exam {exam_id}] ❌ Error: {str(e)}")
# Send error progress
await progress_service.update_progress(ProgressUpdate(
exam_id=exam_id,
status=ProgressStatus.FAILED,
message=f"处理失败:{str(e)}",
progress=0.0
))
# Update exam status to failed # Update exam status to failed
result = await db.execute(select(Exam).where(Exam.id == exam_id)) result = await db.execute(select(Exam).where(Exam.id == exam_id))
exam = result.scalar_one() exam = result.scalar_one()
@@ -549,6 +653,70 @@ async def get_exam_detail(
     return exam
 
+@router.get("/{exam_id}/progress")
+async def get_exam_progress(
+    exam_id: int,
+    token: Optional[str] = None,
+    db: AsyncSession = Depends(get_db)
+):
+    """
+    Get real-time progress updates for exam document parsing (SSE endpoint).
+    Returns a Server-Sent Events stream with progress updates.
+    """
+    # Authenticate using token from query parameter (EventSource doesn't support custom headers)
+    from services.auth_service import get_current_user_from_token
+
+    if not token:
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail="Token required"
+        )
+
+    try:
+        current_user = await get_current_user_from_token(token, db)
+    except Exception as e:
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail="Invalid token"
+        )
+
+    # Verify exam belongs to user
+    result = await db.execute(
+        select(Exam).where(
+            and_(Exam.id == exam_id, Exam.user_id == current_user.id)
+        )
+    )
+    exam = result.scalar_one_or_none()
+
+    if not exam:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail="Exam not found"
+        )
+
+    async def event_generator():
+        """Generate SSE events"""
+        async for update in progress_service.subscribe(exam_id):
+            # Format as SSE
+            data = json.dumps(update.to_dict())
+            yield f"data: {data}\n\n"
+
+            # Stop if completed or failed
+            if update.status in ["completed", "failed"]:
+                break
+
+    return StreamingResponse(
+        event_generator(),
+        media_type="text/event-stream",
+        headers={
+            "Cache-Control": "no-cache",
+            "Connection": "keep-alive",
+            "X-Accel-Buffering": "no"  # Disable nginx buffering
+        }
+    )
+
 @router.delete("/{exam_id}", status_code=status.HTTP_204_NO_CONTENT)
 async def delete_exam(
     exam_id: int,

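Review note: the new progress stream can be smoke-tested outside a browser. A minimal sketch, assuming httpx is installed, the server listens on localhost:8000 under the same /api prefix the frontend uses, and a valid JWT is pasted into TOKEN (all three are assumptions, not part of this commit):

# Hypothetical SSE smoke test for GET /api/exams/{id}/progress
import json
import httpx

TOKEN = "..."   # hypothetical: obtain via the normal login flow
EXAM_ID = 1     # hypothetical exam id

def follow_progress():
    url = f"http://localhost:8000/api/exams/{EXAM_ID}/progress"
    # SSE is plain HTTP; stream the response and read it line by line
    with httpx.stream("GET", url, params={"token": TOKEN}, timeout=None) as resp:
        for line in resp.iter_lines():
            if line.startswith("data: "):
                update = json.loads(line[len("data: "):])
                print(f"{update['progress']:5.1f}%  {update['status']:17s}  {update['message']}")
                if update["status"] in ("completed", "failed"):
                    break

if __name__ == "__main__":
    follow_progress()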
View File

@@ -82,3 +82,42 @@ async def get_optional_user(
         return await get_current_user(credentials, db)
     except HTTPException:
         return None
 
+async def get_current_user_from_token(token: str, db: AsyncSession) -> User:
+    """
+    Get current user from JWT token string (for SSE with query params).
+
+    Args:
+        token: JWT token string
+        db: Database session
+
+    Returns:
+        User object
+
+    Raises:
+        Exception: If token is invalid or user not found
+    """
+    # Decode token
+    payload = decode_access_token(token)
+    if payload is None:
+        raise Exception("Invalid token")
+
+    user_id = payload.get("sub")
+    if user_id is None:
+        raise Exception("Invalid token payload")
+
+    # Convert user_id to int if it's a string
+    try:
+        user_id = int(user_id)
+    except (ValueError, TypeError):
+        raise Exception("Invalid user ID")
+
+    # Get user from database
+    result = await db.execute(select(User).where(User.id == user_id))
+    user = result.scalar_one_or_none()
+    if user is None:
+        raise Exception("User not found")
+
+    return user

View File

@@ -453,7 +453,7 @@ class LLMService:
         return chunks
 
-    async def parse_document_with_pdf(self, pdf_bytes: bytes, filename: str) -> List[Dict[str, Any]]:
+    async def parse_document_with_pdf(self, pdf_bytes: bytes, filename: str, exam_id: int = None) -> List[Dict[str, Any]]:
         """
         Parse PDF document using Gemini's native PDF understanding.
         Automatically splits large PDFs into overlapping chunks.
@@ -462,6 +462,7 @@ class LLMService:
         Args:
             pdf_bytes: PDF file content as bytes
             filename: Original filename for logging
+            exam_id: Optional exam ID for progress updates
 
         Returns:
             List of question dictionaries
@@ -471,17 +472,44 @@ class LLMService:
         # Split PDF into chunks
         pdf_chunks = self.split_pdf_pages(pdf_bytes, pages_per_chunk=4, overlap=1)
-        print(f"[Gemini PDF] Processing {len(pdf_chunks)} chunk(s) for {filename}")
+        total_chunks = len(pdf_chunks)
+        print(f"[Gemini PDF] Processing {total_chunks} chunk(s) for {filename}")
+
+        # Send progress update if exam_id provided
+        if exam_id:
+            from services.progress_service import progress_service, ProgressUpdate, ProgressStatus
+            await progress_service.update_progress(ProgressUpdate(
+                exam_id=exam_id,
+                status=ProgressStatus.SPLITTING,
+                message=f"PDF已拆分为 {total_chunks} 个部分",
+                progress=15.0,
+                total_chunks=total_chunks
+            ))
 
         all_questions = []
 
         # Process each chunk with fuzzy deduplication
         for chunk_idx, chunk_bytes in enumerate(pdf_chunks):
-            print(f"[Gemini PDF] Processing chunk {chunk_idx + 1}/{len(pdf_chunks)}")
+            current_chunk = chunk_idx + 1
+            chunk_progress = 15.0 + (60.0 * current_chunk / total_chunks)
+            print(f"[Gemini PDF] Processing chunk {current_chunk}/{total_chunks}")
+
+            # Send progress update
+            if exam_id:
+                await progress_service.update_progress(ProgressUpdate(
+                    exam_id=exam_id,
+                    status=ProgressStatus.PROCESSING_CHUNK,
+                    message=f"正在处理第 {current_chunk}/{total_chunks} 部分...",
+                    progress=chunk_progress,
+                    total_chunks=total_chunks,
+                    current_chunk=current_chunk,
+                    questions_extracted=len(all_questions)
+                ))
+
             try:
-                questions = await self._parse_pdf_chunk(chunk_bytes, f"{filename}_chunk_{chunk_idx + 1}")
-                print(f"[Gemini PDF] Chunk {chunk_idx + 1} extracted {len(questions)} questions")
+                questions = await self._parse_pdf_chunk(chunk_bytes, f"{filename}_chunk_{current_chunk}")
+                print(f"[Gemini PDF] Chunk {current_chunk} extracted {len(questions)} questions")
 
                 # Fuzzy deduplicate across chunks
                 from dedup_utils import is_duplicate_question
@@ -490,15 +518,27 @@ class LLMService:
                     if not is_duplicate_question(q, all_questions, threshold=0.85):
                         all_questions.append(q)
                     else:
-                        print(f"[PDF Split] Skipped fuzzy duplicate from chunk {chunk_idx + 1}")
+                        print(f"[PDF Split] Skipped fuzzy duplicate from chunk {current_chunk}")
             except Exception as e:
-                print(f"[Gemini PDF] Chunk {chunk_idx + 1} failed: {str(e)}")
+                print(f"[Gemini PDF] Chunk {current_chunk} failed: {str(e)}")
                 # Continue with other chunks
                 continue
 
         print(f"[Gemini PDF] Total questions extracted: {len(all_questions)} (after deduplication)")
+
+        # Send final progress for PDF processing
+        if exam_id:
+            await progress_service.update_progress(ProgressUpdate(
+                exam_id=exam_id,
+                status=ProgressStatus.DEDUPLICATING,
+                message=f"PDF处理完成,提取了 {len(all_questions)} 个题目",
+                progress=75.0,
+                total_chunks=total_chunks,
+                current_chunk=total_chunks,
+                questions_extracted=len(all_questions)
+            ))
+
         return all_questions
 
     async def _parse_pdf_chunk(self, pdf_bytes: bytes, chunk_name: str) -> List[Dict[str, Any]]:

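Review note: split_pdf_pages is called here with pages_per_chunk=4 and overlap=1, but its body is not part of this diff. For orientation, a hypothetical sketch of page-window splitting with a one-page overlap, assuming pypdf (the real implementation may differ):

# Hypothetical sketch of page-based PDF chunking with overlap; the actual
# split_pdf_pages in this repo is not shown in this commit.
import io
from pypdf import PdfReader, PdfWriter

def split_pdf_pages(pdf_bytes: bytes, pages_per_chunk: int = 4, overlap: int = 1) -> list[bytes]:
    reader = PdfReader(io.BytesIO(pdf_bytes))
    # Advance the window by chunk size minus overlap, so adjacent chunks
    # share `overlap` pages (questions cut at a page boundary appear twice
    # and are later removed by fuzzy deduplication).
    step = max(1, pages_per_chunk - overlap)
    chunks = []
    for start in range(0, len(reader.pages), step):
        writer = PdfWriter()
        for page in reader.pages[start:start + pages_per_chunk]:
            writer.add_page(page)
        buf = io.BytesIO()
        writer.write(buf)
        chunks.append(buf.getvalue())
        if start + pages_per_chunk >= len(reader.pages):
            break  # last window already covered the tail
    return chunks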
View File

@@ -0,0 +1,149 @@
"""
Progress Service - Manages document parsing progress for real-time updates
"""
import asyncio
from typing import Dict, Optional, AsyncGenerator
from datetime import datetime
from enum import Enum
class ProgressStatus(str, Enum):
"""Progress status types"""
PENDING = "pending"
PARSING = "parsing"
SPLITTING = "splitting"
PROCESSING_CHUNK = "processing_chunk"
DEDUPLICATING = "deduplicating"
SAVING = "saving"
COMPLETED = "completed"
FAILED = "failed"
class ProgressUpdate:
"""Progress update data structure"""
def __init__(
self,
exam_id: int,
status: ProgressStatus,
message: str,
progress: float = 0.0,
total_chunks: int = 0,
current_chunk: int = 0,
questions_extracted: int = 0,
questions_added: int = 0,
duplicates_removed: int = 0
):
self.exam_id = exam_id
self.status = status
self.message = message
self.progress = progress # 0-100
self.total_chunks = total_chunks
self.current_chunk = current_chunk
self.questions_extracted = questions_extracted
self.questions_added = questions_added
self.duplicates_removed = duplicates_removed
self.timestamp = datetime.now().isoformat()
def to_dict(self) -> dict:
"""Convert to dictionary for JSON serialization"""
return {
"exam_id": self.exam_id,
"status": self.status.value,
"message": self.message,
"progress": round(self.progress, 1),
"total_chunks": self.total_chunks,
"current_chunk": self.current_chunk,
"questions_extracted": self.questions_extracted,
"questions_added": self.questions_added,
"duplicates_removed": self.duplicates_removed,
"timestamp": self.timestamp
}
class ProgressService:
"""Service for managing parsing progress"""
def __init__(self):
# Store progress updates for each exam
self._progress: Dict[int, ProgressUpdate] = {}
# Store queues for SSE connections
self._queues: Dict[int, list] = {}
async def update_progress(self, update: ProgressUpdate):
"""
Update progress for an exam and notify all listeners
Args:
update: Progress update object
"""
exam_id = update.exam_id
self._progress[exam_id] = update
# Send to all connected SSE clients for this exam
if exam_id in self._queues:
dead_queues = []
for queue in self._queues[exam_id]:
try:
await queue.put(update)
except Exception as e:
print(f"[Progress] Failed to send update to queue: {e}")
dead_queues.append(queue)
# Clean up dead queues
for dead_queue in dead_queues:
self._queues[exam_id].remove(dead_queue)
def get_progress(self, exam_id: int) -> Optional[ProgressUpdate]:
"""Get current progress for an exam"""
return self._progress.get(exam_id)
async def subscribe(self, exam_id: int) -> AsyncGenerator[ProgressUpdate, None]:
"""
Subscribe to progress updates for an exam (SSE stream)
Args:
exam_id: Exam ID to subscribe to
Yields:
Progress updates as they occur
"""
# Create a queue for this connection
queue = asyncio.Queue()
# Register the queue
if exam_id not in self._queues:
self._queues[exam_id] = []
self._queues[exam_id].append(queue)
try:
# Send current progress if exists
current_progress = self.get_progress(exam_id)
if current_progress:
yield current_progress
# Stream updates
while True:
update = await queue.get()
yield update
# Stop streaming if completed or failed
if update.status in [ProgressStatus.COMPLETED, ProgressStatus.FAILED]:
break
finally:
# Cleanup
if exam_id in self._queues and queue in self._queues[exam_id]:
self._queues[exam_id].remove(queue)
if not self._queues[exam_id]:
del self._queues[exam_id]
def clear_progress(self, exam_id: int):
"""Clear progress data for an exam"""
if exam_id in self._progress:
del self._progress[exam_id]
if exam_id in self._queues:
del self._queues[exam_id]
# Singleton instance
progress_service = ProgressService()

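Review note: a minimal sketch of how the producer/consumer pair is meant to interact, runnable from inside the backend package so the import resolves (exam id 42 and the timings are arbitrary):

# Hypothetical standalone exercise of ProgressService, outside FastAPI.
import asyncio

from services.progress_service import progress_service, ProgressUpdate, ProgressStatus

async def producer(exam_id: int):
    for pct, st in [(10.0, ProgressStatus.PARSING), (100.0, ProgressStatus.COMPLETED)]:
        await asyncio.sleep(0.1)  # give the consumer time to register its queue
        await progress_service.update_progress(ProgressUpdate(
            exam_id=exam_id, status=st, message=f"{pct:.0f}%", progress=pct
        ))

async def consumer(exam_id: int):
    # The generator returns on its own once it sees COMPLETED or FAILED
    async for update in progress_service.subscribe(exam_id):
        print(update.to_dict())

asyncio.run(asyncio.wait_for(
    asyncio.gather(consumer(42), producer(42)), timeout=5.0
))

Worth noting: _progress and _queues live in process memory, so this only works while the background task and the SSE subscriber share one process; with multiple uvicorn workers the producer and subscriber may land in different processes and the stream would stay silent.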
View File

@@ -0,0 +1,121 @@
/**
 * Parsing Progress Component
 * Displays real-time progress for document parsing
 */
import React from 'react'
import { Loader, CheckCircle, XCircle, FileText, Layers } from 'lucide-react'

export const ParsingProgress = ({ progress }) => {
  if (!progress) return null

  const { status, message, progress: percentage, total_chunks, current_chunk, questions_extracted, questions_added, duplicates_removed } = progress

  const getStatusIcon = () => {
    switch (status) {
      case 'completed':
        return <CheckCircle className="h-6 w-6 text-green-500" />
      case 'failed':
        return <XCircle className="h-6 w-6 text-red-500" />
      default:
        return <Loader className="h-6 w-6 text-primary-500 animate-spin" />
    }
  }

  const getStatusColor = () => {
    switch (status) {
      case 'completed':
        return 'bg-green-500'
      case 'failed':
        return 'bg-red-500'
      case 'processing_chunk':
        return 'bg-blue-500'
      default:
        return 'bg-primary-500'
    }
  }

  return (
    <div className="bg-white rounded-xl shadow-sm p-6 mb-6">
      <div className="flex items-start gap-4">
        <div className="flex-shrink-0">
          {getStatusIcon()}
        </div>

        <div className="flex-1">
          {/* Status Message */}
          <h3 className="text-lg font-semibold text-gray-900 mb-2">
            {status === 'completed' ? '解析完成' : status === 'failed' ? '解析失败' : '正在解析文档'}
          </h3>
          <p className="text-gray-600 mb-4">{message}</p>

          {/* Progress Bar */}
          {status !== 'completed' && status !== 'failed' && (
            <div className="mb-4">
              <div className="flex justify-between text-sm text-gray-600 mb-2">
                <span>进度</span>
                <span>{percentage.toFixed(0)}%</span>
              </div>
              <div className="w-full bg-gray-200 rounded-full h-3 overflow-hidden">
                <div
                  className={`h-3 ${getStatusColor()} transition-all duration-300 ease-out`}
                  style={{ width: `${percentage}%` }}
                ></div>
              </div>
            </div>
          )}

          {/* Details Grid */}
          <div className="grid grid-cols-2 md:grid-cols-4 gap-4 mt-4">
            {total_chunks > 0 && (
              <div className="bg-blue-50 rounded-lg p-3">
                <div className="flex items-center gap-2 mb-1">
                  <Layers className="h-4 w-4 text-blue-600" />
                  <span className="text-xs text-blue-600 font-medium">文档拆分</span>
                </div>
                <p className="text-lg font-bold text-blue-900">
                  {current_chunk}/{total_chunks}
                </p>
                <p className="text-xs text-blue-600">部分</p>
              </div>
            )}

            {questions_extracted > 0 && (
              <div className="bg-purple-50 rounded-lg p-3">
                <div className="flex items-center gap-2 mb-1">
                  <FileText className="h-4 w-4 text-purple-600" />
                  <span className="text-xs text-purple-600 font-medium">已提取</span>
                </div>
                <p className="text-lg font-bold text-purple-900">{questions_extracted}</p>
                <p className="text-xs text-purple-600">题目</p>
              </div>
            )}

            {questions_added > 0 && (
              <div className="bg-green-50 rounded-lg p-3">
                <div className="flex items-center gap-2 mb-1">
                  <CheckCircle className="h-4 w-4 text-green-600" />
                  <span className="text-xs text-green-600 font-medium">已添加</span>
                </div>
                <p className="text-lg font-bold text-green-900">{questions_added}</p>
                <p className="text-xs text-green-600">题目</p>
              </div>
            )}

            {duplicates_removed > 0 && (
              <div className="bg-orange-50 rounded-lg p-3">
                <div className="flex items-center gap-2 mb-1">
                  <XCircle className="h-4 w-4 text-orange-600" />
                  <span className="text-xs text-orange-600 font-medium">已去重</span>
                </div>
                <p className="text-lg font-bold text-orange-900">{duplicates_removed}</p>
                <p className="text-xs text-orange-600">题目</p>
              </div>
            )}
          </div>
        </div>
      </div>
    </div>
  )
}

export default ParsingProgress

View File

@@ -1,10 +1,11 @@
 /**
- * Exam Detail Page - with append upload and status polling
+ * Exam Detail Page - with real-time parsing progress via SSE
  */
-import React, { useState, useEffect } from 'react'
+import React, { useState, useEffect, useRef } from 'react'
 import { useParams, useNavigate } from 'react-router-dom'
 import { examAPI, questionAPI } from '../api/client'
 import Layout from '../components/Layout'
+import ParsingProgress from '../components/ParsingProgress'
 import {
   ArrowLeft, Upload, Play, Loader, FileText, AlertCircle, RefreshCw
 } from 'lucide-react'
@@ -28,16 +29,20 @@ export const ExamDetail = () => {
   const [uploading, setUploading] = useState(false)
   const [showUploadModal, setShowUploadModal] = useState(false)
   const [uploadFile, setUploadFile] = useState(null)
+  const [progress, setProgress] = useState(null)
+  const eventSourceRef = useRef(null)
 
   useEffect(() => {
     loadExamDetail()
 
-    // Start polling if status is processing
-    const interval = setInterval(() => {
-      pollExamStatus()
-    }, 3000)
-
-    return () => clearInterval(interval)
+    // Cleanup on unmount
+    return () => {
+      if (eventSourceRef.current) {
+        eventSourceRef.current.close()
+        eventSourceRef.current = null
+      }
+    }
   }, [examId])
 
   const loadExamDetail = async () => {
@@ -49,6 +54,11 @@ export const ExamDetail = () => {
       setExam(examRes.data)
       setQuestions(questionsRes.data.questions)
 
+      // Connect to SSE if exam is processing
+      if (examRes.data.status === 'processing') {
+        connectSSE()
+      }
     } catch (error) {
       console.error('Failed to load exam:', error)
       toast.error('加载题库失败')
@@ -57,22 +67,53 @@ export const ExamDetail = () => {
     }
   }
-  const pollExamStatus = async () => {
-    try {
-      const response = await examAPI.getDetail(examId)
-      const newExam = response.data
-
-      // If status changed from processing to ready
-      if (exam?.status === 'processing' && newExam.status === 'ready') {
-        toast.success('文档解析完成!')
-        await loadExamDetail() // Reload to get updated questions
-      } else if (exam?.status === 'processing' && newExam.status === 'failed') {
-        toast.error('文档解析失败')
-      }
-
-      setExam(newExam)
-    } catch (error) {
-      console.error('Failed to poll exam:', error)
-    }
-  }
+  const connectSSE = () => {
+    // Close existing connection if any
+    if (eventSourceRef.current) {
+      eventSourceRef.current.close()
+    }
+
+    console.log('[SSE] Connecting to progress stream for exam', examId)
+    const token = localStorage.getItem('token')
+    const url = `/api/exams/${examId}/progress?token=${encodeURIComponent(token)}`
+
+    const eventSource = new EventSource(url)
+    eventSourceRef.current = eventSource
+
+    eventSource.onmessage = (event) => {
+      try {
+        const progressData = JSON.parse(event.data)
+        console.log('[SSE] Progress update:', progressData)
+        setProgress(progressData)
+
+        // Update exam status if completed or failed
+        if (progressData.status === 'completed') {
+          toast.success(progressData.message)
+          setExam(prev => ({ ...prev, status: 'ready' }))
+          loadExamDetail() // Reload to get updated questions
+          eventSource.close()
+          eventSourceRef.current = null
+        } else if (progressData.status === 'failed') {
+          toast.error(progressData.message)
+          setExam(prev => ({ ...prev, status: 'failed' }))
+          eventSource.close()
+          eventSourceRef.current = null
+        }
+      } catch (error) {
+        console.error('[SSE] Failed to parse progress data:', error)
+      }
+    }
+
+    eventSource.onerror = (error) => {
+      console.error('[SSE] Connection error:', error)
+      eventSource.close()
+      eventSourceRef.current = null
+    }
+
+    eventSource.onopen = () => {
+      console.log('[SSE] Connection established')
+    }
+  }
@@ -96,9 +137,13 @@ export const ExamDetail = () => {
       toast.success('文档上传成功,正在解析并去重...')
       setShowUploadModal(false)
       setUploadFile(null)
-      await loadExamDetail()
+      setExam(prev => ({ ...prev, status: 'processing' }))
+
+      // Connect to SSE for real-time progress
+      connectSSE()
     } catch (error) {
       console.error('Failed to append document:', error)
+      toast.error('文档上传失败')
     } finally {
       setUploading(false)
     }
@@ -138,7 +183,7 @@ export const ExamDetail = () => {
   const isProcessing = exam.status === 'processing'
   const isReady = exam.status === 'ready'
   const isFailed = exam.status === 'failed'
-  const progress = calculateProgress(exam.current_index, exam.total_questions)
+  const quizProgress = calculateProgress(exam.current_index, exam.total_questions)
 
   return (
     <Layout>
@@ -152,6 +197,11 @@ export const ExamDetail = () => {
           返回题库列表
         </button>
 
+        {/* Parsing Progress (only shown when processing) */}
+        {isProcessing && progress && (
+          <ParsingProgress progress={progress} />
+        )}
+
         {/* Header */}
         <div className="bg-white rounded-xl shadow-sm p-6 mb-6">
           <div className="flex flex-col md:flex-row md:items-start md:justify-between mb-4">
@@ -223,7 +273,7 @@ export const ExamDetail = () => {
<div className="w-full bg-gray-200 rounded-full h-3"> <div className="w-full bg-gray-200 rounded-full h-3">
<div <div
className="bg-primary-600 h-3 rounded-full transition-all" className="bg-primary-600 h-3 rounded-full transition-all"
style={{ width: `${progress}%` }} style={{ width: `${quizProgress}%` }}
></div> ></div>
</div> </div>
</div> </div>