feat: 添加 Gemini 支持和 AI 参考答案生成功能

主要功能：
- 🎯 新增 Google Gemini AI 提供商支持
  - 原生 PDF 理解能力（最多1000页）
  - 完整保留图片、表格、公式等内容
  - 支持自定义 Base URL（用于代理/中转服务）
- 🤖 实现 AI 参考答案自动生成
  - 当题目缺少答案时自动调用 AI 生成参考答案
  - 支持单选、多选、判断、简答等所有题型
  - 答案标记为"AI参考答案："便于识别
- 🔧 优化文档解析功能
  - 改进中文 Prompt 提高识别准确度
  - 自动修复 JSON 中的控制字符（换行符等）
  - 智能题目类型验证和自动转换（proof→short等）
  - 增加超时时间和重试机制
- 🎨 完善管理后台配置界面
  - 新增 Gemini 配置区域
  - 突出显示 PDF 原生支持特性
  - 为其他提供商添加"仅文本"警告
  - 支持 Gemini Base URL 自定义

技术改进：
- 添加 google-genai 依赖
- 实现异步 API 调用适配
- 完善错误处理和日志输出
- 统一配置管理和数据库存储

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2026-04-18 22:42:53 +00:00 · 2025-12-01 22:43:08 +08:00
parent a01f3540c5
commit d24a1a1f92
7 changed files with 824 additions and 63 deletions
--- a/backend/services/llm_service.py
+++ b/backend/services/llm_service.py
@@ -6,6 +6,8 @@ import json
 from typing import List, Dict, Any, Optional
 from openai import AsyncOpenAI
 from anthropic import AsyncAnthropic
+from google import genai
+from google.genai import types
 import httpx

 from models import QuestionType
@@ -36,9 +38,17 @@ class LLMService:

            self.client = AsyncOpenAI(
                api_key=api_key,
-                base_url=base_url
+                base_url=base_url,
+                timeout=120.0,  # 增加超时时间到 120 秒
+                max_retries=3   # 自动重试 3 次
            )

+            # Log configuration for debugging
+            print(f"[LLM Config] Provider: OpenAI", flush=True)
+            print(f"[LLM Config] Base URL: {base_url}", flush=True)
+            print(f"[LLM Config] Model: {self.model}", flush=True)
+            print(f"[LLM Config] API Key: {api_key[:10]}...{api_key[-4:] if len(api_key) > 14 else 'xxxx'}", flush=True)
+
        elif self.provider == "anthropic":
            api_key = (config or {}).get("anthropic_api_key") or os.getenv("ANTHROPIC_API_KEY")
            self.model = (config or {}).get("anthropic_model") or os.getenv("ANTHROPIC_MODEL", "claude-3-haiku-20240307")
@@ -60,9 +70,58 @@ class LLMService:

            self.client = AsyncOpenAI(
                api_key=api_key,
-                base_url=base_url
+                base_url=base_url,
+                timeout=120.0,  # 增加超时时间到 120 秒
+                max_retries=3   # 自动重试 3 次
            )

+        elif self.provider == "gemini":
+            api_key = (config or {}).get("gemini_api_key") or os.getenv("GEMINI_API_KEY")
+            base_url = (config or {}).get("gemini_base_url") or os.getenv("GEMINI_BASE_URL")
+            self.model = (config or {}).get("gemini_model") or os.getenv("GEMINI_MODEL", "gemini-2.0-flash-exp")
+
+            if not api_key:
+                raise ValueError("Gemini API key not configured")
+
+            # Create client with optional custom base URL
+            if base_url:
+                # Use custom base URL (for proxy/relay services)
+                print(f"[LLM Config] Using custom Gemini base URL: {base_url}", flush=True)
+
+                # Try different methods to set custom base URL
+                try:
+                    # Method 1: Try http_options parameter
+                    self.client = genai.Client(
+                        api_key=api_key,
+                        http_options={'api_endpoint': base_url}
+                    )
+                    print(f"[LLM Config] ✓ Set base URL via http_options", flush=True)
+                except TypeError:
+                    try:
+                        # Method 2: Try vertexai parameter (some versions)
+                        self.client = genai.Client(
+                            api_key=api_key,
+                            vertexai=False,
+                            client_options={'api_endpoint': base_url}
+                        )
+                        print(f"[LLM Config] ✓ Set base URL via client_options", flush=True)
+                    except:
+                        # Method 3: Set environment variable and create client
+                        print(f"[LLM Config] ⚠️ SDK doesn't support custom URL parameter, using environment variable", flush=True)
+                        os.environ['GOOGLE_API_BASE'] = base_url
+                        self.client = genai.Client(api_key=api_key)
+                        print(f"[LLM Config] ✓ Set base URL via environment variable", flush=True)
+            else:
+                # Use default Google API
+                self.client = genai.Client(api_key=api_key)
+
+            # Log configuration for debugging
+            print(f"[LLM Config] Provider: Gemini", flush=True)
+            print(f"[LLM Config] Model: {self.model}", flush=True)
+            if base_url:
+                print(f"[LLM Config] Base URL: {base_url}", flush=True)
+            print(f"[LLM Config] API Key: {api_key[:10]}...{api_key[-4:] if len(api_key) > 14 else 'xxxx'}", flush=True)
+
        else:
            raise ValueError(f"Unsupported AI provider: {self.provider}")

@@ -82,33 +141,49 @@ class LLMService:
            ...
        ]
        """
-        prompt = """You are a professional question parser. Parse the given document and extract all questions.
+        prompt = """你是一个专业的试题解析专家。请仔细分析下面的文档内容，提取其中的所有试题。

-For each question, identify:
-1. Question content (the question text)
-2. Question type: single (单选), multiple (多选), judge (判断), short (简答)
-3. Options (for choice questions only, format: ["A. Option1", "B. Option2", ...])
-4. Correct answer
-5. Analysis/Explanation (if available)
+请注意：
+- 文档中可能包含中文或英文题目
+- 题目可能有多种格式，请灵活识别
+- 即使格式不标准，也请尽量提取题目内容
+- 如果文档只是普通文章而没有题目，请返回空数组 []

-Return ONLY a JSON array of questions, with no additional text:
+对于每道题目，请识别：
+1. 题目内容 (完整的题目文字)
+2. 题目类型（**只能**使用以下4种类型之一）：
+   - single：单选题
+   - multiple：多选题
+   - judge：判断题
+   - short：简答题（包括问答题、计算题、证明题、填空题等所有非选择题）
+3. 选项 (仅针对选择题，格式: ["A. 选项1", "B. 选项2", ...])
+4. 正确答案 (请仔细查找文档中的答案。如果确实没有答案，可以填 null)
+5. 解析/说明 (如果有的话)
+
+**重要**：题目类型必须是 single、multiple、judge、short 之一，不要使用其他类型名称！
+
+返回格式：请**只返回** JSON 数组，不要有任何其他文字或 markdown 代码块：
 [
-  {
-    "content": "question text",
+  {{
+    "content": "题目内容",
    "type": "single",
-    "options": ["A. Option1", "B. Option2", "C. Option3", "D. Option4"],
+    "options": ["A. 选项1", "B. 选项2", "C. 选项3", "D. 选项4"],
    "answer": "A",
-    "analysis": "explanation"
-  },
+    "analysis": "解析说明"
+  }},
  ...
 ]

-Document content:
+文档内容：
 ---
 {content}
 ---

-IMPORTANT: Return ONLY the JSON array, no markdown code blocks or explanations."""
+重要提示：
+- 仔细阅读文档内容
+- 识别所有看起来像试题的内容
+- 如果文档中没有题目（比如只是普通文章），返回 []
+- **只返回 JSON 数组**，不要包含 ```json 或其他标记"""

        try:
            if self.provider == "anthropic":
@@ -120,6 +195,20 @@ IMPORTANT: Return ONLY the JSON array, no markdown code blocks or explanations."
                    ]
                )
                result = response.content[0].text
+            elif self.provider == "gemini":
+                # Gemini uses different API
+                import asyncio
+
+                def _generate_content():
+                    return self.client.models.generate_content(
+                        model=self.model,
+                        contents=prompt.format(content=content)
+                    )
+
+                print(f"[Gemini Text] Calling Gemini API with model: {self.model}", flush=True)
+                response = await asyncio.to_thread(_generate_content)
+                result = response.text
+                print(f"[Gemini Text] API call completed", flush=True)
            else:  # OpenAI or Qwen
                response = await self.client.chat.completions.create(
                    model=self.model,
@@ -131,28 +220,398 @@ IMPORTANT: Return ONLY the JSON array, no markdown code blocks or explanations."
                )
                result = response.choices[0].message.content

+            # Log original response for debugging
+            import sys
+            print(f"[LLM Raw Response] Length: {len(result)} chars", flush=True)
+            print(f"[LLM Raw Response] First 300 chars:\n{result[:300]}", flush=True)
+            print(f"[LLM Raw Response] Last 200 chars:\n{result[-200:]}", flush=True)
+            sys.stdout.flush()
+
            # Clean result and parse JSON
            result = result.strip()
+
+            # Remove markdown code blocks
            if result.startswith("```json"):
                result = result[7:]
-            if result.startswith("```"):
+            elif result.startswith("```"):
                result = result[3:]
+
            if result.endswith("```"):
                result = result[:-3]
+
            result = result.strip()

-            questions = json.loads(result)
+            # Try to find JSON array if there's extra text
+            if not result.startswith('['):
+                # Find the first '[' character
+                start_idx = result.find('[')
+                if start_idx != -1:
+                    print(f"[JSON Cleanup] Found '[' at position {start_idx}, extracting array...")
+                    result = result[start_idx:]
+                else:
+                    print(f"[JSON Error] No '[' found in response!")
+                    raise Exception("LLM response does not contain a JSON array")

-            # Add content hash to each question
+            if not result.endswith(']'):
+                # Find the last ']' character
+                end_idx = result.rfind(']')
+                if end_idx != -1:
+                    print(f"[JSON Cleanup] Found last ']' at position {end_idx}")
+                    result = result[:end_idx + 1]
+
+            result = result.strip()
+
+            # Log the cleaned result for debugging
+            print(f"[LLM Cleaned JSON] Length: {len(result)} chars")
+            print(f"[LLM Cleaned JSON] First 300 chars:\n{result[:300]}")
+
+            try:
+                questions = json.loads(result)
+            except json.JSONDecodeError as je:
+                print(f"[JSON Error] Failed to parse JSON at line {je.lineno}, column {je.colno}")
+                print(f"[JSON Error] Error: {je.msg}")
+
+                # If error is about control characters, try to fix them
+                if "control character" in je.msg.lower() or "invalid \\escape" in je.msg.lower():
+                    print(f"[JSON Cleanup] Attempting to fix control characters...", flush=True)
+
+                    # Fix unescaped control characters in JSON string values
+                    import re
+
+                    def fix_string_value(match):
+                        """Fix control characters inside a JSON string value"""
+                        string_content = match.group(1)
+                        # Escape control characters
+                        string_content = string_content.replace('\n', '\\n')
+                        string_content = string_content.replace('\r', '\\r')
+                        string_content = string_content.replace('\t', '\\t')
+                        string_content = string_content.replace('\b', '\\b')
+                        string_content = string_content.replace('\f', '\\f')
+                        return f'"{string_content}"'
+
+                    # Match string values in JSON
+                    # Pattern matches: "..." (handles escaped quotes and backslashes)
+                    # (?:[^"\\]|\\.)* means: either non-quote-non-backslash OR backslash-followed-by-anything, repeated
+                    fixed_result = re.sub(r'"((?:[^"\\]|\\.)*)"', fix_string_value, result)
+
+                    print(f"[JSON Cleanup] Retrying with fixed control characters...", flush=True)
+                    try:
+                        questions = json.loads(fixed_result)
+                        print(f"[JSON Cleanup] ✅ Successfully parsed after fixing control characters!", flush=True)
+                    except json.JSONDecodeError as je2:
+                        print(f"[JSON Error] Still failed after fix: {je2.msg}", flush=True)
+                        # Print context around the error
+                        lines = result.split('\n')
+                        if je.lineno <= len(lines):
+                            start = max(0, je.lineno - 3)
+                            end = min(len(lines), je.lineno + 2)
+                            print(f"[JSON Error] Context (lines {start+1}-{end}):")
+                            for i in range(start, end):
+                                marker = " >>> " if i == je.lineno - 1 else "     "
+                                print(f"{marker}{i+1}: {lines[i]}")
+                        raise Exception(f"Invalid JSON format from LLM: {je.msg} at line {je.lineno}")
+                else:
+                    # Print context around the error
+                    lines = result.split('\n')
+                    if je.lineno <= len(lines):
+                        start = max(0, je.lineno - 3)
+                        end = min(len(lines), je.lineno + 2)
+                        print(f"[JSON Error] Context (lines {start+1}-{end}):")
+                        for i in range(start, end):
+                            marker = " >>> " if i == je.lineno - 1 else "     "
+                            print(f"{marker}{i+1}: {lines[i]}")
+                    raise Exception(f"Invalid JSON format from LLM: {je.msg} at line {je.lineno}")
+
+            # Validate that we got a list
+            if not isinstance(questions, list):
+                raise Exception(f"Expected a list of questions, got {type(questions)}")
+
+            if len(questions) == 0:
+                raise Exception("No questions found in the parsed result")
+
+            # Validate and fix question types
+            valid_types = {"single", "multiple", "judge", "short"}
+            type_mapping = {
+                "proof": "short",
+                "essay": "short",
+                "calculation": "short",
+                "fill": "short",
+                "填空": "short",
+                "证明": "short",
+                "计算": "short",
+                "问答": "short",
+                "单选": "single",
+                "多选": "multiple",
+                "判断": "judge",
+                "简答": "short"
+            }
+
+            # Add content hash and validate types
            for q in questions:
+                if "content" not in q:
+                    print(f"[Warning] Question missing 'content' field: {q}")
+                    continue
+
+                # Validate and fix question type
+                q_type = q.get("type", "short")
+                if isinstance(q_type, str):
+                    q_type_lower = q_type.lower()
+                    if q_type_lower not in valid_types:
+                        # Try to map to valid type
+                        if q_type_lower in type_mapping:
+                            old_type = q_type
+                            q["type"] = type_mapping[q_type_lower]
+                            print(f"[Type Fix] Changed '{old_type}' to '{q['type']}' for question: {q['content'][:50]}...", flush=True)
+                        else:
+                            # Default to short answer
+                            print(f"[Type Fix] Unknown type '{q_type}', defaulting to 'short' for question: {q['content'][:50]}...", flush=True)
+                            q["type"] = "short"
+                    else:
+                        q["type"] = q_type_lower
+                else:
+                    q["type"] = "short"
+
                q["content_hash"] = calculate_content_hash(q["content"])

            return questions

        except Exception as e:
-            print(f"Error parsing document: {e}")
+            print(f"[Error] Document parsing failed: {str(e)}")
            raise Exception(f"Failed to parse document: {str(e)}")

+    async def parse_document_with_pdf(self, pdf_bytes: bytes, filename: str) -> List[Dict[str, Any]]:
+        """
+        Parse PDF document using Gemini's native PDF understanding.
+        Only works with Gemini provider.
+
+        Args:
+            pdf_bytes: PDF file content as bytes
+            filename: Original filename for logging
+
+        Returns:
+            List of question dictionaries
+        """
+        if self.provider != "gemini":
+            raise ValueError("PDF parsing is only supported with Gemini provider")
+
+        prompt = """你是一个专业的试题解析专家。请仔细分析这个 PDF 文档，提取其中的所有试题。
+
+请注意：
+- PDF 中可能包含中文或英文题目
+- 题目可能有多种格式，请灵活识别
+- 即使格式不标准，也请尽量提取题目内容
+- 题目内容如果包含代码或换行，请将换行符替换为空格或\\n
+
+对于每道题目，请识别：
+1. 题目内容 (完整的题目文字，如果有代码请保持在一行或用\\n表示换行)
+2. 题目类型（**只能**使用以下4种类型之一）：
+   - single：单选题
+   - multiple：多选题
+   - judge：判断题
+   - short：简答题（包括问答题、计算题、证明题、填空题等所有非选择题）
+3. 选项 (仅针对选择题，格式: ["A. 选项1", "B. 选项2", ...])
+4. 正确答案 (请仔细查找文档中的答案。如果确实没有答案，可以填 null)
+5. 解析/说明 (如果有的话)
+
+**重要**：题目类型必须是 single、multiple、judge、short 之一，不要使用其他类型名称！
+
+返回格式要求：
+1. **必须**返回一个完整的 JSON 数组（以 [ 开始，以 ] 结束）
+2. **不要**返回 JSONL 格式（每行一个 JSON 对象）
+3. **不要**包含 markdown 代码块标记（```json 或 ```）
+4. **不要**包含任何解释性文字
+
+正确的格式示例：
+[
+  {{
+    "content": "题目内容",
+    "type": "single",
+    "options": ["A. 选项1", "B. 选项2", "C. 选项3", "D. 选项4"],
+    "answer": "A",
+    "analysis": "解析说明"
+  }},
+  {{
+    "content": "第二道题",
+    "type": "judge",
+    "options": [],
+    "answer": "对",
+    "analysis": null
+  }}
+]
+
+重要提示：
+- 请仔细查看 PDF 的每一页
+- 识别所有看起来像试题的内容
+- 如果找不到明确的选项，可以根据上下文推断题目类型
+- 题目内容中的换行请用\\n或空格替换，确保 JSON 格式正确
+- **只返回一个 JSON 数组**，不要包含其他任何内容"""
+
+        try:
+            print(f"[Gemini PDF] Processing PDF: {filename}", flush=True)
+            print(f"[Gemini PDF] File size: {len(pdf_bytes)} bytes", flush=True)
+
+            # Use Gemini's native PDF processing
+            # Run sync API in thread pool to avoid blocking
+            import asyncio
+
+            def _generate_content():
+                return self.client.models.generate_content(
+                    model=self.model,
+                    contents=[
+                        types.Part.from_bytes(
+                            data=pdf_bytes,
+                            mime_type='application/pdf',
+                        ),
+                        prompt
+                    ]
+                )
+
+            print(f"[Gemini PDF] Calling Gemini API with model: {self.model}", flush=True)
+            response = await asyncio.to_thread(_generate_content)
+            print(f"[Gemini PDF] API call completed", flush=True)
+
+            result = response.text
+            print(f"[Gemini PDF] Response retrieved, checking content...", flush=True)
+
+            # Log original response for debugging
+            import sys
+            print(f"[LLM Raw Response] Length: {len(result)} chars", flush=True)
+            print(f"[LLM Raw Response] First 300 chars:\n{result[:300]}", flush=True)
+            print(f"[LLM Raw Response] Last 200 chars:\n{result[-200:]}", flush=True)
+            sys.stdout.flush()
+
+            # Clean result and parse JSON (same as text method)
+            result = result.strip()
+
+            # Remove markdown code blocks
+            if result.startswith("```json"):
+                result = result[7:]
+            elif result.startswith("```"):
+                result = result[3:]
+
+            if result.endswith("```"):
+                result = result[:-3]
+
+            result = result.strip()
+
+            # Try to find JSON array if there's extra text
+            if not result.startswith('['):
+                start_idx = result.find('[')
+                if start_idx != -1:
+                    print(f"[JSON Cleanup] Found '[' at position {start_idx}, extracting array...", flush=True)
+                    result = result[start_idx:]
+                else:
+                    print(f"[JSON Error] No '[' found in response!", flush=True)
+                    raise Exception("LLM response does not contain a JSON array")
+
+            if not result.endswith(']'):
+                end_idx = result.rfind(']')
+                if end_idx != -1:
+                    print(f"[JSON Cleanup] Found last ']' at position {end_idx}", flush=True)
+                    result = result[:end_idx + 1]
+
+            result = result.strip()
+
+            # Log the cleaned result for debugging
+            print(f"[LLM Cleaned JSON] Length: {len(result)} chars", flush=True)
+            print(f"[LLM Cleaned JSON] First 300 chars:\n{result[:300]}", flush=True)
+
+            try:
+                questions = json.loads(result)
+            except json.JSONDecodeError as je:
+                print(f"[JSON Error] Failed to parse JSON at line {je.lineno}, column {je.colno}", flush=True)
+                print(f"[JSON Error] Error: {je.msg}", flush=True)
+                # Print context around the error
+                lines = result.split('\n')
+                if je.lineno <= len(lines):
+                    start = max(0, je.lineno - 3)
+                    end = min(len(lines), je.lineno + 2)
+                    print(f"[JSON Error] Context (lines {start+1}-{end}):", flush=True)
+                    for i in range(start, end):
+                        marker = " >>> " if i == je.lineno - 1 else "     "
+                        print(f"{marker}{i+1}: {lines[i]}", flush=True)
+                raise Exception(f"Invalid JSON format from LLM: {je.msg} at line {je.lineno}")
+
+            # Validate that we got a list
+            if not isinstance(questions, list):
+                raise Exception(f"Expected a list of questions, got {type(questions)}")
+
+            if len(questions) == 0:
+                # Provide more helpful error message
+                print(f"[Gemini PDF] ⚠️ Gemini returned empty array - PDF may not contain recognizable questions", flush=True)
+                print(f"[Gemini PDF] 💡 Trying to get Gemini's explanation...", flush=True)
+
+                # Ask Gemini what it saw in the PDF
+                def _ask_what_gemini_sees():
+                    return self.client.models.generate_content(
+                        model=self.model,
+                        contents=[
+                            types.Part.from_bytes(
+                                data=pdf_bytes,
+                                mime_type='application/pdf',
+                            ),
+                            "Please describe what you see in this PDF document. What is the main content? Are there any questions, exercises, or test items? Respond in Chinese."
+                        ]
+                    )
+
+                import asyncio
+                explanation_response = await asyncio.to_thread(_ask_what_gemini_sees)
+                explanation = explanation_response.text
+                print(f"[Gemini PDF] 📄 Gemini sees: {explanation[:500]}...", flush=True)
+
+                raise Exception(f"No questions found in PDF. Gemini's description: {explanation[:200]}...")
+
+            # Validate and fix question types
+            valid_types = {"single", "multiple", "judge", "short"}
+            type_mapping = {
+                "proof": "short",
+                "essay": "short",
+                "calculation": "short",
+                "fill": "short",
+                "填空": "short",
+                "证明": "short",
+                "计算": "short",
+                "问答": "short",
+                "单选": "single",
+                "多选": "multiple",
+                "判断": "judge",
+                "简答": "short"
+            }
+
+            # Add content hash and validate types
+            for q in questions:
+                if "content" not in q:
+                    print(f"[Warning] Question missing 'content' field: {q}", flush=True)
+                    continue
+
+                # Validate and fix question type
+                q_type = q.get("type", "short")
+                if isinstance(q_type, str):
+                    q_type_lower = q_type.lower()
+                    if q_type_lower not in valid_types:
+                        # Try to map to valid type
+                        if q_type_lower in type_mapping:
+                            old_type = q_type
+                            q["type"] = type_mapping[q_type_lower]
+                            print(f"[Type Fix] Changed '{old_type}' to '{q['type']}' for question: {q['content'][:50]}...", flush=True)
+                        else:
+                            # Default to short answer
+                            print(f"[Type Fix] Unknown type '{q_type}', defaulting to 'short' for question: {q['content'][:50]}...", flush=True)
+                            q["type"] = "short"
+                    else:
+                        q["type"] = q_type_lower
+                else:
+                    q["type"] = "short"
+
+                q["content_hash"] = calculate_content_hash(q["content"])
+
+            print(f"[Gemini PDF] Successfully extracted {len(questions)} questions", flush=True)
+            return questions
+
+        except Exception as e:
+            print(f"[Error] PDF parsing failed: {str(e)}", flush=True)
+            raise Exception(f"Failed to parse PDF document: {str(e)}")
+
    async def grade_short_answer(
        self,
        question: str,
@@ -201,6 +660,18 @@ Return ONLY the JSON object, no markdown or explanations."""
                    ]
                )
                result = response.content[0].text
+            elif self.provider == "gemini":
+                # Gemini uses different API
+                import asyncio
+
+                def _generate_content():
+                    return self.client.models.generate_content(
+                        model=self.model,
+                        contents=prompt
+                    )
+
+                response = await asyncio.to_thread(_generate_content)
+                result = response.text
            else:  # OpenAI or Qwen
                response = await self.client.chat.completions.create(
                    model=self.model,