Mirror of https://github.com/handsomezhuzhu/QQuiz.git, synced 2026-02-20 20:10:14 +00:00
refactor: rewrite the Gemini integration as direct REST API calls to support custom proxy services
Core improvements:

- 🔄 Complete rewrite of the Gemini integration
  - Removed the google-genai SDK dependency
  - Switched to calling the Gemini REST API directly via httpx
  - Full control over request URLs and parameters
- 🌐 First-class support for custom base URLs
  - Works with proxy services (e.g. https://load.zhuzihan.com/proxy/gemini-self)
  - Works with API-key rotation relay services
  - Compatible with the standard Gemini API format (v1beta/models/{model}:generateContent)
- 📄 Full PDF processing capability preserved
  - Uses the inline_data format (base64-encoded)
  - Native PDF understanding (up to 1000 pages)
  - Images, tables, formulas, and other visual elements fully preserved
- ⚡ Performance and stability improvements
  - Standardized on httpx.AsyncClient
  - 120-second timeout
  - Connection pool management (max_keepalive_connections=5)
  - Improved error handling and logging

Technical details:
- Removed dependency: google-genai==1.0.0
- Request format: standard Gemini REST API
- Response parsing: extract candidates[0].content.parts[0].text directly from the JSON
- PDF upload: inline_data with base64 encoding

Affected functionality:
- Text content parsing ✅
- PDF document parsing ✅
- Short-answer grading ✅
- AI reference answer generation ✅

🎉 Gemini now works seamlessly with user-hosted proxy/rotation relay services!

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
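For orientation before the diff itself: the request/response shape the commit standardizes on looks roughly like the following. This is a minimal standalone sketch, not code from the diff; the base URL, model name, and API key are placeholders.

```python
# Minimal sketch of the Gemini REST call this commit adopts (not code from
# the diff itself). Base URL, model name, and key below are placeholders.
import asyncio
import httpx

BASE_URL = "https://generativelanguage.googleapis.com"  # or a custom proxy
MODEL = "gemini-1.5-flash"  # placeholder model name
API_KEY = "YOUR_API_KEY"    # placeholder

async def generate(prompt: str) -> str:
    url = f"{BASE_URL}/v1beta/models/{MODEL}:generateContent"
    payload = {"contents": [{"parts": [{"text": prompt}]}]}
    async with httpx.AsyncClient(timeout=120.0) as client:
        response = await client.post(
            url,
            headers={"Content-Type": "application/json"},
            params={"key": API_KEY},
            json=payload,
        )
        response.raise_for_status()
        data = response.json()
        # Same JSON path the diff uses to pull out the generated text
        return data["candidates"][0]["content"]["parts"][0]["text"]

print(asyncio.run(generate("Say hello in one word.")))
```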
```diff
@@ -15,7 +15,6 @@ aiofiles==23.2.1
 httpx==0.26.0
 openai==1.10.0
 anthropic==0.8.1
-google-genai==1.0.0
 python-docx==1.1.0
 PyPDF2==3.0.1
 openpyxl==3.1.2
```
```diff
@@ -113,16 +113,20 @@ async def generate_ai_reference_answer(
 
     # Generate answer using LLM
     if llm_service.provider == "gemini":
-        import asyncio
+        # Use REST API for Gemini
+        url = f"{llm_service.gemini_base_url}/v1beta/models/{llm_service.model}:generateContent"
+        headers = {"Content-Type": "application/json"}
+        params = {"key": llm_service.gemini_api_key}
+        payload = {
+            "contents": [{
+                "parts": [{"text": prompt}]
+            }]
+        }
 
-        def _generate():
-            return llm_service.client.models.generate_content(
-                model=llm_service.model,
-                contents=prompt
-            )
-
-        response = await asyncio.to_thread(_generate)
-        return response.text.strip()
+        response = await llm_service.client.post(url, headers=headers, params=params, json=payload)
+        response.raise_for_status()
+        response_data = response.json()
+        return response_data["candidates"][0]["content"]["parts"][0]["text"].strip()
    elif llm_service.provider == "anthropic":
        response = await llm_service.client.messages.create(
            model=llm_service.model,
```
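The post / raise_for_status / JSON-path sequence above is repeated verbatim in every Gemini branch of this diff. One possible follow-up refactor, not part of this commit, would hoist it into a shared helper. A sketch, assuming the attributes the diff introduces on LLMService (gemini_base_url, gemini_api_key, model, and an httpx.AsyncClient stored as client):

```python
# Hypothetical consolidation of the repeated REST-call pattern (not part of
# this commit). Assumes the attributes this diff adds to LLMService.
from typing import Any

import httpx


class GeminiRESTMixin:
    gemini_base_url: str
    gemini_api_key: str
    model: str
    client: httpx.AsyncClient

    async def _gemini_generate(self, parts: list[dict[str, Any]]) -> str:
        """POST a generateContent request and return the first text part."""
        url = f"{self.gemini_base_url}/v1beta/models/{self.model}:generateContent"
        response = await self.client.post(
            url,
            headers={"Content-Type": "application/json"},
            params={"key": self.gemini_api_key},
            json={"contents": [{"parts": parts}]},
        )
        response.raise_for_status()
        data = response.json()
        return data["candidates"][0]["content"]["parts"][0]["text"]
```

Each Gemini branch would then reduce to `await self._gemini_generate([{"text": prompt}])`, with an extra `inline_data` part for the PDF paths.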
```diff
@@ -6,8 +6,6 @@ import json
 from typing import List, Dict, Any, Optional
 from openai import AsyncOpenAI
 from anthropic import AsyncAnthropic
-from google import genai
-from google.genai import types
 import httpx
 
 from models import QuestionType
```
```diff
@@ -83,43 +81,20 @@ class LLMService:
             if not api_key:
                 raise ValueError("Gemini API key not configured")
 
-            # Create client with optional custom base URL
-            if base_url:
-                # Use custom base URL (for proxy/relay services)
-                print(f"[LLM Config] Using custom Gemini base URL: {base_url}", flush=True)
-
-                # Try different methods to set custom base URL
-                try:
-                    # Method 1: Try http_options parameter
-                    self.client = genai.Client(
-                        api_key=api_key,
-                        http_options={'api_endpoint': base_url}
-                    )
-                    print(f"[LLM Config] ✓ Set base URL via http_options", flush=True)
-                except TypeError:
-                    try:
-                        # Method 2: Try vertexai parameter (some versions)
-                        self.client = genai.Client(
-                            api_key=api_key,
-                            vertexai=False,
-                            client_options={'api_endpoint': base_url}
-                        )
-                        print(f"[LLM Config] ✓ Set base URL via client_options", flush=True)
-                    except:
-                        # Method 3: Set environment variable and create client
-                        print(f"[LLM Config] ⚠️ SDK doesn't support custom URL parameter, using environment variable", flush=True)
-                        os.environ['GOOGLE_API_BASE'] = base_url
-                        self.client = genai.Client(api_key=api_key)
-                        print(f"[LLM Config] ✓ Set base URL via environment variable", flush=True)
-            else:
-                # Use default Google API
-                self.client = genai.Client(api_key=api_key)
+            # Store Gemini configuration for REST API calls
+            self.gemini_api_key = api_key
+            self.gemini_base_url = base_url or "https://generativelanguage.googleapis.com"
+
+            # Create httpx client for REST API calls (instead of SDK)
+            self.client = httpx.AsyncClient(
+                timeout=120.0,
+                limits=httpx.Limits(max_keepalive_connections=5, max_connections=10)
+            )
 
             # Log configuration for debugging
-            print(f"[LLM Config] Provider: Gemini", flush=True)
+            print(f"[LLM Config] Provider: Gemini (REST API)", flush=True)
             print(f"[LLM Config] Model: {self.model}", flush=True)
-            if base_url:
-                print(f"[LLM Config] Base URL: {base_url}", flush=True)
+            print(f"[LLM Config] Base URL: {self.gemini_base_url}", flush=True)
             print(f"[LLM Config] API Key: {api_key[:10]}...{api_key[-4:] if len(api_key) > 14 else 'xxxx'}", flush=True)
 
         else:
```
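One thing this hunk does not add is teardown: the SDK client is replaced with a long-lived `httpx.AsyncClient`, which keeps pooled connections open. A sketch of explicit cleanup at shutdown, assuming the caller owns the service instance (the hook itself is hypothetical, not in the diff):

```python
# Hypothetical teardown, not in this commit: httpx.AsyncClient holds pooled
# connections, so close it when the application shuts down.
import httpx

async def shutdown_llm_service(llm_service) -> None:
    # Only the Gemini path stores a raw httpx.AsyncClient on .client;
    # the other providers' SDK clients manage their own transport.
    if isinstance(llm_service.client, httpx.AsyncClient):
        await llm_service.client.aclose()
```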
```diff
@@ -196,18 +171,24 @@
             )
             result = response.content[0].text
         elif self.provider == "gemini":
-            # Gemini uses different API
-            import asyncio
+            # Gemini uses REST API
+            print(f"[Gemini Text] Calling Gemini REST API with model: {self.model}", flush=True)
 
-            def _generate_content():
-                return self.client.models.generate_content(
-                    model=self.model,
-                    contents=prompt.format(content=content)
-                )
+            url = f"{self.gemini_base_url}/v1beta/models/{self.model}:generateContent"
+            headers = {"Content-Type": "application/json"}
+            params = {"key": self.gemini_api_key}
+            payload = {
+                "contents": [{
+                    "parts": [{"text": prompt.format(content=content)}]
+                }]
+            }
 
-            print(f"[Gemini Text] Calling Gemini API with model: {self.model}", flush=True)
-            response = await asyncio.to_thread(_generate_content)
-            result = response.text
+            response = await self.client.post(url, headers=headers, params=params, json=payload)
+            response.raise_for_status()
+            response_data = response.json()
+
+            # Extract text from response
+            result = response_data["candidates"][0]["content"]["parts"][0]["text"]
             print(f"[Gemini Text] API call completed", flush=True)
         else: # OpenAI or Qwen
             response = await self.client.chat.completions.create(
```
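The hard-coded `candidates[0]["content"]["parts"][0]["text"]` path above assumes a well-formed response. Gemini can return HTTP 200 with no candidates or no text part (for example when output is blocked by safety settings), which would surface here as a KeyError or IndexError. A defensive-extraction sketch, not part of this commit:

```python
# Defensive variant of the extraction used in the diff (a sketch, not part
# of this commit). A 200 response may still lack candidates or parts, e.g.
# when output is blocked, so fail with a readable error instead.
def extract_text(response_data: dict) -> str:
    candidates = response_data.get("candidates") or []
    if not candidates:
        raise ValueError(f"Gemini returned no candidates: {response_data}")
    parts = candidates[0].get("content", {}).get("parts") or []
    if not parts or "text" not in parts[0]:
        raise ValueError(f"Gemini candidate has no text part: {candidates[0]}")
    return parts[0]["text"]
```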
```diff
@@ -450,27 +431,35 @@
         print(f"[Gemini PDF] Processing PDF: {filename}", flush=True)
         print(f"[Gemini PDF] File size: {len(pdf_bytes)} bytes", flush=True)
 
-        # Use Gemini's native PDF processing
-        # Run sync API in thread pool to avoid blocking
-        import asyncio
+        # Use Gemini's native PDF processing via REST API
+        import base64
 
-        def _generate_content():
-            return self.client.models.generate_content(
-                model=self.model,
-                contents=[
-                    types.Part.from_bytes(
-                        data=pdf_bytes,
-                        mime_type='application/pdf',
-                    ),
-                    prompt
-                ]
-            )
+        # Encode PDF to base64
+        pdf_base64 = base64.b64encode(pdf_bytes).decode('utf-8')
+        print(f"[Gemini PDF] PDF encoded to base64: {len(pdf_base64)} chars", flush=True)
+
+        # Build REST API request
+        url = f"{self.gemini_base_url}/v1beta/models/{self.model}:generateContent"
+        headers = {"Content-Type": "application/json"}
+        params = {"key": self.gemini_api_key}
+        payload = {
+            "contents": [{
+                "parts": [
+                    {"inline_data": {"mime_type": "application/pdf", "data": pdf_base64}},
+                    {"text": prompt}
+                ]
+            }]
+        }
 
-        print(f"[Gemini PDF] Calling Gemini API with model: {self.model}", flush=True)
-        response = await asyncio.to_thread(_generate_content)
+        print(f"[Gemini PDF] Calling Gemini REST API with model: {self.model}", flush=True)
+        response = await self.client.post(url, headers=headers, params=params, json=payload)
+        response.raise_for_status()
         print(f"[Gemini PDF] API call completed", flush=True)
 
-        result = response.text
+        response_data = response.json()
+
+        # Extract text from response
+        result = response_data["candidates"][0]["content"]["parts"][0]["text"]
         print(f"[Gemini PDF] Response retrieved, checking content...", flush=True)
 
         # Log original response for debugging
```
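Because `inline_data` ships the entire document inside the JSON body, and base64 inflates it by about a third, very large PDFs can exceed the API's inline request size limit (on the order of 20 MB per Google's documentation; treat the exact figure as an assumption). A sketch of a pre-flight guard, not part of this commit:

```python
# Sketch of a pre-flight size check before base64-encoding a PDF for
# inline_data (not in this commit). The ~20 MB request ceiling is taken
# from Gemini's documentation; treat the exact threshold as an assumption.
import base64

MAX_INLINE_BYTES = 20 * 1024 * 1024  # assumed inline_data request ceiling

def encode_pdf(pdf_bytes: bytes) -> str:
    # Base64 output is roughly 4/3 the input size, so check before encoding.
    if len(pdf_bytes) * 4 // 3 > MAX_INLINE_BYTES:
        raise ValueError(
            f"PDF too large for inline_data ({len(pdf_bytes)} bytes); "
            "consider an upload-based API for files this size"
        )
    return base64.b64encode(pdf_bytes).decode("utf-8")
```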
```diff
@@ -541,22 +530,20 @@
                 print(f"[Gemini PDF] ⚠️ Gemini returned empty array - PDF may not contain recognizable questions", flush=True)
                 print(f"[Gemini PDF] 💡 Trying to get Gemini's explanation...", flush=True)
 
-                # Ask Gemini what it saw in the PDF
-                def _ask_what_gemini_sees():
-                    return self.client.models.generate_content(
-                        model=self.model,
-                        contents=[
-                            types.Part.from_bytes(
-                                data=pdf_bytes,
-                                mime_type='application/pdf',
-                            ),
-                            "Please describe what you see in this PDF document. What is the main content? Are there any questions, exercises, or test items? Respond in Chinese."
-                        ]
-                    )
+                # Ask Gemini what it saw in the PDF using REST API
+                explanation_payload = {
+                    "contents": [{
+                        "parts": [
+                            {"inline_data": {"mime_type": "application/pdf", "data": pdf_base64}},
+                            {"text": "Please describe what you see in this PDF document. What is the main content? Are there any questions, exercises, or test items? Respond in Chinese."}
+                        ]
+                    }]
+                }
 
-                import asyncio
-                explanation_response = await asyncio.to_thread(_ask_what_gemini_sees)
-                explanation = explanation_response.text
+                explanation_response = await self.client.post(url, headers=headers, params=params, json=explanation_payload)
+                explanation_response.raise_for_status()
+                explanation_data = explanation_response.json()
+                explanation = explanation_data["candidates"][0]["content"]["parts"][0]["text"]
                 print(f"[Gemini PDF] 📄 Gemini sees: {explanation[:500]}...", flush=True)
 
                 raise Exception(f"No questions found in PDF. Gemini's description: {explanation[:200]}...")
```
```diff
@@ -661,17 +648,20 @@ Return ONLY the JSON object, no markdown or explanations."""
             )
             result = response.content[0].text
         elif self.provider == "gemini":
-            # Gemini uses different API
-            import asyncio
+            # Gemini uses REST API
+            url = f"{self.gemini_base_url}/v1beta/models/{self.model}:generateContent"
+            headers = {"Content-Type": "application/json"}
+            params = {"key": self.gemini_api_key}
+            payload = {
+                "contents": [{
+                    "parts": [{"text": prompt}]
+                }]
+            }
 
-            def _generate_content():
-                return self.client.models.generate_content(
-                    model=self.model,
-                    contents=prompt
-                )
-
-            response = await asyncio.to_thread(_generate_content)
-            result = response.text
+            response = await self.client.post(url, headers=headers, params=params, json=payload)
+            response.raise_for_status()
+            response_data = response.json()
+            result = response_data["candidates"][0]["content"]["parts"][0]["text"]
         else: # OpenAI or Qwen
             response = await self.client.chat.completions.create(
                 model=self.model,
```
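Finally, a quick way to confirm that a custom base URL (proxy or key-rotation relay) accepts this request shape is a one-off standalone call. A sketch; the proxy URL is the example from the commit message, and the model name and key are placeholders:

```python
# Standalone smoke test for a custom Gemini-compatible base URL (a sketch;
# model and key are placeholders, the proxy URL is the commit's example).
import httpx

base_url = "https://load.zhuzihan.com/proxy/gemini-self"
model = "gemini-1.5-flash"  # placeholder model name
response = httpx.post(
    f"{base_url}/v1beta/models/{model}:generateContent",
    params={"key": "YOUR_API_KEY"},
    json={"contents": [{"parts": [{"text": "ping"}]}]},
    timeout=120.0,
)
response.raise_for_status()
print(response.json()["candidates"][0]["content"]["parts"][0]["text"])
```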