refactor: rewrite Gemini integration as direct REST API calls to support custom proxy services

Core improvements:
- 🔄 Completely rewrote the Gemini integration
  - Removed the google-genai SDK dependency
  - Switched to calling the Gemini REST API directly via httpx
  - Full control over request URLs and parameters

- 🌐 Full support for custom base URLs
  - Works with proxy services (e.g. https://load.zhuzihan.com/proxy/gemini-self)
  - Works with API key rotation relay services
  - Compatible with the standard Gemini API format (v1beta/models/{model}:generateContent); see the URL sketch below
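Because the official endpoint and a self-hosted proxy expose the same path scheme, switching is only a base-URL swap. A rough sketch (the model name is a placeholder, not taken from this commit):

```python
# Sketch: the official host and a proxy expose the same generateContent path,
# so only the base URL changes. MODEL is a hypothetical name for illustration.
DEFAULT_BASE = "https://generativelanguage.googleapis.com"
PROXY_BASE = "https://load.zhuzihan.com/proxy/gemini-self"
MODEL = "gemini-1.5-flash"  # hypothetical

for base in (DEFAULT_BASE, PROXY_BASE):
    print(f"{base}/v1beta/models/{MODEL}:generateContent")
```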

- 📄 Full PDF processing capability retained
  - Uses the inline_data format (base64-encoded)
  - Native PDF understanding (up to 1,000 pages)
  - Preserves visual elements such as images, tables, and formulas; the payload shape is sketched below
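The inline_data payload shape, mirroring the diff further down (the local file path and prompt string are placeholders for illustration):

```python
import base64

# Mirror of the payload built in the diff below; the file path and prompt
# text are illustrative placeholders.
with open("document.pdf", "rb") as f:
    pdf_bytes = f.read()

pdf_base64 = base64.b64encode(pdf_bytes).decode("utf-8")
payload = {
    "contents": [{
        "parts": [
            {"inline_data": {"mime_type": "application/pdf", "data": pdf_base64}},
            {"text": "Extract all questions from this document."},  # placeholder prompt
        ]
    }]
}
```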

- ⚡ Improved performance and stability
  - Unified on httpx.AsyncClient
  - 120-second request timeout
  - Connection pooling (max_keepalive_connections=5); see the client sketch after this list
  - More robust error handling and logging
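The client construction, as it appears in the diff below:

```python
import httpx

# One long-lived AsyncClient: 120 s timeout plus a small keep-alive pool so
# successive Gemini calls reuse TCP connections.
client = httpx.AsyncClient(
    timeout=120.0,
    limits=httpx.Limits(max_keepalive_connections=5, max_connections=10),
)
```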

Technical details:
- Removed dependency: google-genai==1.0.0
- Request format: standard Gemini REST API (full round-trip sketched below)
- Response parsing: read candidates[0].content.parts[0].text straight from the JSON
- PDF upload: inline_data with base64 encoding
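A minimal end-to-end sketch of that round-trip; the base URL, API key, and model name are placeholders, not values from this commit:

```python
import asyncio

import httpx

GEMINI_BASE_URL = "https://generativelanguage.googleapis.com"  # or a proxy URL
API_KEY = "your-api-key"    # placeholder
MODEL = "gemini-1.5-flash"  # hypothetical model name

async def generate(prompt: str) -> str:
    # Standard Gemini REST request: key as a query param, JSON body of parts.
    url = f"{GEMINI_BASE_URL}/v1beta/models/{MODEL}:generateContent"
    async with httpx.AsyncClient(timeout=120.0) as client:
        response = await client.post(
            url,
            headers={"Content-Type": "application/json"},
            params={"key": API_KEY},
            json={"contents": [{"parts": [{"text": prompt}]}]},
        )
        response.raise_for_status()
        data = response.json()
    # Same extraction path the commit uses: candidates[0].content.parts[0].text
    return data["candidates"][0]["content"]["parts"][0]["text"]

if __name__ == "__main__":
    print(asyncio.run(generate("Say hello in one sentence.")))
```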

Affected areas:
- Text content parsing
- PDF document parsing
- Short-answer question grading
- AI reference answer generation

🎉 Gemini now works seamlessly with self-hosted proxy and key-rotation services!

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
Commit: f403eacb9d (parent: d24a1a1f92)
Date: 2025-12-01 23:01:44 +08:00
3 changed files with 88 additions and 95 deletions


@@ -15,7 +15,6 @@ aiofiles==23.2.1
httpx==0.26.0 httpx==0.26.0
openai==1.10.0 openai==1.10.0
anthropic==0.8.1 anthropic==0.8.1
google-genai==1.0.0
python-docx==1.1.0 python-docx==1.1.0
PyPDF2==3.0.1 PyPDF2==3.0.1
openpyxl==3.1.2 openpyxl==3.1.2


```diff
@@ -113,16 +113,20 @@ async def generate_ai_reference_answer(
     # Generate answer using LLM
     if llm_service.provider == "gemini":
-        import asyncio
-
-        def _generate():
-            return llm_service.client.models.generate_content(
-                model=llm_service.model,
-                contents=prompt
-            )
-        response = await asyncio.to_thread(_generate)
-        return response.text.strip()
+        # Use REST API for Gemini
+        url = f"{llm_service.gemini_base_url}/v1beta/models/{llm_service.model}:generateContent"
+        headers = {"Content-Type": "application/json"}
+        params = {"key": llm_service.gemini_api_key}
+        payload = {
+            "contents": [{
+                "parts": [{"text": prompt}]
+            }]
+        }
+        response = await llm_service.client.post(url, headers=headers, params=params, json=payload)
+        response.raise_for_status()
+        response_data = response.json()
+        return response_data["candidates"][0]["content"]["parts"][0]["text"].strip()
     elif llm_service.provider == "anthropic":
         response = await llm_service.client.messages.create(
             model=llm_service.model,
```


```diff
@@ -6,8 +6,6 @@ import json
 from typing import List, Dict, Any, Optional
 from openai import AsyncOpenAI
 from anthropic import AsyncAnthropic
-from google import genai
-from google.genai import types
 import httpx
 from models import QuestionType
@@ -83,43 +81,20 @@
             if not api_key:
                 raise ValueError("Gemini API key not configured")
 
-            # Create client with optional custom base URL
-            if base_url:
-                # Use custom base URL (for proxy/relay services)
-                print(f"[LLM Config] Using custom Gemini base URL: {base_url}", flush=True)
-                # Try different methods to set custom base URL
-                try:
-                    # Method 1: Try http_options parameter
-                    self.client = genai.Client(
-                        api_key=api_key,
-                        http_options={'api_endpoint': base_url}
-                    )
-                    print(f"[LLM Config] ✓ Set base URL via http_options", flush=True)
-                except TypeError:
-                    try:
-                        # Method 2: Try vertexai parameter (some versions)
-                        self.client = genai.Client(
-                            api_key=api_key,
-                            vertexai=False,
-                            client_options={'api_endpoint': base_url}
-                        )
-                        print(f"[LLM Config] ✓ Set base URL via client_options", flush=True)
-                    except:
-                        # Method 3: Set environment variable and create client
-                        print(f"[LLM Config] ⚠️ SDK doesn't support custom URL parameter, using environment variable", flush=True)
-                        os.environ['GOOGLE_API_BASE'] = base_url
-                        self.client = genai.Client(api_key=api_key)
-                        print(f"[LLM Config] ✓ Set base URL via environment variable", flush=True)
-            else:
-                # Use default Google API
-                self.client = genai.Client(api_key=api_key)
+            # Store Gemini configuration for REST API calls
+            self.gemini_api_key = api_key
+            self.gemini_base_url = base_url or "https://generativelanguage.googleapis.com"
+
+            # Create httpx client for REST API calls (instead of SDK)
+            self.client = httpx.AsyncClient(
+                timeout=120.0,
+                limits=httpx.Limits(max_keepalive_connections=5, max_connections=10)
+            )
 
             # Log configuration for debugging
-            print(f"[LLM Config] Provider: Gemini", flush=True)
+            print(f"[LLM Config] Provider: Gemini (REST API)", flush=True)
             print(f"[LLM Config] Model: {self.model}", flush=True)
-            if base_url:
-                print(f"[LLM Config] Base URL: {base_url}", flush=True)
+            print(f"[LLM Config] Base URL: {self.gemini_base_url}", flush=True)
             print(f"[LLM Config] API Key: {api_key[:10]}...{api_key[-4:] if len(api_key) > 14 else 'xxxx'}", flush=True)
         else:
@@ -196,18 +171,24 @@
             )
             result = response.content[0].text
         elif self.provider == "gemini":
-            # Gemini uses different API
-            import asyncio
-
-            def _generate_content():
-                return self.client.models.generate_content(
-                    model=self.model,
-                    contents=prompt.format(content=content)
-                )
-
-            print(f"[Gemini Text] Calling Gemini API with model: {self.model}", flush=True)
-            response = await asyncio.to_thread(_generate_content)
-            result = response.text
+            # Gemini uses REST API
+            print(f"[Gemini Text] Calling Gemini REST API with model: {self.model}", flush=True)
+            url = f"{self.gemini_base_url}/v1beta/models/{self.model}:generateContent"
+            headers = {"Content-Type": "application/json"}
+            params = {"key": self.gemini_api_key}
+            payload = {
+                "contents": [{
+                    "parts": [{"text": prompt.format(content=content)}]
+                }]
+            }
+            response = await self.client.post(url, headers=headers, params=params, json=payload)
+            response.raise_for_status()
+            response_data = response.json()
+            # Extract text from response
+            result = response_data["candidates"][0]["content"]["parts"][0]["text"]
             print(f"[Gemini Text] API call completed", flush=True)
         else:  # OpenAI or Qwen
             response = await self.client.chat.completions.create(
@@ -450,27 +431,35 @@
         print(f"[Gemini PDF] Processing PDF: {filename}", flush=True)
         print(f"[Gemini PDF] File size: {len(pdf_bytes)} bytes", flush=True)
 
-        # Use Gemini's native PDF processing
-        # Run sync API in thread pool to avoid blocking
-        import asyncio
-
-        def _generate_content():
-            return self.client.models.generate_content(
-                model=self.model,
-                contents=[
-                    types.Part.from_bytes(
-                        data=pdf_bytes,
-                        mime_type='application/pdf',
-                    ),
-                    prompt
-                ]
-            )
+        # Use Gemini's native PDF processing via REST API
+        import base64
+
+        # Encode PDF to base64
+        pdf_base64 = base64.b64encode(pdf_bytes).decode('utf-8')
+        print(f"[Gemini PDF] PDF encoded to base64: {len(pdf_base64)} chars", flush=True)
+
+        # Build REST API request
+        url = f"{self.gemini_base_url}/v1beta/models/{self.model}:generateContent"
+        headers = {"Content-Type": "application/json"}
+        params = {"key": self.gemini_api_key}
+        payload = {
+            "contents": [{
+                "parts": [
+                    {"inline_data": {"mime_type": "application/pdf", "data": pdf_base64}},
+                    {"text": prompt}
+                ]
+            }]
+        }
 
-        print(f"[Gemini PDF] Calling Gemini API with model: {self.model}", flush=True)
-        response = await asyncio.to_thread(_generate_content)
+        print(f"[Gemini PDF] Calling Gemini REST API with model: {self.model}", flush=True)
+        response = await self.client.post(url, headers=headers, params=params, json=payload)
+        response.raise_for_status()
 
         print(f"[Gemini PDF] API call completed", flush=True)
-        result = response.text
+        response_data = response.json()
+        # Extract text from response
+        result = response_data["candidates"][0]["content"]["parts"][0]["text"]
         print(f"[Gemini PDF] Response retrieved, checking content...", flush=True)
 
         # Log original response for debugging
@@ -541,22 +530,20 @@
                 print(f"[Gemini PDF] ⚠️ Gemini returned empty array - PDF may not contain recognizable questions", flush=True)
                 print(f"[Gemini PDF] 💡 Trying to get Gemini's explanation...", flush=True)
 
-                # Ask Gemini what it saw in the PDF
-                def _ask_what_gemini_sees():
-                    return self.client.models.generate_content(
-                        model=self.model,
-                        contents=[
-                            types.Part.from_bytes(
-                                data=pdf_bytes,
-                                mime_type='application/pdf',
-                            ),
-                            "Please describe what you see in this PDF document. What is the main content? Are there any questions, exercises, or test items? Respond in Chinese."
-                        ]
-                    )
-
-                import asyncio
-                explanation_response = await asyncio.to_thread(_ask_what_gemini_sees)
-                explanation = explanation_response.text
+                # Ask Gemini what it saw in the PDF using REST API
+                explanation_payload = {
+                    "contents": [{
+                        "parts": [
+                            {"inline_data": {"mime_type": "application/pdf", "data": pdf_base64}},
+                            {"text": "Please describe what you see in this PDF document. What is the main content? Are there any questions, exercises, or test items? Respond in Chinese."}
+                        ]
+                    }]
+                }
+                explanation_response = await self.client.post(url, headers=headers, params=params, json=explanation_payload)
+                explanation_response.raise_for_status()
+                explanation_data = explanation_response.json()
+                explanation = explanation_data["candidates"][0]["content"]["parts"][0]["text"]
                 print(f"[Gemini PDF] 📄 Gemini sees: {explanation[:500]}...", flush=True)
 
                 raise Exception(f"No questions found in PDF. Gemini's description: {explanation[:200]}...")
@@ -661,17 +648,20 @@ Return ONLY the JSON object, no markdown or explanations."""
             )
             result = response.content[0].text
         elif self.provider == "gemini":
-            # Gemini uses different API
-            import asyncio
-
-            def _generate_content():
-                return self.client.models.generate_content(
-                    model=self.model,
-                    contents=prompt
-                )
-
-            response = await asyncio.to_thread(_generate_content)
-            result = response.text
+            # Gemini uses REST API
+            url = f"{self.gemini_base_url}/v1beta/models/{self.model}:generateContent"
+            headers = {"Content-Type": "application/json"}
+            params = {"key": self.gemini_api_key}
+            payload = {
+                "contents": [{
+                    "parts": [{"text": prompt}]
+                }]
+            }
+            response = await self.client.post(url, headers=headers, params=params, json=payload)
+            response.raise_for_status()
+            response_data = response.json()
+            result = response_data["candidates"][0]["content"]["parts"][0]["text"]
         else:  # OpenAI or Qwen
             response = await self.client.chat.completions.create(
                 model=self.model,
```