refactor: rewrite the Gemini integration as direct REST API calls to support custom proxy services

Core improvements:
- 🔄 Complete rewrite of the Gemini implementation
  - Removed the google-genai SDK dependency
  - Now calls the Gemini REST API directly via httpx
  - Full control over the request URL and parameters

- 🌐 Full support for custom base URLs
  - Works with proxy services (e.g. https://load.zhuzihan.com/proxy/gemini-self)
  - Works with API-key rotation relay services
  - Compatible with the standard Gemini API format (v1beta/models/{model}:generateContent); see the URL sketch below
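
A minimal sketch of how the request URL is assembled; the base_url and model values here are illustrative placeholders, the real ones come from the service configuration in the diff below:

    # Illustrative values; the actual ones come from the LLMService configuration
    base_url = "https://load.zhuzihan.com/proxy/gemini-self"  # or "https://generativelanguage.googleapis.com"
    model = "gemini-1.5-flash"  # hypothetical model name
    url = f"{base_url}/v1beta/models/{model}:generateContent"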

- 📄 Full PDF processing capability retained
  - Uses the inline_data format (base64-encoded; sketched below)
  - Native PDF understanding (up to 1,000 pages)
  - Preserves visual elements such as images, tables, and formulas
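
A sketch of the inline_data request body, assuming pdf_bytes holds the raw file; this mirrors the payloads in the diff below, with an illustrative prompt string:

    import base64

    pdf_base64 = base64.b64encode(pdf_bytes).decode("utf-8")
    payload = {
        "contents": [{
            "parts": [
                {"inline_data": {"mime_type": "application/pdf", "data": pdf_base64}},
                {"text": "Extract all questions from this PDF."}  # illustrative prompt
            ]
        }]
    }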

- Improved performance and stability
  - Uses httpx.AsyncClient throughout (construction sketched below)
  - 120-second timeout configuration
  - Connection-pool management (max_keepalive_connections=5)
  - Robust error handling and log output
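
The client construction, matching the configuration in the diff below:

    import httpx

    client = httpx.AsyncClient(
        timeout=120.0,  # generous timeout for large PDF requests
        limits=httpx.Limits(max_keepalive_connections=5, max_connections=10)
    )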

Technical details:
- Removed dependency: google-genai==1.0.0
- Request format: standard Gemini REST API
- Response parsing: extract candidates[0].content.parts[0].text directly from the JSON
- PDF upload: inline_data with base64 encoding (a minimal request/response roundtrip is sketched below)
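
Putting the pieces above together, a minimal roundtrip; the API key and prompt are placeholders, client and url come from the earlier sketches, and the call must run inside an async function:

    params = {"key": "YOUR_GEMINI_API_KEY"}  # placeholder
    headers = {"Content-Type": "application/json"}
    payload = {"contents": [{"parts": [{"text": "Hello"}]}]}
    response = await client.post(url, headers=headers, params=params, json=payload)
    response.raise_for_status()
    text = response.json()["candidates"][0]["content"]["parts"][0]["text"]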

Affected areas:
- Text content parsing
- PDF document parsing
- Short-answer grading
- AI reference answer generation

🎉 Gemini now works seamlessly with self-hosted proxy/key-rotation services!

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-01 23:01:44 +08:00
parent d24a1a1f92
commit f403eacb9d
3 changed files with 88 additions and 95 deletions


@@ -15,7 +15,6 @@ aiofiles==23.2.1
 httpx==0.26.0
 openai==1.10.0
 anthropic==0.8.1
-google-genai==1.0.0
 python-docx==1.1.0
 PyPDF2==3.0.1
 openpyxl==3.1.2


@@ -113,16 +113,20 @@ async def generate_ai_reference_answer(
     # Generate answer using LLM
     if llm_service.provider == "gemini":
-        import asyncio
-        def _generate():
-            return llm_service.client.models.generate_content(
-                model=llm_service.model,
-                contents=prompt
-            )
-        response = await asyncio.to_thread(_generate)
-        return response.text.strip()
+        # Use REST API for Gemini
+        url = f"{llm_service.gemini_base_url}/v1beta/models/{llm_service.model}:generateContent"
+        headers = {"Content-Type": "application/json"}
+        params = {"key": llm_service.gemini_api_key}
+        payload = {
+            "contents": [{
+                "parts": [{"text": prompt}]
+            }]
+        }
+        response = await llm_service.client.post(url, headers=headers, params=params, json=payload)
+        response.raise_for_status()
+        response_data = response.json()
+        return response_data["candidates"][0]["content"]["parts"][0]["text"].strip()
     elif llm_service.provider == "anthropic":
         response = await llm_service.client.messages.create(
             model=llm_service.model,


@@ -6,8 +6,6 @@ import json
 from typing import List, Dict, Any, Optional
 from openai import AsyncOpenAI
 from anthropic import AsyncAnthropic
-from google import genai
-from google.genai import types
 import httpx
 from models import QuestionType
@@ -83,43 +81,20 @@ class LLMService:
             if not api_key:
                 raise ValueError("Gemini API key not configured")
-            # Create client with optional custom base URL
-            if base_url:
-                # Use custom base URL (for proxy/relay services)
-                print(f"[LLM Config] Using custom Gemini base URL: {base_url}", flush=True)
-                # Try different methods to set custom base URL
-                try:
-                    # Method 1: Try http_options parameter
-                    self.client = genai.Client(
-                        api_key=api_key,
-                        http_options={'api_endpoint': base_url}
-                    )
-                    print(f"[LLM Config] ✓ Set base URL via http_options", flush=True)
-                except TypeError:
-                    try:
-                        # Method 2: Try vertexai parameter (some versions)
-                        self.client = genai.Client(
-                            api_key=api_key,
-                            vertexai=False,
-                            client_options={'api_endpoint': base_url}
-                        )
-                        print(f"[LLM Config] ✓ Set base URL via client_options", flush=True)
-                    except:
-                        # Method 3: Set environment variable and create client
-                        print(f"[LLM Config] ⚠️ SDK doesn't support custom URL parameter, using environment variable", flush=True)
-                        os.environ['GOOGLE_API_BASE'] = base_url
-                        self.client = genai.Client(api_key=api_key)
-                        print(f"[LLM Config] ✓ Set base URL via environment variable", flush=True)
-            else:
-                # Use default Google API
-                self.client = genai.Client(api_key=api_key)
+            # Store Gemini configuration for REST API calls
+            self.gemini_api_key = api_key
+            self.gemini_base_url = base_url or "https://generativelanguage.googleapis.com"
+            # Create httpx client for REST API calls (instead of SDK)
+            self.client = httpx.AsyncClient(
+                timeout=120.0,
+                limits=httpx.Limits(max_keepalive_connections=5, max_connections=10)
+            )
             # Log configuration for debugging
-            print(f"[LLM Config] Provider: Gemini", flush=True)
+            print(f"[LLM Config] Provider: Gemini (REST API)", flush=True)
             print(f"[LLM Config] Model: {self.model}", flush=True)
-            if base_url:
-                print(f"[LLM Config] Base URL: {base_url}", flush=True)
+            print(f"[LLM Config] Base URL: {self.gemini_base_url}", flush=True)
             print(f"[LLM Config] API Key: {api_key[:10]}...{api_key[-4:] if len(api_key) > 14 else 'xxxx'}", flush=True)
         else:
@@ -196,18 +171,24 @@
                 )
                 result = response.content[0].text
             elif self.provider == "gemini":
-                # Gemini uses different API
-                import asyncio
-                def _generate_content():
-                    return self.client.models.generate_content(
-                        model=self.model,
-                        contents=prompt.format(content=content)
-                    )
-                print(f"[Gemini Text] Calling Gemini API with model: {self.model}", flush=True)
-                response = await asyncio.to_thread(_generate_content)
-                result = response.text
+                # Gemini uses REST API
+                print(f"[Gemini Text] Calling Gemini REST API with model: {self.model}", flush=True)
+                url = f"{self.gemini_base_url}/v1beta/models/{self.model}:generateContent"
+                headers = {"Content-Type": "application/json"}
+                params = {"key": self.gemini_api_key}
+                payload = {
+                    "contents": [{
+                        "parts": [{"text": prompt.format(content=content)}]
+                    }]
+                }
+                response = await self.client.post(url, headers=headers, params=params, json=payload)
+                response.raise_for_status()
+                response_data = response.json()
+                # Extract text from response
+                result = response_data["candidates"][0]["content"]["parts"][0]["text"]
                 print(f"[Gemini Text] API call completed", flush=True)
             else:  # OpenAI or Qwen
                 response = await self.client.chat.completions.create(
@@ -450,27 +431,35 @@
         print(f"[Gemini PDF] Processing PDF: {filename}", flush=True)
         print(f"[Gemini PDF] File size: {len(pdf_bytes)} bytes", flush=True)
-        # Use Gemini's native PDF processing
-        # Run sync API in thread pool to avoid blocking
-        import asyncio
-        def _generate_content():
-            return self.client.models.generate_content(
-                model=self.model,
-                contents=[
-                    types.Part.from_bytes(
-                        data=pdf_bytes,
-                        mime_type='application/pdf',
-                    ),
-                    prompt
-                ]
-            )
-        print(f"[Gemini PDF] Calling Gemini API with model: {self.model}", flush=True)
-        response = await asyncio.to_thread(_generate_content)
+        # Use Gemini's native PDF processing via REST API
+        import base64
+        # Encode PDF to base64
+        pdf_base64 = base64.b64encode(pdf_bytes).decode('utf-8')
+        print(f"[Gemini PDF] PDF encoded to base64: {len(pdf_base64)} chars", flush=True)
+        # Build REST API request
+        url = f"{self.gemini_base_url}/v1beta/models/{self.model}:generateContent"
+        headers = {"Content-Type": "application/json"}
+        params = {"key": self.gemini_api_key}
+        payload = {
+            "contents": [{
+                "parts": [
+                    {"inline_data": {"mime_type": "application/pdf", "data": pdf_base64}},
+                    {"text": prompt}
+                ]
+            }]
+        }
+        print(f"[Gemini PDF] Calling Gemini REST API with model: {self.model}", flush=True)
+        response = await self.client.post(url, headers=headers, params=params, json=payload)
+        response.raise_for_status()
         print(f"[Gemini PDF] API call completed", flush=True)
-        result = response.text
+        response_data = response.json()
+        # Extract text from response
+        result = response_data["candidates"][0]["content"]["parts"][0]["text"]
         print(f"[Gemini PDF] Response retrieved, checking content...", flush=True)
         # Log original response for debugging
@@ -541,22 +530,20 @@
                 print(f"[Gemini PDF] ⚠️ Gemini returned empty array - PDF may not contain recognizable questions", flush=True)
                 print(f"[Gemini PDF] 💡 Trying to get Gemini's explanation...", flush=True)
-                # Ask Gemini what it saw in the PDF
-                def _ask_what_gemini_sees():
-                    return self.client.models.generate_content(
-                        model=self.model,
-                        contents=[
-                            types.Part.from_bytes(
-                                data=pdf_bytes,
-                                mime_type='application/pdf',
-                            ),
-                            "Please describe what you see in this PDF document. What is the main content? Are there any questions, exercises, or test items? Respond in Chinese."
-                        ]
-                    )
-                import asyncio
-                explanation_response = await asyncio.to_thread(_ask_what_gemini_sees)
-                explanation = explanation_response.text
+                # Ask Gemini what it saw in the PDF using REST API
+                explanation_payload = {
+                    "contents": [{
+                        "parts": [
+                            {"inline_data": {"mime_type": "application/pdf", "data": pdf_base64}},
+                            {"text": "Please describe what you see in this PDF document. What is the main content? Are there any questions, exercises, or test items? Respond in Chinese."}
+                        ]
+                    }]
+                }
+                explanation_response = await self.client.post(url, headers=headers, params=params, json=explanation_payload)
+                explanation_response.raise_for_status()
+                explanation_data = explanation_response.json()
+                explanation = explanation_data["candidates"][0]["content"]["parts"][0]["text"]
                 print(f"[Gemini PDF] 📄 Gemini sees: {explanation[:500]}...", flush=True)
                 raise Exception(f"No questions found in PDF. Gemini's description: {explanation[:200]}...")
@@ -661,17 +648,20 @@ Return ONLY the JSON object, no markdown or explanations."""
                 )
                 result = response.content[0].text
             elif self.provider == "gemini":
-                # Gemini uses different API
-                import asyncio
-                def _generate_content():
-                    return self.client.models.generate_content(
-                        model=self.model,
-                        contents=prompt
-                    )
-                response = await asyncio.to_thread(_generate_content)
-                result = response.text
+                # Gemini uses REST API
+                url = f"{self.gemini_base_url}/v1beta/models/{self.model}:generateContent"
+                headers = {"Content-Type": "application/json"}
+                params = {"key": self.gemini_api_key}
+                payload = {
+                    "contents": [{
+                        "parts": [{"text": prompt}]
+                    }]
+                }
+                response = await self.client.post(url, headers=headers, params=params, json=payload)
+                response.raise_for_status()
+                response_data = response.json()
+                result = response_data["candidates"][0]["content"]["parts"][0]["text"]
             else:  # OpenAI or Qwen
                 response = await self.client.chat.completions.create(
                     model=self.model,