Mirror of https://github.com/handsomezhuzhu/QQuiz.git, synced 2026-02-20 20:10:14 +00:00
refactor: rewrite the Gemini integration as direct REST API calls to support custom proxy services
Core improvements:

- 🔄 Complete rewrite of the Gemini integration
  - Removed the google-genai SDK dependency
  - Switched to calling the Gemini REST API directly via httpx
  - Full control over request URLs and parameters
- 🌐 First-class support for custom base URLs
  - Works with proxy services (e.g. https://load.zhuzihan.com/proxy/gemini-self)
  - Works with API-key rotation relay services
  - Compatible with the standard Gemini API format (v1beta/models/{model}:generateContent)
- 📄 Full PDF processing capability preserved
  - Uses the inline_data format (base64-encoded)
  - Native PDF understanding (up to 1000 pages)
  - Images, tables, formulas, and other visual elements fully preserved
- ⚡ Performance and stability improvements
  - Standardized on httpx.AsyncClient
  - 120-second timeout
  - Connection pool management (max_keepalive_connections=5)
  - Improved error handling and logging

Technical details:
- Removed dependency: google-genai==1.0.0
- Request format: standard Gemini REST API
- Response parsing: extract candidates[0].content.parts[0].text directly from the JSON
- PDF upload: inline_data with base64 encoding

Affected functionality:
- Text content parsing ✅
- PDF document parsing ✅
- Short-answer grading ✅
- AI reference answer generation ✅

🎉 Gemini now works seamlessly with user-hosted proxy/rotation relay services!

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
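For orientation before the diff itself: the request/response shape the commit standardizes on looks roughly like the following. This is a minimal standalone sketch, not code from the diff; the base URL, model name, and API key are placeholders.

```python
# Minimal sketch of the Gemini REST call this commit adopts (not code from
# the diff itself). Base URL, model name, and key below are placeholders.
import asyncio
import httpx

BASE_URL = "https://generativelanguage.googleapis.com"  # or a custom proxy
MODEL = "gemini-1.5-flash"  # placeholder model name
API_KEY = "YOUR_API_KEY"    # placeholder

async def generate(prompt: str) -> str:
    url = f"{BASE_URL}/v1beta/models/{MODEL}:generateContent"
    payload = {"contents": [{"parts": [{"text": prompt}]}]}
    async with httpx.AsyncClient(timeout=120.0) as client:
        response = await client.post(
            url,
            headers={"Content-Type": "application/json"},
            params={"key": API_KEY},
            json=payload,
        )
        response.raise_for_status()
        data = response.json()
        # Same JSON path the diff uses to pull out the generated text
        return data["candidates"][0]["content"]["parts"][0]["text"]

print(asyncio.run(generate("Say hello in one word.")))
```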
```diff
@@ -15,7 +15,6 @@ aiofiles==23.2.1
 httpx==0.26.0
 openai==1.10.0
 anthropic==0.8.1
-google-genai==1.0.0
 python-docx==1.1.0
 PyPDF2==3.0.1
 openpyxl==3.1.2
```
```diff
@@ -113,16 +113,20 @@ async def generate_ai_reference_answer(
 
     # Generate answer using LLM
     if llm_service.provider == "gemini":
-        import asyncio
+        # Use REST API for Gemini
+        url = f"{llm_service.gemini_base_url}/v1beta/models/{llm_service.model}:generateContent"
+        headers = {"Content-Type": "application/json"}
+        params = {"key": llm_service.gemini_api_key}
+        payload = {
+            "contents": [{
+                "parts": [{"text": prompt}]
+            }]
+        }
 
-        def _generate():
-            return llm_service.client.models.generate_content(
-                model=llm_service.model,
-                contents=prompt
-            )
-
-        response = await asyncio.to_thread(_generate)
-        return response.text.strip()
+        response = await llm_service.client.post(url, headers=headers, params=params, json=payload)
+        response.raise_for_status()
+        response_data = response.json()
+        return response_data["candidates"][0]["content"]["parts"][0]["text"].strip()
    elif llm_service.provider == "anthropic":
        response = await llm_service.client.messages.create(
            model=llm_service.model,
```
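The post / raise_for_status / JSON-path sequence above is repeated verbatim in every Gemini branch of this diff. One possible follow-up refactor, not part of this commit, would hoist it into a shared helper. A sketch, assuming the attributes the diff introduces on LLMService (gemini_base_url, gemini_api_key, model, and an httpx.AsyncClient stored as client):

```python
# Hypothetical consolidation of the repeated REST-call pattern (not part of
# this commit). Assumes the attributes this diff adds to LLMService.
from typing import Any

import httpx


class GeminiRESTMixin:
    gemini_base_url: str
    gemini_api_key: str
    model: str
    client: httpx.AsyncClient

    async def _gemini_generate(self, parts: list[dict[str, Any]]) -> str:
        """POST a generateContent request and return the first text part."""
        url = f"{self.gemini_base_url}/v1beta/models/{self.model}:generateContent"
        response = await self.client.post(
            url,
            headers={"Content-Type": "application/json"},
            params={"key": self.gemini_api_key},
            json={"contents": [{"parts": parts}]},
        )
        response.raise_for_status()
        data = response.json()
        return data["candidates"][0]["content"]["parts"][0]["text"]
```

Each Gemini branch would then reduce to `await self._gemini_generate([{"text": prompt}])`, with an extra `inline_data` part for the PDF paths.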
```diff
@@ -6,8 +6,6 @@ import json
 from typing import List, Dict, Any, Optional
 from openai import AsyncOpenAI
 from anthropic import AsyncAnthropic
-from google import genai
-from google.genai import types
 import httpx
 
 from models import QuestionType
```
```diff
@@ -83,43 +81,20 @@ class LLMService:
             if not api_key:
                 raise ValueError("Gemini API key not configured")
 
-            # Create client with optional custom base URL
-            if base_url:
-                # Use custom base URL (for proxy/relay services)
-                print(f"[LLM Config] Using custom Gemini base URL: {base_url}", flush=True)
-
-                # Try different methods to set custom base URL
-                try:
-                    # Method 1: Try http_options parameter
-                    self.client = genai.Client(
-                        api_key=api_key,
-                        http_options={'api_endpoint': base_url}
-                    )
-                    print(f"[LLM Config] ✓ Set base URL via http_options", flush=True)
-                except TypeError:
-                    try:
-                        # Method 2: Try vertexai parameter (some versions)
-                        self.client = genai.Client(
-                            api_key=api_key,
-                            vertexai=False,
-                            client_options={'api_endpoint': base_url}
-                        )
-                        print(f"[LLM Config] ✓ Set base URL via client_options", flush=True)
-                    except:
-                        # Method 3: Set environment variable and create client
-                        print(f"[LLM Config] ⚠️ SDK doesn't support custom URL parameter, using environment variable", flush=True)
-                        os.environ['GOOGLE_API_BASE'] = base_url
-                        self.client = genai.Client(api_key=api_key)
-                        print(f"[LLM Config] ✓ Set base URL via environment variable", flush=True)
-            else:
-                # Use default Google API
-                self.client = genai.Client(api_key=api_key)
+            # Store Gemini configuration for REST API calls
+            self.gemini_api_key = api_key
+            self.gemini_base_url = base_url or "https://generativelanguage.googleapis.com"
+
+            # Create httpx client for REST API calls (instead of SDK)
+            self.client = httpx.AsyncClient(
+                timeout=120.0,
+                limits=httpx.Limits(max_keepalive_connections=5, max_connections=10)
+            )
 
             # Log configuration for debugging
-            print(f"[LLM Config] Provider: Gemini", flush=True)
+            print(f"[LLM Config] Provider: Gemini (REST API)", flush=True)
             print(f"[LLM Config] Model: {self.model}", flush=True)
-            if base_url:
-                print(f"[LLM Config] Base URL: {base_url}", flush=True)
+            print(f"[LLM Config] Base URL: {self.gemini_base_url}", flush=True)
             print(f"[LLM Config] API Key: {api_key[:10]}...{api_key[-4:] if len(api_key) > 14 else 'xxxx'}", flush=True)
 
         else:
```
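One thing this hunk does not add is teardown: the SDK client is replaced with a long-lived `httpx.AsyncClient`, which keeps pooled connections open. A sketch of explicit cleanup at shutdown, assuming the caller owns the service instance (the hook itself is hypothetical, not in the diff):

```python
# Hypothetical teardown, not in this commit: httpx.AsyncClient holds pooled
# connections, so close it when the application shuts down.
import httpx

async def shutdown_llm_service(llm_service) -> None:
    # Only the Gemini path stores a raw httpx.AsyncClient on .client;
    # the other providers' SDK clients manage their own transport.
    if isinstance(llm_service.client, httpx.AsyncClient):
        await llm_service.client.aclose()
```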
```diff
@@ -196,18 +171,24 @@
             )
             result = response.content[0].text
         elif self.provider == "gemini":
-            # Gemini uses different API
-            import asyncio
+            # Gemini uses REST API
+            print(f"[Gemini Text] Calling Gemini REST API with model: {self.model}", flush=True)
 
-            def _generate_content():
-                return self.client.models.generate_content(
-                    model=self.model,
-                    contents=prompt.format(content=content)
-                )
+            url = f"{self.gemini_base_url}/v1beta/models/{self.model}:generateContent"
+            headers = {"Content-Type": "application/json"}
+            params = {"key": self.gemini_api_key}
+            payload = {
+                "contents": [{
+                    "parts": [{"text": prompt.format(content=content)}]
+                }]
+            }
 
-            print(f"[Gemini Text] Calling Gemini API with model: {self.model}", flush=True)
-            response = await asyncio.to_thread(_generate_content)
-            result = response.text
+            response = await self.client.post(url, headers=headers, params=params, json=payload)
+            response.raise_for_status()
+            response_data = response.json()
+
+            # Extract text from response
+            result = response_data["candidates"][0]["content"]["parts"][0]["text"]
             print(f"[Gemini Text] API call completed", flush=True)
         else: # OpenAI or Qwen
             response = await self.client.chat.completions.create(
```
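The hard-coded `candidates[0]["content"]["parts"][0]["text"]` path above assumes a well-formed response. Gemini can return HTTP 200 with no candidates or no text part (for example when output is blocked by safety settings), which would surface here as a KeyError or IndexError. A defensive-extraction sketch, not part of this commit:

```python
# Defensive variant of the extraction used in the diff (a sketch, not part
# of this commit). A 200 response may still lack candidates or parts, e.g.
# when output is blocked, so fail with a readable error instead.
def extract_text(response_data: dict) -> str:
    candidates = response_data.get("candidates") or []
    if not candidates:
        raise ValueError(f"Gemini returned no candidates: {response_data}")
    parts = candidates[0].get("content", {}).get("parts") or []
    if not parts or "text" not in parts[0]:
        raise ValueError(f"Gemini candidate has no text part: {candidates[0]}")
    return parts[0]["text"]
```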
```diff
@@ -450,27 +431,35 @@
         print(f"[Gemini PDF] Processing PDF: {filename}", flush=True)
         print(f"[Gemini PDF] File size: {len(pdf_bytes)} bytes", flush=True)
 
-        # Use Gemini's native PDF processing
-        # Run sync API in thread pool to avoid blocking
-        import asyncio
+        # Use Gemini's native PDF processing via REST API
+        import base64
 
-        def _generate_content():
-            return self.client.models.generate_content(
-                model=self.model,
-                contents=[
-                    types.Part.from_bytes(
-                        data=pdf_bytes,
-                        mime_type='application/pdf',
-                    ),
-                    prompt
-                ]
-            )
+        # Encode PDF to base64
+        pdf_base64 = base64.b64encode(pdf_bytes).decode('utf-8')
+        print(f"[Gemini PDF] PDF encoded to base64: {len(pdf_base64)} chars", flush=True)
+
+        # Build REST API request
+        url = f"{self.gemini_base_url}/v1beta/models/{self.model}:generateContent"
+        headers = {"Content-Type": "application/json"}
+        params = {"key": self.gemini_api_key}
+        payload = {
+            "contents": [{
+                "parts": [
+                    {"inline_data": {"mime_type": "application/pdf", "data": pdf_base64}},
+                    {"text": prompt}
+                ]
+            }]
+        }
 
-        print(f"[Gemini PDF] Calling Gemini API with model: {self.model}", flush=True)
-        response = await asyncio.to_thread(_generate_content)
+        print(f"[Gemini PDF] Calling Gemini REST API with model: {self.model}", flush=True)
+        response = await self.client.post(url, headers=headers, params=params, json=payload)
+        response.raise_for_status()
         print(f"[Gemini PDF] API call completed", flush=True)
 
-        result = response.text
+        response_data = response.json()
+
+        # Extract text from response
+        result = response_data["candidates"][0]["content"]["parts"][0]["text"]
         print(f"[Gemini PDF] Response retrieved, checking content...", flush=True)
 
         # Log original response for debugging
```
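Because `inline_data` ships the entire document inside the JSON body, and base64 inflates it by about a third, very large PDFs can exceed the API's inline request size limit (on the order of 20 MB per Google's documentation; treat the exact figure as an assumption). A sketch of a pre-flight guard, not part of this commit:

```python
# Sketch of a pre-flight size check before base64-encoding a PDF for
# inline_data (not in this commit). The ~20 MB request ceiling is taken
# from Gemini's documentation; treat the exact threshold as an assumption.
import base64

MAX_INLINE_BYTES = 20 * 1024 * 1024  # assumed inline_data request ceiling

def encode_pdf(pdf_bytes: bytes) -> str:
    # Base64 output is roughly 4/3 the input size, so check before encoding.
    if len(pdf_bytes) * 4 // 3 > MAX_INLINE_BYTES:
        raise ValueError(
            f"PDF too large for inline_data ({len(pdf_bytes)} bytes); "
            "consider an upload-based API for files this size"
        )
    return base64.b64encode(pdf_bytes).decode("utf-8")
```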
```diff
@@ -541,22 +530,20 @@
                 print(f"[Gemini PDF] ⚠️ Gemini returned empty array - PDF may not contain recognizable questions", flush=True)
                 print(f"[Gemini PDF] 💡 Trying to get Gemini's explanation...", flush=True)
 
-                # Ask Gemini what it saw in the PDF
-                def _ask_what_gemini_sees():
-                    return self.client.models.generate_content(
-                        model=self.model,
-                        contents=[
-                            types.Part.from_bytes(
-                                data=pdf_bytes,
-                                mime_type='application/pdf',
-                            ),
-                            "Please describe what you see in this PDF document. What is the main content? Are there any questions, exercises, or test items? Respond in Chinese."
-                        ]
-                    )
+                # Ask Gemini what it saw in the PDF using REST API
+                explanation_payload = {
+                    "contents": [{
+                        "parts": [
+                            {"inline_data": {"mime_type": "application/pdf", "data": pdf_base64}},
+                            {"text": "Please describe what you see in this PDF document. What is the main content? Are there any questions, exercises, or test items? Respond in Chinese."}
+                        ]
+                    }]
+                }
 
-                import asyncio
-                explanation_response = await asyncio.to_thread(_ask_what_gemini_sees)
-                explanation = explanation_response.text
+                explanation_response = await self.client.post(url, headers=headers, params=params, json=explanation_payload)
+                explanation_response.raise_for_status()
+                explanation_data = explanation_response.json()
+                explanation = explanation_data["candidates"][0]["content"]["parts"][0]["text"]
                 print(f"[Gemini PDF] 📄 Gemini sees: {explanation[:500]}...", flush=True)
 
                 raise Exception(f"No questions found in PDF. Gemini's description: {explanation[:200]}...")
```
```diff
@@ -661,17 +648,20 @@ Return ONLY the JSON object, no markdown or explanations."""
             )
             result = response.content[0].text
         elif self.provider == "gemini":
-            # Gemini uses different API
-            import asyncio
+            # Gemini uses REST API
+            url = f"{self.gemini_base_url}/v1beta/models/{self.model}:generateContent"
+            headers = {"Content-Type": "application/json"}
+            params = {"key": self.gemini_api_key}
+            payload = {
+                "contents": [{
+                    "parts": [{"text": prompt}]
+                }]
+            }
 
-            def _generate_content():
-                return self.client.models.generate_content(
-                    model=self.model,
-                    contents=prompt
-                )
-
-            response = await asyncio.to_thread(_generate_content)
-            result = response.text
+            response = await self.client.post(url, headers=headers, params=params, json=payload)
+            response.raise_for_status()
+            response_data = response.json()
+            result = response_data["candidates"][0]["content"]["parts"][0]["text"]
         else: # OpenAI or Qwen
             response = await self.client.chat.completions.create(
                 model=self.model,
```
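Finally, a quick way to confirm that a custom base URL (proxy or key-rotation relay) accepts this request shape is a one-off standalone call. A sketch; the proxy URL is the example from the commit message, and the model name and key are placeholders:

```python
# Standalone smoke test for a custom Gemini-compatible base URL (a sketch;
# model and key are placeholders, the proxy URL is the commit's example).
import httpx

base_url = "https://load.zhuzihan.com/proxy/gemini-self"
model = "gemini-1.5-flash"  # placeholder model name
response = httpx.post(
    f"{base_url}/v1beta/models/{model}:generateContent",
    params={"key": "YOUR_API_KEY"},
    json={"contents": [{"parts": [{"text": "ping"}]}]},
    timeout=120.0,
)
response.raise_for_status()
print(response.json()["candidates"][0]["content"]["parts"][0]["text"])
```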