Your Name vor 5 Tagen
Ursprung
Commit
7ea9f53dab

+ 14 - 1
Makefile

@@ -1,4 +1,4 @@
-.PHONY: build run dev web-dev web-build docker-up docker-down docker-logs db-migrate
+.PHONY: build run dev web-dev web-build docker-up docker-down docker-logs db-migrate local-up local-down local-logs local-clean
 
 # 本地开发
 build:
@@ -27,6 +27,19 @@ docker-down:
 docker-logs:
 	docker-compose -f deploy/docker-compose.yml logs -f api
 
+# Docker Local (full local stack, including MySQL + Redis)
+# local-up: build the images and start the local stack in the background.
+local-up:
+	docker-compose -f deploy/docker-compose.local.yml up -d --build
+
+# local-down: stop and remove the local stack's containers.
+local-down:
+	docker-compose -f deploy/docker-compose.local.yml down
+
+# local-logs: follow the logs of the api service.
+local-logs:
+	docker-compose -f deploy/docker-compose.local.yml logs -f api
+
+# local-clean: like local-down, but -v also removes the volumes declared in
+# the compose file, so the local MySQL and Redis data is deleted.
+local-clean:
+	docker-compose -f deploy/docker-compose.local.yml down -v
+
 # 数据库
 db-migrate:
 	go run ./cmd/server -migrate-only

+ 1 - 1
cmd/server/main.go

@@ -191,7 +191,7 @@ func main() {
 
 	// 10. Register plugins
 	registry := plugin.NewRegistry()
-	registry.Register(webcollector.New(serperClient))
+	registry.Register(webcollector.New(serperClient, rdb))
 	registry.Register(tgcollector.New(tgManager, llmClient, s))
 	registry.Register(githubcollector.New(cfg.GitHub.Token, s))
 

+ 1 - 1
deploy/Dockerfile.api

@@ -7,7 +7,7 @@ COPY go.mod go.sum ./
 RUN go mod download
 COPY internal/ internal/
 COPY cmd/ cmd/
-RUN CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o /app/server ./cmd/server
+RUN GOMAXPROCS=2 CGO_ENABLED=0 GOOS=linux go build -p 2 -ldflags="-s -w" -o /app/server ./cmd/server
 
 FROM alpine:3.19
 RUN apk add --no-cache ca-certificates tzdata wget

+ 91 - 0
deploy/docker-compose.local.yml

@@ -0,0 +1,91 @@
+services:
+  # MySQL 8.0 for the local stack; container port 3306 is published on host port 3307.
+  im_mysql:
+    image: mysql:8.0
+    container_name: im_mysql
+    # The official mysql image does not read MYSQL_CHARACTER_SET_SERVER or
+    # MYSQL_COLLATION_SERVER, so the charset/collation are passed as mysqld
+    # flags instead (the entrypoint prepends "mysqld" to arguments starting with "-").
+    command:
+      - --character-set-server=utf8mb4
+      - --collation-server=utf8mb4_unicode_ci
+    environment:
+      MYSQL_ROOT_PASSWORD: root123
+      MYSQL_DATABASE: spider
+    ports:
+      - "3307:3306"
+    volumes:
+      - mysql_data:/var/lib/mysql
+    # Gates the api service's depends_on (condition: service_healthy).
+    healthcheck:
+      test: ["CMD", "mysqladmin", "ping", "-h", "localhost", "-uroot", "-proot123"]
+      interval: 10s
+      timeout: 5s
+      retries: 10
+      start_period: 300s
+    networks:
+      - chat_im_network
+
+  # Redis 7 for the local stack; port 6379 is published on the host and data
+  # is kept in the redis_data volume.
+  im_redis:
+    image: redis:7-alpine
+    container_name: im_redis
+    ports:
+      - "6379:6379"
+    volumes:
+      - redis_data:/data
+    # Gates the api service's depends_on (condition: service_healthy).
+    healthcheck:
+      test: ["CMD", "redis-cli", "ping"]
+      interval: 10s
+      timeout: 3s
+      retries: 5
+    networks:
+      - chat_im_network
+
+  # API server built from deploy/Dockerfile.api; container port 8080 is
+  # published on host port 8200.
+  api:
+    build:
+      context: ..
+      dockerfile: deploy/Dockerfile.api
+    container_name: spider_api
+    ports:
+      - "8200:8080"
+    # Host directories mounted into the container.
+    volumes:
+      - ../configs:/app/configs
+      - ../sessions:/app/sessions
+    environment:
+      - GIN_MODE=release
+      # NOTE(review): the fallback after ":-" is a key value committed to the
+      # repository; it is used whenever TG_SECRET_KEY is unset. Confirm it is
+      # only ever used for throwaway local data.
+      - TG_SECRET_KEY=${TG_SECRET_KEY:-d9RkIHw7wLaa5Qx+B7/QOw5K6+bW67Ife4UK91h4Z2Q=}
+    # Start only after both backing services report healthy.
+    depends_on:
+      im_mysql:
+        condition: service_healthy
+      im_redis:
+        condition: service_healthy
+    restart: unless-stopped
+    # Uses wget, which deploy/Dockerfile.api installs via apk.
+    healthcheck:
+      test: ["CMD", "wget", "-q", "-O-", "http://localhost:8080/ping"]
+      interval: 15s
+      timeout: 5s
+      retries: 3
+      start_period: 120s
+    networks:
+      - chat_im_network
+
+  # Web frontend built from deploy/Dockerfile.web; container port 80 is
+  # published on host port 8300.
+  web:
+    build:
+      context: ..
+      dockerfile: deploy/Dockerfile.web
+    container_name: spider_web
+    ports:
+      - "8300:80"
+    # Start only after the api service reports healthy.
+    depends_on:
+      api:
+        condition: service_healthy
+    restart: unless-stopped
+    # NOTE(review): this check needs curl inside the image built from
+    # deploy/Dockerfile.web, which is not visible here - confirm it is installed.
+    healthcheck:
+      test: ["CMD", "curl", "-sf", "http://localhost:80/"]
+      interval: 30s
+      timeout: 5s
+      retries: 3
+    networks:
+      - chat_im_network
+
+# Single bridge network shared by all four services.
+networks:
+  chat_im_network:
+    driver: bridge
+
+# Named volumes holding MySQL and Redis data; removed by "down -v".
+volumes:
+  mysql_data:
+  redis_data:

+ 6 - 2
deploy/nginx.conf

@@ -23,14 +23,18 @@ server {
 
     # Health check (no proxy)
     location /ping {
-        proxy_pass http://api:8080;
+        # NOTE(review): despite the "(no proxy)" label, this location forwards
+        # the request to the api service.
+        # 127.0.0.11 is Docker's embedded DNS server. Because proxy_pass is
+        # given a variable, nginx resolves "api" through this resolver at
+        # request time (cached for 10s) instead of once at startup.
+        resolver 127.0.0.11 valid=10s ipv6=off;
+        set $api_upstream http://api:8080;
+        proxy_pass $api_upstream;
         proxy_connect_timeout 5s;
         proxy_read_timeout 5s;
     }
 
     # API 代理(含 WebSocket)
     location /api/ {
-        proxy_pass http://api:8080;
+        resolver 127.0.0.11 valid=10s ipv6=off;
+        set $api_upstream http://api:8080;
+        proxy_pass $api_upstream;
         proxy_http_version 1.1;
         proxy_set_header Upgrade $http_upgrade;
         proxy_set_header Connection "upgrade";

+ 7 - 2
internal/handler/auth.go

@@ -216,11 +216,16 @@ func (h *AuthHandler) Logout(c *gin.Context) {
 func JWTAuth() gin.HandlerFunc {
 	return func(c *gin.Context) {
 		auth := c.GetHeader("Authorization")
-		if auth == "" || !strings.HasPrefix(auth, "Bearer ") {
+		var tokenStr string
+		if auth != "" && strings.HasPrefix(auth, "Bearer ") {
+			tokenStr = strings.TrimPrefix(auth, "Bearer ")
+		} else if q := c.Query("token"); q != "" {
+			// WebSocket connections pass token as query param
+			tokenStr = q
+		} else {
 			c.AbortWithStatusJSON(http.StatusUnauthorized, Response{Code: 401, Message: "未登录"})
 			return
 		}
-		tokenStr := strings.TrimPrefix(auth, "Bearer ")
 
 		// Check blacklist
 		if authRedis != nil {

+ 2 - 0
internal/handler/router.go

@@ -231,6 +231,8 @@ func SetupRouter(s *store.Store, taskMgr *task.Manager, rdb *redis.Client, tgMgr
 	protected.GET("/settings/grading", sh.GetGrading)
 	protected.PUT("/settings/grading", RequireRole("admin"), sh.UpdateGrading)
 	protected.POST("/settings/grading/reset", RequireRole("admin"), sh.ResetGrading)
+	protected.GET("/settings/api-keys", RequireRole("admin"), sh.GetAPIKeys)
+	protected.PUT("/settings/api-keys", RequireRole("admin"), sh.UpdateAPIKeys)
 
 	// Audit logs (admin only)
 	ah := &AuditHandler{store: s}

+ 25 - 0
internal/handler/setting.go

@@ -76,6 +76,31 @@ func (h *SettingHandler) GetLevelMap(c *gin.Context) {
 	OK(c, m)
 }
 
+// GetAPIKeys handles GET /settings/api-keys.
+// It loads the stored APIKeysConfig and writes it to the response unchanged;
+// a store error is reported as a 500 carrying the error text.
+// NOTE(review): the key and token fields are returned unmasked.
+func (h *SettingHandler) GetAPIKeys(c *gin.Context) {
+	cfg, err := h.store.GetAPIKeysConfig()
+	if err != nil {
+		Fail(c, 500, err.Error())
+		return
+	}
+	OK(c, cfg)
+}
+
+// UpdateAPIKeys handles PUT /settings/api-keys.
+// The JSON body is bound into a fresh APIKeysConfig and stored as a whole
+// under the "api_keys" setting, so any field missing from the body is saved
+// as an empty string. A bind failure yields 400, a store failure 500.
+// On success an audit entry is written and the saved config is echoed back.
+func (h *SettingHandler) UpdateAPIKeys(c *gin.Context) {
+	var cfg model.APIKeysConfig
+	if err := c.ShouldBindJSON(&cfg); err != nil {
+		Fail(c, 400, err.Error())
+		return
+	}
+	if err := h.store.SetSetting("api_keys", cfg); err != nil {
+		Fail(c, 500, err.Error())
+		return
+	}
+	LogAudit(h.store, c, "update", "setting", "api_keys", nil)
+	OK(c, cfg)
+}
+
 // GetAllSettings handles GET /settings — returns all settings keys.
 func (h *SettingHandler) GetAllSettings(c *gin.Context) {
 	var settings []model.Setting

+ 10 - 0
internal/model/setting.go

@@ -14,6 +14,16 @@ type Setting struct {
 	UpdatedAt time.Time      `json:"updated_at"`
 }
 
+// APIKeysConfig stores third-party API keys under the "api_keys" setting key.
+// All fields are plain strings serialized with the JSON names shown in the tags.
+type APIKeysConfig struct {
+	// Key for the Serper search API.
+	SerperAPIKey string `json:"serper_api_key"`
+	// LLM connection settings: provider name, base URL, API key and model name.
+	LLMProvider  string `json:"llm_provider"`
+	LLMBaseURL   string `json:"llm_base_url"`
+	LLMAPIKey    string `json:"llm_api_key"`
+	LLMModel     string `json:"llm_model"`
+	// GitHub access token.
+	GitHubToken  string `json:"github_token"`
+}
+
 // GradingConfig is the structure stored under the "grading" setting key.
 type GradingConfig struct {
 	Levels          []LevelDef        `json:"levels"`

+ 97 - 8
internal/plugins/webcollector/collector.go

@@ -2,6 +2,7 @@ package webcollector
 
 import (
 	"context"
+	"encoding/json"
 	"log"
 	"net/url"
 	"regexp"
@@ -9,6 +10,8 @@ import (
 	"sync/atomic"
 	"time"
 
+	"github.com/redis/go-redis/v9"
+
 	"spider/internal/crawler"
 	"spider/internal/extractor"
 	"spider/internal/plugin"
@@ -16,6 +19,8 @@ import (
 	"spider/internal/search"
 )
 
+// snapshotKey is the single Redis key under which the collector stores its
+// map of already-processed search queries. The key is fixed, so every run
+// of this collector reads and writes the same snapshot.
+const snapshotKey = "spider:webcollector:snapshot"
+
 // Collector implements plugin.Collector for web-based merchant collection.
 type Collector struct {
 	serper       *search.SerperClient
@@ -25,15 +30,17 @@ type Collector struct {
 	stopped      atomic.Bool
 	logger       plugin.TaskLogger
 	proxyPool    *proxypool.Pool
+	rdb          *redis.Client
 }
 
+// New builds a Collector that searches through serper and persists its query
+// snapshot in rdb. rdb may be nil, in which case snapshot loading, saving and
+// clearing are skipped.
-func New(serper *search.SerperClient) *Collector {
+func New(serper *search.SerperClient, rdb *redis.Client) *Collector {
 	return &Collector{
 		serper:       serper,
 		static:       crawler.NewStaticCrawler(),
 		dynamic:      crawler.NewDynamicCrawler(),
 		tmeValidator: crawler.NewTMeValidator(),
 		logger:       plugin.NopLogger(),
+		rdb:          rdb,
 	}
 }
 
@@ -67,12 +74,50 @@ func (c *Collector) Run(ctx context.Context, cfg map[string]any, callback func(p
 		return nil
 	}
 
+	// Stop conditions
+	maxMerchants, _ := cfg["max_merchants"].(int)
+	maxDurationMins, _ := cfg["max_duration_mins"].(int)
+	resumeSnapshot, _ := cfg["resume_snapshot"].(bool)
+
+	var deadline time.Time
+	if maxDurationMins > 0 {
+		deadline = time.Now().Add(time.Duration(maxDurationMins) * time.Minute)
+		log.Printf("[web_collector] will stop after %d minutes", maxDurationMins)
+	}
+
+	// Load or init snapshot
+	snapshot := c.loadSnapshot(ctx)
+	if !resumeSnapshot {
+		snapshot = map[string]bool{}
+		log.Println("[web_collector] starting fresh (snapshot cleared)")
+	} else if len(snapshot) > 0 {
+		log.Printf("[web_collector] resuming from snapshot, %d queries already done", len(snapshot))
+	}
+
+	merchantCount := 0
+	wrappedCallback := func(md plugin.MerchantData) {
+		callback(md)
+		merchantCount++
+	}
+
 	queries := expandSearchQueries(keywords)
 
 	for _, q := range queries {
 		if c.stopped.Load() || ctx.Err() != nil {
 			break
 		}
+		if maxMerchants > 0 && merchantCount >= maxMerchants {
+			log.Printf("[web_collector] reached max_merchants limit (%d), stopping", maxMerchants)
+			break
+		}
+		if !deadline.IsZero() && time.Now().After(deadline) {
+			log.Printf("[web_collector] reached max_duration limit, stopping")
+			break
+		}
+		if snapshot[q] {
+			log.Printf("[web_collector] skipping (snapshot): %s", q)
+			continue
+		}
 
 		// Rotate proxy for each query if using pool
 		c.rotateProxy()
@@ -90,7 +135,7 @@ func (c *Collector) Run(ctx context.Context, cfg map[string]any, callback func(p
 			for i, r := range results {
 				c.logger.LogSearchResult(q+" [organic]", i+1, r.Title, r.URL, r.Snippet)
 			}
-			c.processResults(ctx, results, q, callback)
+			c.processResults(ctx, results, q, wrappedCallback)
 		}
 
 		time.Sleep(1 * time.Second)
@@ -106,9 +151,13 @@ func (c *Collector) Run(ctx context.Context, cfg map[string]any, callback func(p
 			for i, r := range videoResults {
 				c.logger.LogSearchResult(q+" [video]", i+1, r.Title, r.URL, r.Snippet)
 			}
-			c.processResults(ctx, videoResults, q, callback)
+			c.processResults(ctx, videoResults, q, wrappedCallback)
 		}
 
+		// Mark query as done in snapshot
+		snapshot[q] = true
+		c.saveSnapshot(ctx, snapshot)
+
 		select {
 		case <-ctx.Done():
 			return nil
@@ -116,10 +165,50 @@ func (c *Collector) Run(ctx context.Context, cfg map[string]any, callback func(p
 		}
 	}
 
-	log.Println("[web_collector] done")
+	// If all queries done naturally, clear snapshot for next full run
+	allDone := true
+	for _, q := range queries {
+		if !snapshot[q] {
+			allDone = false
+			break
+		}
+	}
+	if allDone && c.rdb != nil {
+		c.rdb.Del(ctx, snapshotKey)
+		log.Println("[web_collector] all queries done, snapshot cleared")
+	}
+
+	log.Printf("[web_collector] done, collected %d merchants", merchantCount)
 	return nil
 }
 
+// loadSnapshot reads the set of already-processed queries from Redis.
+// It always returns a non-nil, writable map: an empty one when Redis is not
+// configured, when no snapshot is stored, or when the stored value cannot be
+// read or decoded. Read and decode failures are logged.
+func (c *Collector) loadSnapshot(ctx context.Context) map[string]bool {
+	if c.rdb == nil {
+		return map[string]bool{}
+	}
+	data, err := c.rdb.Get(ctx, snapshotKey).Bytes()
+	if err != nil {
+		// redis.Nil only means that no snapshot exists yet; anything else is a real failure.
+		if err != redis.Nil {
+			log.Printf("[web_collector] load snapshot failed: %v", err)
+		}
+		return map[string]bool{}
+	}
+	var snapshot map[string]bool
+	if err := json.Unmarshal(data, &snapshot); err != nil {
+		log.Printf("[web_collector] decode snapshot failed: %v", err)
+		return map[string]bool{}
+	}
+	if snapshot == nil {
+		// A stored JSON null decodes to a nil map without error; the caller
+		// writes into the result, which would panic on a nil map.
+		return map[string]bool{}
+	}
+	return snapshot
+}
+
+// saveSnapshot writes the set of processed queries to Redis as JSON with a
+// 7-day expiry. It is a no-op when Redis is not configured. Saving is
+// best-effort: failures are logged and never abort the collection run.
+func (c *Collector) saveSnapshot(ctx context.Context, snapshot map[string]bool) {
+	if c.rdb == nil {
+		return
+	}
+	data, err := json.Marshal(snapshot)
+	if err != nil {
+		log.Printf("[web_collector] encode snapshot failed: %v", err)
+		return
+	}
+	// Keep snapshot for 7 days
+	if err := c.rdb.Set(ctx, snapshotKey, data, 7*24*time.Hour).Err(); err != nil {
+		log.Printf("[web_collector] save snapshot failed: %v", err)
+	}
+}
+
 // processResults handles search results with full logging at every node.
 func (c *Collector) processResults(ctx context.Context, results []search.SearchResult, query string, callback func(plugin.MerchantData)) {
 	for _, r := range results {

+ 16 - 0
internal/store/setting_repo.go

@@ -40,6 +40,22 @@ func (s *Store) SetSetting(key string, value any) error {
 	return s.DB.Model(&existing).Update("value", datatypes.JSON(data)).Error
 }
 
+// GetAPIKeysConfig reads the API keys config from settings.
+// Errors from GetSetting are returned to the caller. An empty config with a
+// nil error is returned both when GetSetting yields no setting and when the
+// stored value cannot be decoded into APIKeysConfig, so a malformed value is
+// indistinguishable from "nothing configured".
+func (s *Store) GetAPIKeysConfig() (*model.APIKeysConfig, error) {
+	setting, err := s.GetSetting("api_keys")
+	if err != nil {
+		return nil, err
+	}
+	if setting == nil {
+		return &model.APIKeysConfig{}, nil
+	}
+	var cfg model.APIKeysConfig
+	if err := json.Unmarshal(setting.Value, &cfg); err != nil {
+		// Deliberately swallowed: fall back to an empty config.
+		return &model.APIKeysConfig{}, nil
+	}
+	return &cfg, nil
+}
+
 // GetGradingConfig reads the grading config from settings, or returns default.
 func (s *Store) GetGradingConfig() (*model.GradingConfig, error) {
 	setting, err := s.GetSetting("grading")

+ 21 - 7
internal/task/manager.go

@@ -21,11 +21,14 @@ import (
 
 // StartRequest is the payload for starting a new task.
+// MaxMerchants, MaxDurationMins and ResumeSnapshot are copied by StartTask
+// into the plugin config under the keys "max_merchants", "max_duration_mins"
+// and "resume_snapshot"; the two limits are only set when greater than zero.
 type StartRequest struct {
-	PluginName    string `json:"plugin_name" binding:"required"`
-	AutoClean     *bool  `json:"auto_clean"`                       // run processor after collection (default true)
-	TargetGroup   string `json:"target_group,omitempty"`           // target a specific TG group/channel for collection
-	ProxyID       *uint  `json:"proxy_id,omitempty"`               // optional single proxy for this task
-	ProxyMode     string `json:"proxy_mode,omitempty"`             // "single" (default) or "pool"
+	PluginName       string `json:"plugin_name" binding:"required"`
+	AutoClean        *bool  `json:"auto_clean"`                       // run processor after collection (default true)
+	TargetGroup      string `json:"target_group,omitempty"`           // target a specific TG group/channel for collection
+	ProxyID          *uint  `json:"proxy_id,omitempty"`               // optional single proxy for this task
+	ProxyMode        string `json:"proxy_mode,omitempty"`             // "single" (default) or "pool"
+	MaxMerchants     int    `json:"max_merchants,omitempty"`          // stop after collecting this many merchants (0 = unlimited)
+	MaxDurationMins  int    `json:"max_duration_mins,omitempty"`      // stop after this many minutes (0 = unlimited)
+	ResumeSnapshot   bool   `json:"resume_snapshot,omitempty"`        // resume from last snapshot
 }
 
 // Manager manages plugin task lifecycle using goroutines.
@@ -136,6 +139,15 @@ func (m *Manager) StartTask(req StartRequest) (*model.TaskLog, error) {
 		cfg["proxy_url"] = proxyURL
 	}
 
+	// Stop conditions
+	if req.MaxMerchants > 0 {
+		cfg["max_merchants"] = req.MaxMerchants
+	}
+	if req.MaxDurationMins > 0 {
+		cfg["max_duration_mins"] = req.MaxDurationMins
+	}
+	cfg["resume_snapshot"] = req.ResumeSnapshot
+
 	// If targeting a specific group, override seeds config
 	if req.TargetGroup != "" {
 		cfg["seeds"] = []string{req.TargetGroup}
@@ -236,8 +248,10 @@ func (m *Manager) runTask(ctx context.Context, taskLog *model.TaskLog, collector
 
 	m.writeLog(ctx, taskLog.ID, fmt.Sprintf("采集完成: 新增 %d 个商户", merchantCount))
 
-	// Auto-clean: run processor on new raw records
-	if autoClean && merchantCount > 0 {
+	// Auto-clean: run processor if there are any unprocessed raw records
+	var pendingRaw int64
+	m.db.Model(&model.MerchantRaw{}).Where("status = ?", "raw").Count(&pendingRaw)
+	if autoClean && pendingRaw > 0 {
 		m.writeLog(ctx, taskLog.ID, "开始清洗流程...")
 		m.writeProgress(ctx, taskLog.ID, "clean", 0, 0, "清洗中...")
 

+ 16 - 1
web/src/api/index.ts

@@ -20,6 +20,9 @@ export interface StartTaskRequest {
   target_group?: string
   proxy_id?: number
   proxy_mode?: 'single' | 'pool'
+  max_merchants?: number
+  max_duration_mins?: number
+  resume_snapshot?: boolean
 }
 
 export interface TaskLog {
@@ -254,6 +257,17 @@ export const updateGradingConfig = (data: GradingConfig) => api.put<unknown, Api
 export const resetGradingConfig = () => api.post<unknown, ApiResponse<GradingConfig>>('/settings/grading/reset')
 export const getLevelMap = () => api.get<unknown, ApiResponse<Record<string, { label: string; color: string; description: string }>>>('/settings/level-map')
 
+// Third-party API key settings exchanged with /settings/api-keys.
+// Field names match the JSON tags of the backend APIKeysConfig struct.
+export interface APIKeysConfig {
+  serper_api_key: string
+  llm_provider: string
+  llm_base_url: string
+  llm_api_key: string
+  llm_model: string
+  github_token: string
+}
+// Fetch the stored API key settings.
+export const getAPIKeys = () => api.get<unknown, ApiResponse<APIKeysConfig>>('/settings/api-keys')
+// Replace the stored API key settings with the given object.
+export const updateAPIKeys = (data: APIKeysConfig) => api.put<unknown, ApiResponse<APIKeysConfig>>('/settings/api-keys', data)
+
 // Groups
 export interface GroupSummary {
   group_username: string
@@ -564,5 +578,6 @@ export const deleteChannel = (id: number) =>
 // Build WebSocket URL for task logs
 export function buildTaskLogWsUrl(taskId: number): string {
   const proto = window.location.protocol === 'https:' ? 'wss:' : 'ws:'
-  return `${proto}//${window.location.host}/api/v1/tasks/${taskId}/logs`
+  // The token stored under localStorage "token" is sent as a query parameter,
+  // which the server's JWTAuth accepts when no Authorization header is present.
+  // A missing token yields an empty "token=" value.
+  // NOTE(review): tokens placed in URLs can end up in proxy/access logs - confirm this is acceptable.
+  const token = localStorage.getItem('token') ?? ''
+  return `${proto}//${window.location.host}/api/v1/tasks/${taskId}/logs?token=${encodeURIComponent(token)}`
 }

+ 37 - 0
web/src/components/TaskControl.tsx

@@ -13,6 +13,9 @@ import {
   Tag,
   Select,
   Radio,
+  InputNumber,
+  Checkbox,
+  Divider,
 } from 'antd'
 import { StopOutlined, ThunderboltOutlined } from '@ant-design/icons'
 import { startTask, getTask, getEnabledProxies, getProxyPoolStatus, type StartTaskRequest, type Proxy, type ProxyPoolStatus } from '../api'
@@ -59,6 +62,9 @@ export default function TaskControl({ onTaskStarted, onTaskFinished }: TaskContr
   const [progress, setProgress] = useState<ProgressInfo | null>(null)
   const [merchants, setMerchants] = useState(0)
   const [errors, setErrors] = useState(0)
+  const [maxMerchants, setMaxMerchants] = useState<number | null>(null)
+  const [maxDurationMins, setMaxDurationMins] = useState<number | null>(null)
+  const [resumeSnapshot, setResumeSnapshot] = useState(false)
   const pollerRef = useRef<ReturnType<typeof setInterval> | null>(null)
 
   // Load available proxies
@@ -160,6 +166,9 @@ export default function TaskControl({ onTaskStarted, onTaskFinished }: TaskContr
         auto_clean: selectedPlugin.name !== 'clean',
         ...(proxyMode === 'single' && selectedProxyId ? { proxy_id: selectedProxyId } : {}),
         ...(proxyMode === 'pool' ? { proxy_mode: 'pool' } : {}),
+        ...(selectedPlugin.name === 'web_collector' && maxMerchants ? { max_merchants: maxMerchants } : {}),
+        ...(selectedPlugin.name === 'web_collector' && maxDurationMins ? { max_duration_mins: maxDurationMins } : {}),
+        ...(selectedPlugin.name === 'web_collector' ? { resume_snapshot: resumeSnapshot } : {}),
       }
       const res = await startTask(req)
       setRunningTask(res.data)
@@ -363,6 +372,34 @@ export default function TaskControl({ onTaskStarted, onTaskFinished }: TaskContr
             )}
           </div>
         )}
+
+        {selectedPlugin?.name === 'web_collector' && (
+          <>
+            <Divider style={{ margin: '12px 0' }} />
+            <Text strong style={{ display: 'block', marginBottom: 8 }}>停止条件</Text>
+            <Row gutter={16} style={{ marginBottom: 8 }}>
+              <Col span={12}>
+                <Text type="secondary" style={{ display: 'block', fontSize: 12, marginBottom: 4 }}>采集商户上限(0=不限)</Text>
+                <InputNumber
+                  min={0} style={{ width: '100%' }} placeholder="不限制"
+                  value={maxMerchants ?? 0}
+                  onChange={v => setMaxMerchants(v || null)}
+                />
+              </Col>
+              <Col span={12}>
+                <Text type="secondary" style={{ display: 'block', fontSize: 12, marginBottom: 4 }}>最长运行时间(分钟,0=不限)</Text>
+                <InputNumber
+                  min={0} style={{ width: '100%' }} placeholder="不限制"
+                  value={maxDurationMins ?? 0}
+                  onChange={v => setMaxDurationMins(v || null)}
+                />
+              </Col>
+            </Row>
+            <Checkbox checked={resumeSnapshot} onChange={e => setResumeSnapshot(e.target.checked)}>
+              从上次快照继续(跳过已搜索的关键词)
+            </Checkbox>
+          </>
+        )}
       </Modal>
     </Card>
   )

+ 74 - 2
web/src/pages/Settings.tsx

@@ -1,7 +1,7 @@
 import { useEffect, useState, useRef } from 'react'
 import { Card, Form, Input, Button, Tag, Space, message, Popconfirm, Typography, Row, Col, Collapse, Switch, InputNumber, Tooltip, Tabs, Descriptions, Modal, type InputRef } from 'antd'
 import { PlusOutlined, DeleteOutlined, UndoOutlined, SaveOutlined, QuestionCircleOutlined, DownloadOutlined } from '@ant-design/icons'
-import { getGradingConfig, updateGradingConfig, resetGradingConfig, getSystemHealth, getBackupStats, type GradingConfig, type LevelDef, type GradeRule, type SystemHealth } from '../api'
+import { getGradingConfig, updateGradingConfig, resetGradingConfig, getSystemHealth, getBackupStats, getAPIKeys, updateAPIKeys, type GradingConfig, type LevelDef, type GradeRule, type SystemHealth, type APIKeysConfig } from '../api'
 import { useAppStore } from '../store'
 
 const { Text, Title } = Typography
@@ -223,6 +223,75 @@ function GradingTab() {
   )
 }
 
+// APIKeysTab renders the settings form for third-party API keys (Serper, LLM, GitHub).
+// The config is loaded once on mount, edited in local state, and written back
+// as a whole object when the save button is pressed.
+function APIKeysTab() {
+  const [cfg, setCfg] = useState<APIKeysConfig | null>(null)
+  const [loading, setLoading] = useState(true)
+  const [saving, setSaving] = useState(false)
+
+  useEffect(() => {
+    getAPIKeys().then(r => setCfg(r.data)).catch(() => message.error('加载失败')).finally(() => setLoading(false))
+  }, [])
+
+  const handleSave = async () => {
+    if (!cfg) return
+    setSaving(true)
+    try {
+      await updateAPIKeys(cfg)
+      message.success('保存成功,下次任务生效')
+    } catch {
+      message.error('保存失败')
+    } finally {
+      setSaving(false)
+    }
+  }
+
+  if (loading) return <div>加载中...</div>
+  // A failed load leaves cfg null; show the failure instead of a permanent loading placeholder.
+  if (!cfg) return <div>加载失败</div>
+
+  // Renders one labelled text input bound to a single config field;
+  // password=true uses a masked input.
+  const field = (label: string, key: keyof APIKeysConfig, placeholder?: string, password?: boolean) => (
+    <Form.Item label={label} style={{ marginBottom: 12 }}>
+      {password
+        ? <Input.Password value={cfg[key]} placeholder={placeholder} onChange={e => setCfg({ ...cfg, [key]: e.target.value })} />
+        : <Input value={cfg[key]} placeholder={placeholder} onChange={e => setCfg({ ...cfg, [key]: e.target.value })} />}
+    </Form.Item>
+  )
+
+  return (
+    <div>
+      <Row justify="end" style={{ marginBottom: 16 }}>
+        <Button type="primary" icon={<SaveOutlined />} loading={saving} onClick={handleSave}>保存配置</Button>
+      </Row>
+
+      <Card title="网页搜索(Serper)" style={{ marginBottom: 16 }}>
+        <Text type="secondary" style={{ display: 'block', marginBottom: 12 }}>
+          用于搜索阶段的 Google 搜索 API,从 <a href="https://serper.dev" target="_blank" rel="noreferrer">serper.dev</a> 获取。
+        </Text>
+        {field('Serper API Key', 'serper_api_key', '输入 Serper API Key', true)}
+      </Card>
+
+      <Card title="大模型(LLM)" style={{ marginBottom: 16 }}>
+        <Text type="secondary" style={{ display: 'block', marginBottom: 12 }}>
+          用于清洗阶段的 AI 提取,兼容 OpenAI 接口格式。
+        </Text>
+        <Row gutter={16}>
+          <Col span={8}>{field('Provider', 'llm_provider', 'openai / azure / deepseek ...')}</Col>
+          <Col span={16}>{field('Base URL', 'llm_base_url', 'https://api.openai.com/v1')}</Col>
+        </Row>
+        <Row gutter={16}>
+          <Col span={12}>{field('API Key', 'llm_api_key', '输入 LLM API Key', true)}</Col>
+          <Col span={12}>{field('Model', 'llm_model', 'gpt-4o-mini')}</Col>
+        </Row>
+      </Card>
+
+      <Card title="GitHub(可选)">
+        <Text type="secondary" style={{ display: 'block', marginBottom: 12 }}>
+          用于搜索 GitHub 上的商户相关资源,留空则跳过。
+        </Text>
+        {field('GitHub Token', 'github_token', '输入 GitHub Personal Access Token', true)}
+      </Card>
+    </div>
+  )
+}
+
 function SystemInfoTab() {
   const [health, setHealth] = useState<SystemHealth | null>(null)
   const [loading, setLoading] = useState(true)
@@ -312,7 +381,10 @@ export default function Settings() {
 
   const tabs = [
     { key: 'grading', label: '分级设置', children: <GradingTab /> },
-    ...(isAdmin() ? [{ key: 'system', label: '系统信息', children: <SystemInfoTab /> }] : []),
+    ...(isAdmin() ? [
+      { key: 'api_keys', label: 'API 配置', children: <APIKeysTab /> },
+      { key: 'system', label: '系统信息', children: <SystemInfoTab /> },
+    ] : []),
   ]
 
   return <Tabs defaultActiveKey="grading" items={tabs} />

+ 6 - 1
web/src/pages/Tasks.tsx

@@ -125,8 +125,13 @@ export default function Tasks() {
     }
 
     ws.onmessage = (e) => {
+      const line: string = e.data
+      // Stop reconnecting when server signals task is done
+      if (line.includes('任务已结束') || line.includes('任务完成') || line.includes('任务已停止') || line.includes('采集失败')) {
+        wsIntentionalClose.current = true
+      }
       setLogLines(prev => {
-        const next = [...prev, e.data]
+        const next = [...prev, line]
         return next.length > 500 ? next.slice(-500) : next
       })
       setTimeout(() => {