dot 3 viikkoa sitten
commit
deb8b014c0
92 muutettua tiedostoa jossa 10420 lisäystä ja 0 poistoa
  1. 34 0
      Makefile
  2. 64 0
      README.md
  3. 117 0
      cmd/server/main.go
  4. 30 0
      cmd/server/seed.go
  5. 35 0
      configs/config.yaml
  6. 14 0
      deploy/Dockerfile.api
  7. 12 0
      deploy/Dockerfile.web
  8. 37 0
      deploy/docker-compose.yml
  9. 16 0
      deploy/init.sql
  10. 24 0
      deploy/nginx.conf
  11. 820 0
      docs/design-spec.md
  12. 295 0
      docs/implementation-plan.md
  13. 120 0
      go.mod
  14. 795 0
      go.sum
  15. 89 0
      internal/config/config.go
  16. 0 0
      internal/crawler/.gitkeep
  17. 66 0
      internal/crawler/dynamic.go
  18. 107 0
      internal/crawler/filter.go
  19. 78 0
      internal/crawler/static.go
  20. 62 0
      internal/crawler/tme_validator.go
  21. 0 0
      internal/extractor/.gitkeep
  22. 21 0
      internal/extractor/llm_extractor.go
  23. 137 0
      internal/extractor/regex.go
  24. 23 0
      internal/extractor/types.go
  25. 0 0
      internal/handler/.gitkeep
  26. 82 0
      internal/handler/channel.go
  27. 77 0
      internal/handler/config.go
  28. 61 0
      internal/handler/dashboard.go
  29. 138 0
      internal/handler/keyword.go
  30. 186 0
      internal/handler/merchant.go
  31. 38 0
      internal/handler/nav_site.go
  32. 104 0
      internal/handler/response.go
  33. 78 0
      internal/handler/router.go
  34. 120 0
      internal/handler/seed.go
  35. 250 0
      internal/handler/task.go
  36. 0 0
      internal/llm/.gitkeep
  37. 178 0
      internal/llm/client.go
  38. 18 0
      internal/model/channel.go
  39. 12 0
      internal/model/config_revision.go
  40. 11 0
      internal/model/keyword.go
  41. 30 0
      internal/model/merchant_clean.go
  42. 18 0
      internal/model/merchant_raw.go
  43. 14 0
      internal/model/nav_site.go
  44. 12 0
      internal/model/seed.go
  45. 13 0
      internal/model/setting.go
  46. 20 0
      internal/model/task.go
  47. 0 0
      internal/pipeline/.gitkeep
  48. 46 0
      internal/pipeline/phase.go
  49. 183 0
      internal/pipeline/phase1_discover.go
  50. 129 0
      internal/pipeline/phase2_search.go
  51. 250 0
      internal/pipeline/phase3_github.go
  52. 220 0
      internal/pipeline/phase4_scrape.go
  53. 200 0
      internal/pipeline/phase5_crawl.go
  54. 322 0
      internal/pipeline/phase6_clean.go
  55. 126 0
      internal/pipeline/phase7_score.go
  56. 112 0
      internal/pipeline/pipeline.go
  57. 0 0
      internal/search/.gitkeep
  58. 141 0
      internal/search/serper.go
  59. 0 0
      internal/service/.gitkeep
  60. 146 0
      internal/service/settings_service.go
  61. 181 0
      internal/service/task_service.go
  62. 0 0
      internal/telegram/.gitkeep
  63. 176 0
      internal/telegram/account_manager.go
  64. 418 0
      internal/telegram/client.go
  65. 56 0
      internal/telegram/types.go
  66. 0 0
      internal/worker/.gitkeep
  67. 325 0
      internal/worker/worker.go
  68. BIN
      server.exe
  69. 0 0
      sessions/.gitkeep
  70. 0 0
      web/.gitkeep
  71. 12 0
      web/index.html
  72. 27 0
      web/package.json
  73. 33 0
      web/src/App.tsx
  74. 177 0
      web/src/api/index.ts
  75. 126 0
      web/src/components/Layout.tsx
  76. 183 0
      web/src/components/TaskControl.tsx
  77. 13 0
      web/src/main.tsx
  78. 192 0
      web/src/pages/Channels.tsx
  79. 171 0
      web/src/pages/Dashboard.tsx
  80. 228 0
      web/src/pages/Keywords.tsx
  81. 194 0
      web/src/pages/Logs.tsx
  82. 182 0
      web/src/pages/MerchantsClean.tsx
  83. 162 0
      web/src/pages/MerchantsRaw.tsx
  84. 138 0
      web/src/pages/NavSites.tsx
  85. 217 0
      web/src/pages/Seeds.tsx
  86. 167 0
      web/src/pages/Settings.tsx
  87. 154 0
      web/src/pages/Tasks.tsx
  88. 12 0
      web/src/store/index.ts
  89. 21 0
      web/tsconfig.json
  90. 10 0
      web/tsconfig.node.json
  91. 12 0
      web/vite.config.ts
  92. 502 0
      爬虫文档.md

+ 34 - 0
Makefile

@@ -0,0 +1,34 @@
+.PHONY: build run dev web-dev web-build docker-up docker-down docker-logs db-migrate
+
+# Goal used when `make` is invoked without arguments.
+.DEFAULT_GOAL := build
+
+# Compose command shared by every docker-* target. Override it from the
+# command line to use the Compose v2 plugin, for example:
+#   make docker-up COMPOSE="docker compose -f deploy/docker-compose.yml"
+COMPOSE ?= docker-compose -f deploy/docker-compose.yml
+
+# Local development
+build:
+	go build -o bin/server ./cmd/server
+
+run: build
+	./bin/server
+
+dev:
+	go run ./cmd/server
+
+# Frontend development
+web-dev:
+	cd web && npm install && npm run dev
+
+web-build:
+	cd web && npm install && npm run build
+
+# Docker
+docker-up:
+	$(COMPOSE) up -d --build
+
+docker-down:
+	$(COMPOSE) down
+
+docker-logs:
+	$(COMPOSE) logs -f api
+
+# Database
+# NOTE(review): this target relies on the server binary honouring the
+# -migrate-only flag; confirm cmd/server parses it, otherwise the full
+# server is started instead of exiting after the migration.
+db-migrate:
+	go run ./cmd/server -migrate-only

+ 64 - 0
README.md

@@ -0,0 +1,64 @@
+# 商户查找系统
+
+自动从 Telegram 频道、搜索引擎、GitHub、导航网站挖掘商户联系方式。
+
+## 技术栈
+
+- **后端**: Go + Gin + GORM + asynq
+- **前端**: React + Ant Design
+- **数据库**: MySQL
+- **缓存/队列**: Redis
+
+## 快速开始
+
+### 1. 配置
+
+编辑 `configs/config.yaml`,填写:
+- MySQL 连接信息(默认连接 Docker 容器 im_mysql:3306,见 `configs/config.yaml`)
+- Redis 连接信息(默认 db 3)
+- TG App ID/Hash(从 my.telegram.org 申请)
+- LLM API Key(OpenAI 兼容接口)
+- Serper API Key(Google 搜索)
+- GitHub Token(可选,提升 API 限额)
+
+### 2. Docker 启动
+
+```bash
+# 启动所有服务(需要已有 MySQL 和 Redis)
+docker-compose -f deploy/docker-compose.yml up -d --build
+
+# 查看日志
+docker-compose -f deploy/docker-compose.yml logs -f api
+```
+
+### 3. 本地开发
+
+```bash
+# 后端
+go run ./cmd/server
+
+# 前端(新终端)
+cd web && npm install && npm run dev
+```
+
+### 4. 访问
+
+- 管理后台: http://localhost:8300 (Docker) 或 http://localhost:5173 (本地开发)
+- API: http://localhost:8200/api/v1 (Docker) 或 http://localhost:8080/api/v1 (本地开发)
+
+## 配置说明
+
+如果 MySQL/Redis 是 Docker 容器(不是宿主机服务):
+1. 找到容器所在的 Docker 网络名:`docker network ls`
+2. 修改 `deploy/docker-compose.yml` 中 `networks.external_db.name` 为实际网络名
+3. 修改 `configs/config.yaml` 中 `mysql.host` 和 `redis.host` 为容器名
+
+## 7 阶段 Pipeline
+
+1. **discover** - TG 频道裂变(从种子频道 BFS 发现新频道)
+2. **search** - Serper 搜索引擎(关键词搜索发现频道和导航站)
+3. **github** - GitHub README 挖掘(搜索含 TG 链接的仓库)
+4. **scrape** - TG 消息采集(读取频道历史消息提取商户)
+5. **crawl** - 网页爬取(爬取导航站提取商户)
+6. **clean** - 数据清洗(黑名单过滤 + 去重 + TG 验证)
+7. **score** - 质量评分(6 维度加权打分)

+ 117 - 0
cmd/server/main.go

@@ -0,0 +1,117 @@
+package main
+
+import (
+	"context"
+	"flag"
+	"fmt"
+	"log"
+	"time"
+
+	"spider/internal/config"
+	"spider/internal/handler"
+	"spider/internal/llm"
+	"spider/internal/model"
+	"spider/internal/search"
+	"spider/internal/service"
+	"spider/internal/telegram"
+	"spider/internal/worker"
+
+	"github.com/redis/go-redis/v9"
+	"gorm.io/driver/mysql"
+	"gorm.io/gorm"
+)
+
+// main loads configuration, migrates the MySQL schema, then starts the asynq
+// worker and the Gin HTTP server inside the same process.
+//
+// Command-line flags:
+//
+//	-migrate-only  run AutoMigrate and seed the default settings, then exit
+//	               without connecting to Redis or starting any service.
+func main() {
+	migrateOnly := flag.Bool("migrate-only", false,
+		"run database migrations and seed default settings, then exit")
+	flag.Parse()
+
+	// 1. Load configuration.
+	cfg, err := config.Load("configs/config.yaml")
+	if err != nil {
+		log.Fatalf("load config: %v", err)
+	}
+
+	// 2. Connect to MySQL.
+	dsn := fmt.Sprintf("%s:%s@tcp(%s:%d)/%s?charset=utf8mb4&parseTime=True&loc=Local",
+		cfg.MySQL.User, cfg.MySQL.Password, cfg.MySQL.Host, cfg.MySQL.Port, cfg.MySQL.Database)
+	db, err := gorm.Open(mysql.Open(dsn), &gorm.Config{})
+	if err != nil {
+		log.Fatalf("connect mysql: %v", err)
+	}
+
+	// 3. AutoMigrate every table.
+	err = db.AutoMigrate(
+		&model.ManagedSeed{},
+		&model.ManagedKeyword{},
+		&model.ManagedSetting{},
+		&model.Channel{},
+		&model.NavSite{},
+		&model.MerchantRaw{},
+		&model.MerchantClean{},
+		&model.Task{},
+		&model.ConfigRevision{},
+	)
+	if err != nil {
+		log.Fatalf("automigrate: %v", err)
+	}
+	log.Println("MySQL tables migrated")
+
+	// 3a. Insert default managed_settings rows (existing rows are kept).
+	seedSettings(db)
+
+	// Migration-only mode stops here: everything below needs Redis and
+	// starts long-running services.
+	if *migrateOnly {
+		log.Println("migrate-only: done, exiting")
+		return
+	}
+
+	// 4. Connect to Redis and verify the connection before reporting success.
+	redisAddr := fmt.Sprintf("%s:%d", cfg.Redis.Host, cfg.Redis.Port)
+	rdb := redis.NewClient(&redis.Options{
+		Addr:     redisAddr,
+		Password: cfg.Redis.Password,
+		DB:       cfg.Redis.DB,
+	})
+	pingCtx, cancelPing := context.WithTimeout(context.Background(), 5*time.Second)
+	if err := rdb.Ping(pingCtx).Err(); err != nil {
+		// log.Fatalf does not run deferred calls, so cancel explicitly.
+		cancelPing()
+		log.Fatalf("connect redis: %v", err)
+	}
+	cancelPing()
+	log.Println("Redis connected")
+
+	// 5. Task service.
+	taskSvc := service.NewTaskService(db, rdb)
+
+	// 5a. Settings service; a failed cache warm-up is logged but not fatal.
+	settings := service.NewSettingsService(db, rdb)
+	if err := settings.Load(context.Background()); err != nil {
+		log.Printf("load settings into cache: %v", err)
+	}
+
+	// 5b. Telegram account manager, built from the accounts in the config
+	// file; every account shares the configured app id and app hash.
+	tgAccounts := make([]telegram.Account, 0, len(cfg.Telegram.Accounts))
+	for _, a := range cfg.Telegram.Accounts {
+		tgAccounts = append(tgAccounts, telegram.Account{
+			Phone:       a.Phone,
+			SessionFile: a.SessionFile,
+			AppID:       cfg.Telegram.AppID,
+			AppHash:     cfg.Telegram.AppHash,
+		})
+	}
+	tgManager := telegram.NewAccountManager(tgAccounts, rdb)
+
+	// 5c. LLM client; left nil when no API key is configured.
+	var llmClient *llm.Client
+	if cfg.LLM.APIKey != "" {
+		llmClient = llm.New(cfg.LLM.BaseURL, cfg.LLM.APIKey, cfg.LLM.Model, 30*time.Second)
+	}
+
+	// 5d. Serper client; left nil when no API key is configured.
+	var serperClient *search.SerperClient
+	if cfg.Serper.APIKey != "" {
+		serperClient = search.NewSerperClient(cfg.Serper.APIKey, cfg.Serper.ResultsPerPage, cfg.Serper.MaxPages)
+	}
+
+	// 6. Start the asynq worker in the background; a worker error terminates
+	// the whole process.
+	w := worker.New(redisAddr, cfg.Redis.Password, cfg.Redis.DB, db, rdb, tgManager, llmClient, settings, serperClient, cfg.GitHub.Token)
+	go func() {
+		log.Println("asynq worker starting...")
+		if err := w.Start(); err != nil {
+			log.Fatalf("asynq worker error: %v", err)
+		}
+	}()
+
+	// 7. Build the Gin router and serve HTTP (blocks until the server stops).
+	r := handler.SetupRouter(db, rdb, taskSvc)
+
+	addr := handler.ServerAddr(cfg.Server.Port)
+	log.Printf("Server starting on %s", addr)
+	if err := r.Run(addr); err != nil {
+		log.Fatalf("gin run: %v", err)
+	}
+}

+ 30 - 0
cmd/server/seed.go

@@ -0,0 +1,30 @@
+package main
+
+import (
+	"spider/internal/model"
+
+	"gorm.io/gorm"
+)
+
+// seedSettings makes sure every built-in managed_settings key has a row.
+// Each key is looked up individually with FirstOrCreate, so the check is per
+// key rather than "table is empty": keys already stored are left as they are
+// and only missing keys are inserted.
+// NOTE: the result of each FirstOrCreate call is not inspected.
+func seedSettings(db *gorm.DB) {
+	builtins := []model.ManagedSetting{
+		{KeyName: "pipeline.skip_phases", Value: "[]", ValueType: "json", EffectLevel: "new_task", Description: "默认跳过的阶段"},
+		{KeyName: "pipeline.checkpoint_interval", Value: "30", ValueType: "int", EffectLevel: "runtime", Description: "进度上报间隔(秒)"},
+		{KeyName: "tg_scraper.message_limit_per_channel", Value: "500", ValueType: "int", EffectLevel: "runtime", Description: "每频道最大消息数"},
+		{KeyName: "tg_scraper.delay_per_message", Value: "1.0", ValueType: "float", EffectLevel: "runtime", Description: "消息间延迟(秒)"},
+		{KeyName: "tg_scraper.delay_per_channel", Value: "5.0", ValueType: "float", EffectLevel: "runtime", Description: "频道间延迟(秒)"},
+		{KeyName: "tg_scraper.delay_per_verify", Value: "3.0", ValueType: "float", EffectLevel: "runtime", Description: "验证间延迟(秒)"},
+		{KeyName: "clean.timeout_seconds", Value: "3600", ValueType: "int", EffectLevel: "runtime", Description: "清洗阶段超时(秒)"},
+		{KeyName: "search.timeout_seconds", Value: "3600", ValueType: "int", EffectLevel: "runtime", Description: "搜索阶段超时(秒)"},
+		{KeyName: "snowball.max_channels_per_layer", Value: "200", ValueType: "int", EffectLevel: "runtime", Description: "每层最大频道数"},
+		{KeyName: "snowball.max_channels_total", Value: "500", ValueType: "int", EffectLevel: "runtime", Description: "总最大频道数"},
+		{KeyName: "tme_validator.enabled", Value: "true", ValueType: "bool", EffectLevel: "runtime", Description: "启用t.me死号预检"},
+		{KeyName: "tme_validator.rate_per_min", Value: "60", ValueType: "int", EffectLevel: "runtime", Description: "预检限速(次/分)"},
+		{KeyName: "tme_validator.concurrency", Value: "10", ValueType: "int", EffectLevel: "runtime", Description: "预检并发数"},
+	}
+	for i := range builtins {
+		entry := &builtins[i]
+		lookup := model.ManagedSetting{KeyName: entry.KeyName}
+		db.Where(lookup).FirstOrCreate(entry)
+	}
+}

+ 35 - 0
configs/config.yaml

@@ -0,0 +1,35 @@
+# HTTP API server.
+server:
+  port: 8080
+
+# MySQL connection used to build the GORM DSN in cmd/server/main.go.
+mysql:
+  host: "im_mysql"   # Docker container name (on chat_im_network)
+  port: 3306
+  user: "root"
+  password: "root123"
+  database: "spider"
+
+# Redis connection; the same host/port/password/db are handed to the asynq worker.
+redis:
+  host: "im_redis"   # Docker container name (on chat_im_network)
+  port: 6379
+  password: ""
+  db: 3
+
+# Telegram API credentials; app_id/app_hash are applied to every listed account.
+telegram:
+  app_id: 0
+  app_hash: ""
+  accounts: []
+
+# OpenAI-compatible LLM endpoint. An empty api_key leaves the LLM client unset.
+llm:
+  provider: "openai"
+  base_url: "https://api.openai.com/v1"
+  api_key: ""
+  model: "gpt-4o-mini"
+  # NOTE(review): cmd/server/main.go passes a fixed 30s timeout to llm.New;
+  # confirm whether this value is actually read anywhere.
+  timeout: "30s"
+
+# Serper search API. An empty api_key leaves the Serper client unset.
+serper:
+  api_key: ""
+  results_per_page: 10
+  max_pages: 3
+
+# GitHub token passed to the worker.
+github:
+  token: ""

+ 14 - 0
deploy/Dockerfile.api

@@ -0,0 +1,14 @@
+# Build stage: compile the API server inside the Go toolchain image.
+FROM golang:1.23-alpine AS builder
+WORKDIR /app
+ENV GOTOOLCHAIN=auto
+# Copy the module manifests first so the dependency download gets its own layer.
+COPY go.mod go.sum ./
+RUN go mod download
+COPY . .
+# Build ./cmd/server for linux with cgo disabled.
+RUN CGO_ENABLED=0 GOOS=linux go build -o /app/server ./cmd/server
+
+# Runtime stage: only the compiled binary and the configs directory are copied in.
+FROM alpine:3.19
+WORKDIR /app
+COPY --from=builder /app/server .
+# The server loads configs/config.yaml relative to its working directory.
+COPY configs/ configs/
+EXPOSE 8080
+CMD ["./server"]

+ 12 - 0
deploy/Dockerfile.web

@@ -0,0 +1,12 @@
+# Build stage: install dependencies and build the frontend.
+FROM node:20-alpine AS builder
+WORKDIR /app
+# Only package.json is copied before the install step (no lockfile is copied here).
+COPY web/package.json ./
+RUN npm install --legacy-peer-deps
+COPY web/ .
+RUN npm run build
+
+# Runtime stage: nginx serves the built files from /app/dist using deploy/nginx.conf.
+FROM nginx:alpine
+COPY --from=builder /app/dist /usr/share/nginx/html
+COPY deploy/nginx.conf /etc/nginx/conf.d/default.conf
+EXPOSE 80
+CMD ["nginx", "-g", "daemon off;"]

+ 37 - 0
deploy/docker-compose.yml

@@ -0,0 +1,37 @@
+version: "3.8"
+
+services:
+  # Go API server; container port 8080 is published on host port 8200.
+  api:
+    build:
+      context: ..
+      dockerfile: deploy/Dockerfile.api
+    ports:
+      - "8200:8080"
+    volumes:
+      # The host configs directory is mounted over /app/configs in the container.
+      - ../configs:/app/configs
+      - ../sessions:/app/sessions
+    environment:
+      - GIN_MODE=release
+    restart: unless-stopped
+    networks:
+      - spider_internal
+      - external_db
+
+  # nginx serving the frontend; container port 80 is published on host port 8300.
+  web:
+    build:
+      context: ..
+      dockerfile: deploy/Dockerfile.web
+    ports:
+      - "8300:80"
+    depends_on:
+      - api
+    restart: unless-stopped
+    networks:
+      - spider_internal
+
+networks:
+  spider_internal:
+    driver: bridge
+  # Pre-existing network, not created by this compose file.
+  external_db:
+    external: true
+    name: chat_im_network   # network that im_mysql and im_redis are attached to

+ 16 - 0
deploy/init.sql

@@ -0,0 +1,16 @@
+-- Seed the default rows of managed_settings.
+-- Usage: run this file in a MySQL client, or mount it into the Docker container's init directory.
+-- INSERT IGNORE is used, so re-running the file does not fail on rows that already exist.
+-- The rows match the defaults listed in cmd/server/seed.go.
+INSERT IGNORE INTO managed_settings (key_name, value, value_type, effect_level, description) VALUES
+('pipeline.skip_phases',                  '[]',    'json',   'new_task', '默认跳过的阶段'),
+('pipeline.checkpoint_interval',          '30',    'int',    'runtime',  '进度上报间隔(秒)'),
+('tg_scraper.message_limit_per_channel',  '500',   'int',    'runtime',  '每频道最大消息数'),
+('tg_scraper.delay_per_message',          '1.0',   'float',  'runtime',  '消息间延迟(秒)'),
+('tg_scraper.delay_per_channel',          '5.0',   'float',  'runtime',  '频道间延迟(秒)'),
+('tg_scraper.delay_per_verify',           '3.0',   'float',  'runtime',  '验证间延迟(秒)'),
+('clean.timeout_seconds',                 '3600',  'int',    'runtime',  '清洗阶段超时(秒)'),
+('search.timeout_seconds',                '3600',  'int',    'runtime',  '搜索阶段超时(秒)'),
+('snowball.max_channels_per_layer',       '200',   'int',    'runtime',  '每层最大频道数'),
+('snowball.max_channels_total',           '500',   'int',    'runtime',  '总最大频道数'),
+('tme_validator.enabled',                 'true',  'bool',   'runtime',  '启用t.me死号预检'),
+('tme_validator.rate_per_min',            '60',    'int',    'runtime',  '预检限速(次/分)'),
+('tme_validator.concurrency',             '10',    'int',    'runtime',  '预检并发数');

+ 24 - 0
deploy/nginx.conf

@@ -0,0 +1,24 @@
+# Single virtual host: serves the frontend build and proxies /api/ to the api service.
+server {
+    listen 80;
+    server_name _;
+
+    # Frontend static files; paths that match no file fall back to index.html.
+    location / {
+        root /usr/share/nginx/html;
+        index index.html;
+        try_files $uri $uri/ /index.html;
+    }
+
+    # API proxy (including WebSocket).
+    # NOTE: the Connection "upgrade" header is set on every proxied request,
+    # not only on WebSocket handshakes.
+    location /api/ {
+        proxy_pass http://api:8080;
+        proxy_http_version 1.1;
+        proxy_set_header Upgrade $http_upgrade;
+        proxy_set_header Connection "upgrade";
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_read_timeout 300s;
+        proxy_send_timeout 300s;
+    }
+}

+ 820 - 0
docs/design-spec.md

@@ -0,0 +1,820 @@
+# 商户查找系统 - 设计规格书
+
+> 日期: 2026-04-09
+> 目标: 用 Go + React 完全重写现有 Python TG Lead Scraper 系统
+
+---
+
+## 一、系统概述
+
+从 Telegram 频道、搜索引擎、GitHub、导航网站自动挖掘商户联系方式,经过清洗去重验证后按质量打分,输出可用商户清单。
+
+**技术栈:**
+- 后端: Go (Gin + GORM + asynq)
+- 前端: React + Ant Design + zustand
+- 数据库: MySQL (复用现有 Docker 容器, root/root123)
+- 缓存/队列: Redis (复用现有 Docker 容器, 无密码, db3)
+- TG API: gotd/td (MTProto)
+- 网页爬取: colly (静态) + chromedp (JS 渲染)
+- LLM: OpenAI 兼容接口 (可配置切换 OpenAI/Claude/GLM 等)
+- 搜索: Serper API
+- 部署: Docker Compose (仅 Go API + Nginx, 复用已有 MySQL/Redis)
+
+---
+
+## 二、架构
+
+单体服务 + 异步任务队列。API 和 Worker 在同一个 Go 进程内。
+
+```
+┌─────────────────────────────────────┐
+│           Go 单体服务               │
+│  ┌──────────┐  ┌──────────────────┐ │
+│  │ HTTP API │  │  Task Worker     │ │
+│  │ (Gin)    │  │  (7阶段Pipeline) │ │
+│  └──────────┘  └──────────────────┘ │
+│        ↕              ↕             │
+│  ┌──────────┐  ┌──────────────────┐ │
+│  │  MySQL   │  │  Redis           │ │
+│  │  (持久化) │  │  (队列+缓存+锁) │ │
+│  └──────────┘  └──────────────────┘ │
+└─────────────────────────────────────┘
+         ↑
+   React SPA (Nginx)
+```
+
+---
+
+## 三、项目结构
+
+```
+spider/
+├── cmd/
+│   └── server/
+│       └── main.go              # 入口,启动 API + Worker
+├── internal/
+│   ├── config/                  # 配置加载 (YAML + DB managed_settings)
+│   │   └── config.go
+│   ├── model/                   # MySQL 表结构 (GORM)
+│   │   ├── seed.go
+│   │   ├── keyword.go
+│   │   ├── setting.go
+│   │   ├── channel.go
+│   │   ├── nav_site.go
+│   │   ├── merchant_raw.go
+│   │   ├── merchant_clean.go
+│   │   ├── task.go
+│   │   └── config_revision.go
+│   ├── handler/                 # HTTP handler (Gin)
+│   │   ├── task.go
+│   │   ├── merchant.go
+│   │   ├── channel.go
+│   │   ├── nav_site.go
+│   │   ├── seed.go
+│   │   ├── keyword.go
+│   │   ├── config.go
+│   │   └── dashboard.go
+│   ├── service/                 # 业务逻辑层
+│   │   ├── task_service.go
+│   │   ├── merchant_service.go
+│   │   └── config_service.go
+│   ├── pipeline/                # 7阶段 Pipeline 调度器
+│   │   ├── pipeline.go          # 调度器主逻辑
+│   │   ├── phase1_discover.go   # TG 频道裂变
+│   │   ├── phase2_search.go     # Serper 搜索
+│   │   ├── phase3_github.go     # GitHub README 挖掘
+│   │   ├── phase4_scrape.go     # TG 消息采集
+│   │   ├── phase5_crawl.go      # 网页爬取
+│   │   ├── phase6_clean.go      # 清洗三关
+│   │   └── phase7_score.go      # 评分
+│   ├── telegram/                # gotd/td 封装
+│   │   ├── client.go            # TG 客户端封装
+│   │   └── account_manager.go   # 多账号轮换 + FloodWait
+│   ├── search/                  # 搜索引擎封装
+│   │   └── serper.go            # Serper API
+│   ├── crawler/                 # 网页爬取
+│   │   ├── static.go            # colly 静态爬取
+│   │   └── dynamic.go           # chromedp JS 渲染
+│   ├── llm/                     # LLM 统一接口
+│   │   └── client.go            # OpenAI 兼容接口封装
+│   ├── extractor/               # 联系方式提取
+│   │   ├── regex.go             # 正则提取
+│   │   └── llm_extractor.go     # LLM 辅助提取
+│   └── worker/                  # asynq 任务 Worker
+│       └── worker.go
+├── web/                         # React 前端
+│   ├── src/
+│   │   ├── pages/
+│   │   │   ├── Dashboard.tsx        # 总览仪表盘
+│   │   │   ├── Tasks.tsx            # 任务管理
+│   │   │   ├── MerchantsRaw.tsx     # 原始商户表
+│   │   │   ├── MerchantsClean.tsx   # 清洗商户表
+│   │   │   ├── Channels.tsx         # 频道管理
+│   │   │   ├── NavSites.tsx         # 导航网页
+│   │   │   ├── Seeds.tsx            # 种子管理
+│   │   │   ├── Keywords.tsx         # 关键词管理
+│   │   │   └── Settings.tsx         # 系统配置
+│   │   ├── components/
+│   │   │   ├── Layout.tsx
+│   │   │   ├── TaskControl.tsx      # 7阶段独立启动按钮
+│   │   │   └── DataTable.tsx        # 通用表格组件
+│   │   ├── api/
+│   │   │   └── index.ts             # axios 封装
+│   │   └── store/
+│   │       └── index.ts             # zustand 状态管理
+│   ├── package.json
+│   └── vite.config.ts
+├── deploy/
+│   ├── docker-compose.yml
+│   ├── Dockerfile.api
+│   ├── Dockerfile.web
+│   └── nginx.conf
+├── configs/
+│   └── config.yaml              # 默认配置
+└── go.mod
+```
+
+---
+
+## 四、数据模型
+
+### 4.1 MySQL 表 (数据库名: spider)
+
+```sql
+CREATE DATABASE IF NOT EXISTS spider DEFAULT CHARACTER SET utf8mb4;
+USE spider;
+
+-- 种子管理
+CREATE TABLE managed_seeds (
+    id           BIGINT AUTO_INCREMENT PRIMARY KEY,
+    channel_name VARCHAR(255) NOT NULL UNIQUE,
+    status       ENUM('active','inactive') DEFAULT 'active',
+    note         VARCHAR(500),
+    created_at   DATETIME DEFAULT CURRENT_TIMESTAMP,
+    updated_at   DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
+);
+
+-- 关键词管理
+CREATE TABLE managed_keywords (
+    id           BIGINT AUTO_INCREMENT PRIMARY KEY,
+    keyword      VARCHAR(255) NOT NULL UNIQUE,
+    category     VARCHAR(100),
+    status       ENUM('active','inactive') DEFAULT 'active',
+    created_at   DATETIME DEFAULT CURRENT_TIMESTAMP
+);
+
+-- 运行参数
+CREATE TABLE managed_settings (
+    id           BIGINT AUTO_INCREMENT PRIMARY KEY,
+    key_name     VARCHAR(255) NOT NULL UNIQUE,
+    value        TEXT NOT NULL,
+    value_type   ENUM('int','float','bool','string','json') NOT NULL,
+    effect_level ENUM('runtime','new_task') DEFAULT 'runtime',
+    description  VARCHAR(500),
+    updated_at   DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
+);
+
+-- 发现的 TG 频道
+CREATE TABLE channels (
+    id              BIGINT AUTO_INCREMENT PRIMARY KEY,
+    username        VARCHAR(255) NOT NULL UNIQUE,
+    title           VARCHAR(500),
+    member_count    INT DEFAULT 0,
+    about           TEXT,
+    source          ENUM('seed','snowball','search','github') NOT NULL,
+    source_detail   VARCHAR(500),
+    status          ENUM('pending','scraped','failed','skipped') DEFAULT 'pending',
+    last_message_id INT DEFAULT 0,
+    relevance_score FLOAT,
+    created_at      DATETIME DEFAULT CURRENT_TIMESTAMP,
+    updated_at      DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
+    INDEX idx_status (status),
+    INDEX idx_source (source)
+);
+
+-- 候选导航网页
+CREATE TABLE nav_sites (
+    id             BIGINT AUTO_INCREMENT PRIMARY KEY,
+    url            VARCHAR(2048) NOT NULL,
+    domain         VARCHAR(255),
+    source         VARCHAR(100),
+    status         ENUM('pending','scraped','filtered','failed') DEFAULT 'pending',
+    filter_reason  VARCHAR(255),
+    merchant_count INT DEFAULT 0,
+    created_at     DATETIME DEFAULT CURRENT_TIMESTAMP,
+    INDEX idx_status (status),
+    UNIQUE INDEX idx_url (url(500))
+);
+
+-- 原始商户
+CREATE TABLE merchants_raw (
+    id               BIGINT AUTO_INCREMENT PRIMARY KEY,
+    merchant_name    VARCHAR(500),
+    tg_username      VARCHAR(255),
+    website          VARCHAR(2048),
+    email            VARCHAR(255),
+    phone            VARCHAR(100),
+    industry         VARCHAR(100),
+    source_type      ENUM('tg_scrape','web_crawl','github') NOT NULL,
+    source_id        VARCHAR(500),
+    original_message TEXT,
+    status           ENUM('raw','glm_parsed') DEFAULT 'raw',
+    created_at       DATETIME DEFAULT CURRENT_TIMESTAMP,
+    INDEX idx_status (status),
+    INDEX idx_tg_username (tg_username)
+);
+
+-- 清洗后商户
+CREATE TABLE merchants_clean (
+    id               BIGINT AUTO_INCREMENT PRIMARY KEY,
+    raw_id           BIGINT,
+    merchant_name    VARCHAR(500),
+    tg_username      VARCHAR(255),
+    website          VARCHAR(2048),
+    email            VARCHAR(255),
+    phone            VARCHAR(100),
+    industry         VARCHAR(100),
+    status           ENUM('valid','invalid','bot','duplicate','group') NOT NULL,
+    tg_first_name    VARCHAR(255),
+    tg_last_name     VARCHAR(255),
+    is_premium       TINYINT(1) DEFAULT 0,
+    last_online      DATETIME,
+    active_level     ENUM('active','moderate','inactive'),
+    member_count     INT DEFAULT 0,
+    quality_score    FLOAT DEFAULT 0,
+    source_count     INT DEFAULT 1,
+    source_links     JSON,
+    created_at       DATETIME DEFAULT CURRENT_TIMESTAMP,
+    updated_at       DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
+    UNIQUE INDEX idx_tg_username (tg_username),
+    INDEX idx_status (status),
+    INDEX idx_quality (quality_score),
+    INDEX idx_industry (industry)
+);
+
+-- 任务记录
+CREATE TABLE tasks (
+    id           BIGINT AUTO_INCREMENT PRIMARY KEY,
+    task_type    ENUM('full','discover','search','github','scrape','crawl','clean','score') NOT NULL,
+    status       ENUM('pending','running','completed','failed','stopped') DEFAULT 'pending',
+    params       JSON,
+    progress     JSON,
+    result       JSON,
+    error_msg    TEXT,
+    started_at   DATETIME,
+    finished_at  DATETIME,
+    created_at   DATETIME DEFAULT CURRENT_TIMESTAMP,
+    INDEX idx_status (status)
+);
+
+-- 配置变更审计
+CREATE TABLE config_revisions (
+    id           BIGINT AUTO_INCREMENT PRIMARY KEY,
+    setting_key  VARCHAR(255) NOT NULL,
+    old_value    TEXT,
+    new_value    TEXT,
+    changed_by   VARCHAR(100) DEFAULT 'admin',
+    created_at   DATETIME DEFAULT CURRENT_TIMESTAMP
+);
+```
+
+### 4.2 Redis 用途 (db3)
+
+| Key 模式 | 类型 | 用途 | TTL |
+|---|---|---|---|
+| `spider:task:queue:*` | asynq 内部 | 任务队列 | asynq 管理 |
+| `spider:task:progress:{id}` | Hash | 实时进度 | 24h |
+| `spider:task:lock:{type}` | String | 同类型任务互斥锁 | 自动释放 |
+| `spider:tg:floodwait:{account}` | String | 账号冷却截止时间 | 按实际冷却 |
+| `spider:cache:channel:{username}` | Hash | 频道信息缓存 | 24h |
+| `spider:cache:settings` | Hash | managed_settings 热缓存 | 5min |
+| `spider:dedup:merchant:{username}` | String | 采集时快速去重 | 7d |
+
+所有 key 加 `spider:` 前缀避免和其他服务冲突。
+
+---
+
+## 五、API 接口
+
+前缀: `/api/v1`
+
+### 5.1 任务管理
+
+```
+POST   /tasks/start
+  Body: {
+    "task_type": "full|discover|search|github|scrape|crawl|clean|score",
+    "target": "可选, 频道名或关键词",
+    "test_run": { "item_limit": 10, "message_limit": 50 },  // 可选
+    "skip_phases": ["scrape"]  // 可选
+  }
+  Response: { "task_id": 1, "status": "pending" }
+
+POST   /tasks/:id/stop
+  Body: { "force": false }
+
+GET    /tasks
+  Query: ?status=running&page=1&page_size=20
+  Response: { "items": [...], "total": 100 }
+
+GET    /tasks/:id
+  Response: { "id":1, "task_type":"full", "status":"running", "progress": {...} }
+
+GET    /tasks/:id/logs
+  WebSocket 实时推送日志
+```
+
+### 5.2 商户数据
+
+```
+GET    /merchants/raw
+  Query: ?status=raw&source_type=tg_scrape&page=1&page_size=20
+  默认排序: created_at DESC
+
+GET    /merchants/clean
+  Query: ?status=valid&industry=机场&min_score=60&sort=quality_score&order=desc&page=1&page_size=20
+
+GET    /merchants/:id
+  Response: 商户详情 (raw 或 clean 自动判断)
+
+GET    /merchants/stats
+  Response: { "raw_total":1946, "clean_total":125, "valid":62, "by_source":{...}, "by_industry":{...} }
+```
+
+### 5.3 频道管理
+
+```
+GET    /channels
+  Query: ?status=pending&source=snowball&page=1&page_size=20
+
+GET    /channels/stats
+  Response: { "total":1907, "by_status":{...}, "by_source":{...} }
+```
+
+### 5.4 导航网页
+
+```
+GET    /nav-sites
+  Query: ?status=pending&page=1&page_size=20
+```
+
+### 5.5 种子管理
+
+```
+GET    /seeds
+POST   /seeds             Body: { "channel_name": "@bbs3000", "note": "综合频道" }
+PUT    /seeds/:id          Body: { "status": "inactive", "note": "..." }
+DELETE /seeds/:id
+```
+
+### 5.6 关键词管理
+
+```
+GET    /keywords           Query: ?category=机场&status=active
+POST   /keywords           Body: { "keywords": ["机场推荐","发卡网"], "category": "机场" }  // 支持批量
+PUT    /keywords/:id       Body: { "keyword": "...", "category": "..." }
+DELETE /keywords/:id
+```
+
+### 5.7 系统配置
+
+```
+GET    /config/settings
+PUT    /config/settings/:key   Body: { "value": "500" }  // 自动写审计日志
+```
+
+### 5.8 仪表盘
+
+```
+GET    /dashboard
+  Response: {
+    "channels_total": 1907,
+    "merchants_raw_total": 1946,
+    "merchants_clean_total": 125,
+    "merchants_valid": 62,
+    "nav_sites_total": 646,
+    "recent_tasks": [...最近5个任务],
+    "running_task": null | {...}
+  }
+```
+
+---
+
+## 六、Pipeline 7 阶段详细逻辑
+
+### Phase 1: discover (频道裂变)
+
+**输入:** managed_seeds (status=active)
+**输出:** channels 表
+
+1. 从 managed_seeds 拿所有 active 种子
+2. 用 gotd/td 连接 TG,进入每个种子频道
+3. 读最近 100 条消息,提取:
+   - forward_from 频道
+   - 消息内 t.me/xxx 链接
+   - TG 推荐频道
+4. 新频道作为第二层种子继续裂变
+5. 限制: max_depth=3, 每层 max_channels_per_layer(200), 总数 max_channels_total(500)
+6. 每个频道间 sleep 5s
+7. 写入 channels 表, source='seed'(第一层) 或 'snowball'(裂变层)
+
+**FloodWait 处理:** 由 AccountManager 统一管理
+
+### Phase 2: search (搜索引擎)
+
+**输入:** managed_keywords (status=active)
+**输出:** channels + nav_sites
+
+1. 从 managed_keywords 拿所有 active 关键词
+2. 对每个关键词调 Serper API,翻页获取结果
+3. 分拣结果 URL:
+   - `t.me/xxx` → 写 channels 表, source='search'
+   - 域名含 nav/list/catalog/directory → 写 nav_sites 表
+   - 博客/社交媒体/产品官网 → 丢弃
+4. 关键词间 sleep 2s
+
+### Phase 3: github (GitHub 采集)
+
+**输入:** 预设 query 列表 (从 managed_keywords 生成)
+**输出:** channels 表
+
+1. 用 GitHub Search API 搜索 repo (按 star 排序)
+2. 下载每个 repo 的 README.md
+3. 过滤: README 前 5000 字必须含中文
+4. 正则匹配 t.me/xxx 链接
+5. 链接前后 200 字必须含中文
+6. repo 间 sleep 2s, query 间 sleep 5s
+7. 写入 channels 表, source='github'
+
+**注意:** 加 GitHub token 到配置,提升到 30 req/min
+
+### Phase 4: scrape (TG 消息采集)
+
+**输入:** channels 表 (status=pending)
+**输出:** merchants_raw
+
+1. 取 status='pending' 的频道
+2. 对每个频道:
+   a. get_entity 解析频道
+   b. LLM 相关性评估 (频道名+简介+成员数) → 不相关则 skip
+   c. 读频道简介 (about)
+   d. 读置顶消息 (limit=20)
+   e. 遍历历史消息 (limit=500, 断点续传用 last_message_id)
+3. 每条消息:
+   - 系统消息跳过
+   - 非中文跳过
+   - 正则快速判是否含联系方式
+   - 有联系方式 → LLM 精准解析提取商户 (failover 到正则)
+   - 写入 merchants_raw, source_type='tg_scrape'
+4. 消息间 sleep 由 managed_settings 控制
+
+### Phase 5: crawl (网页爬取)
+
+**输入:** nav_sites 表 (status=pending)
+**输出:** merchants_raw
+
+1. 取 status='pending' 的网页
+2. 预过滤 (规则引擎):
+   - 黑名单域名 (80+) → filtered
+   - 黑名单扩展名 (40+) → filtered
+   - 黑名单路径 → filtered
+   - 正向信号 (nav/directory/catalog) → 通过
+   - 不确定 → LLM 二次过滤
+3. 通过的网页:
+   - colly 抓取 → 失败则 chromedp (JS 渲染) → 仍失败标 failed
+   - HTML 前 5000 字非中文跳过
+   - 解析商户链接
+4. 每个商户链接:
+   - 有 @tg_username → t.me 死号预检 (抓网页看有无头像)
+   - 活号 → 写 merchants_raw, source_type='web_crawl'
+   - 只有网站 → 爬商户首页 + /contact, /about 等子页,正则提取联系方式
+
+### Phase 6: clean (清洗三关)
+
+**输入:** merchants_raw (status=raw)
+**输出:** merchants_clean
+
+**第一关: 黑名单过滤 (本地, 毫秒级)**
+- 系统 bot 用户名 (26个 + xxxbot 后缀) → 标 bot
+- 邀请链接哈希 (16-24位 base64 + 高熵) → 标 invalid
+- original_message 非空且不含中文 → 标 invalid
+
+**第二关: 去重 (本地, 毫秒级)**
+- 同 tg_username 多条记录,按信息丰富度打分 (有 website/email/phone 加分)
+- 保留最丰富的一条,其余标 duplicate
+- 合并所有 source 链接到 keeper 的 source_links
+
+**第三关: TG 真实性验证 (需 TG API)**
+- gotd/td get_entity 验证 username
+- 返回 User 且非 bot → valid (顺便拿 first_name/last_name/is_premium/last_online)
+- 返回 Bot → bot
+- 返回 Channel/Chat → group
+- UsernameNotOccupied → invalid
+- FloodWait 处理同上
+
+### Phase 7: score (评分)
+
+**输入:** merchants_clean (status=valid)
+**输出:** 更新 merchants_clean.quality_score
+
+6 维度加权打分 (总权重 1.0):
+
+| 维度 | 权重 | 规则 |
+|---|---|---|
+| member_count | 0.25 | <100→10, <1,000→30, <10,000→50, <100,000→80, ≥100,000→100 |
+| premium | 0.15 | is_premium=true→100, false→0 |
+| activity | 0.25 | active→100, moderate→50, inactive→20 |
+| multi_source | 0.20 | source_count≥4→100, 3→70, 2→40, 1→10 |
+| has_website | 0.10 | website非空→100, 空→0 |
+| has_email | 0.05 | email非空→100, 空→0 |
+
+quality_score = 各维度得分 × 权重 之和 (0-100)
+
+---
+
+## 七、TG 账号管理
+
+```go
+// AccountManager 管理多个 TG 账号的连接和限速
+type AccountManager struct {
+    accounts []TGAccount
+    mu       sync.Mutex
+    redis    *redis.Client
+}
+
+type TGAccount struct {
+    Phone     string
+    SessionFile string
+    Client    *telegram.Client   // gotd/td client
+    CoolUntil time.Time
+}
+
+// Acquire 获取一个当前可用的账号
+// 跳过正在冷却的账号,全部冷却则返回错误
+func (m *AccountManager) Acquire(ctx context.Context) (*TGAccount, error)
+
+// Release 归还账号,如果触发了 FloodWait 则标记冷却
+// 冷却截止时间同步到 Redis,重启不丢失
+func (m *AccountManager) Release(acc *TGAccount, floodWait time.Duration)
+
+// FloodWait 策略:
+// ≤60s  → 当前账号等待后重试
+// >60s 且 ≤300s → Release + 切换账号
+// >300s → 整轮 break,标记所有账号最小冷却 300s
+```
+
+---
+
+## 八、LLM 统一接口
+
+```go
+// LLMClient 封装 OpenAI 兼容接口
+type LLMClient struct {
+    client   *openai.Client
+    model    string        // 可配置: gpt-4o / claude-3-sonnet / glm-4 等
+    baseURL  string        // 可配置: 指向不同提供商
+    timeout  time.Duration
+}
+
+// 用途 1: 频道相关性评估
+func (c *LLMClient) EvalChannelRelevance(name, about string, memberCount int) (float64, error)
+
+// 用途 2: 消息商户解析
+func (c *LLMClient) ParseMerchant(message string) (*MerchantInfo, error)
+
+// 用途 3: 行业分类
+func (c *LLMClient) ClassifyIndustry(name, about string) (string, error)
+
+// 用途 4: 导航站判断
+func (c *LLMClient) IsNavSite(url string) (bool, float64, error)
+```
+
+配置示例 (config.yaml):
+```yaml
+llm:
+  provider: "openai"        # openai / claude / glm
+  base_url: "https://api.openai.com/v1"
+  api_key: "sk-xxx"
+  model: "gpt-4o-mini"
+  timeout: 30s
+```
+
+---
+
+## 九、联系方式提取器
+
+```go
+// 正则提取 (优先, 零成本)
+func ExtractByRegex(text string) *ContactInfo
+
+// 正则模式:
+// TG 用户名: @[a-zA-Z][a-zA-Z0-9_]{4,31}
+// TG 链接: t\.me/[a-zA-Z0-9_]{5,32}
+// 变体: t点me, t . me, tg: 等
+// 邮箱: 标准 email 正则
+// 电话: +国际区号格式
+// 网址: https?://...
+// 微信变体: 加V, 加v, vx, wx, 微信 后跟联系方式
+
+// LLM 提取 (正则没提取到但文本可能含非标准联系方式时)
+func ExtractByLLM(client *LLMClient, text string) *ContactInfo
+```
+
+---
+
+## 十、网页爬取策略
+
+```
+请求优先级:
+1. colly (静态 HTTP, 最快)
+2. chromedp (无头浏览器, 处理 JS 渲染)
+
+预过滤规则引擎:
+- 黑名单域名: t.me, twitter.com, google.com, facebook.com ... (80+)
+- 黑名单扩展名: .apk, .zip, .pdf, .exe ... (40+)
+- 黑名单路径: /api/, /login/, ?ref= ...
+- 正向信号: URL 含 nav/directory/catalog/list → 直接通过
+- 不确定 → LLM 二次过滤 (置信度 ≥0.6 放行)
+
+t.me 死号预检:
+- 抓 t.me/{username} 网页
+- 没有头像元素 → 判定死号, 直接丢弃
+- 有头像 → 活号, 继续入库
+```
+
+---
+
+## 十一、前端页面
+
+使用 React + Ant Design + zustand + vite
+
+### 页面清单
+
+| 页面 | 路径 | 功能 |
+|---|---|---|
+| 仪表盘 | / | 各表计数、运行中任务、最近任务 |
+| 任务管理 | /tasks | 7阶段独立启动按钮、full pipeline、任务列表、实时进度、停止 |
+| 原始商户 | /merchants/raw | 表格 (分页/过滤/按created_at desc) |
+| 清洗商户 | /merchants/clean | 表格 (分页/过滤/排序by score) |
+| 频道列表 | /channels | 表格 (分页/状态过滤/来源过滤) |
+| 导航网页 | /nav-sites | 表格 (分页/状态过滤) |
+| 种子管理 | /seeds | CRUD 表格 |
+| 关键词管理 | /keywords | CRUD 表格 (支持批量添加) |
+| 系统配置 | /settings | 三个 tab: 种子/关键词/流水线参数 |
+| 系统日志 | /logs | 实时运行日志 (WebSocket) |
+
+### 布局
+
+```
+┌────────────────────────────────────┐
+│  Logo    商户查找系统         Admin │  ← 顶栏
+├──────┬─────────────────────────────┤
+│      │                             │
+│ 仪表盘│        内容区域              │
+│ 任务  │                             │
+│ 商户  │                             │
+│ 频道  │                             │
+│ 网页  │                             │
+│ 配置  │                             │
+│ 日志  │                             │
+│      │                             │
+├──────┴─────────────────────────────┤
+└────────────────────────────────────┘
+         ← 侧边栏导航
+```
+
+---
+
+## 十二、配置文件
+
+```yaml
+# configs/config.yaml
+
+server:
+  port: 8080
+
+mysql:
+  host: "mysql"           # Docker 容器名
+  port: 3306
+  user: "root"
+  password: "root123"
+  database: "spider"
+
+redis:
+  host: "redis"           # Docker 容器名
+  port: 6379
+  password: ""
+  db: 3
+
+telegram:
+  accounts:
+    - phone: "+1234567890"
+      session_file: "sessions/account_01.session"
+    - phone: "+0987654321"
+      session_file: "sessions/account_02.session"
+  app_id: 12345
+  app_hash: "abcdef1234567890"
+
+llm:
+  provider: "openai"
+  base_url: "https://api.openai.com/v1"
+  api_key: "sk-xxx"
+  model: "gpt-4o-mini"
+  timeout: 30s
+
+serper:
+  api_key: "xxx"
+  results_per_page: 10
+  max_pages: 3
+
+github:
+  token: ""               # 可选, 有则 30 req/min
+```
+
+---
+
+## 十三、Docker Compose 部署
+
+```yaml
+# deploy/docker-compose.yml
+version: "3.8"
+
+services:
+  api:
+    build:
+      context: ..
+      dockerfile: deploy/Dockerfile.api
+    ports:
+      - "8080:8080"
+    volumes:
+      - ../configs:/app/configs
+      - ../sessions:/app/sessions
+    depends_on: []
+    restart: unless-stopped
+    networks:
+      - default
+      - external_db    # 连接已有 MySQL/Redis 的网络
+
+  web:
+    build:
+      context: ..
+      dockerfile: deploy/Dockerfile.web
+    ports:
+      - "80:80"
+    depends_on:
+      - api
+    restart: unless-stopped
+
+networks:
+  external_db:
+    external: true       # 已有 MySQL/Redis 所在的 Docker network
+    name: <现有网络名>    # 需要确认
+```
+
+---
+
+## 十四、managed_settings 初始值
+
+| key | value | type | effect_level | description |
+|---|---|---|---|---|
+| pipeline.skip_phases | [] | json | new_task | 默认跳过的阶段 |
+| pipeline.checkpoint_interval | 30 | int | runtime | 进度上报间隔(秒) |
+| tg_scraper.message_limit_per_channel | 500 | int | runtime | 每频道最大消息数 |
+| tg_scraper.delay_per_message | 1.0 | float | runtime | 消息间延迟(秒) |
+| tg_scraper.delay_per_channel | 5.0 | float | runtime | 频道间延迟(秒) |
+| tg_scraper.delay_per_verify | 3.0 | float | runtime | 验证间延迟(秒) |
+| clean.timeout_seconds | 3600 | int | runtime | 清洗阶段超时 |
+| search.timeout_seconds | 3600 | int | runtime | 搜索阶段超时 |
+| snowball.max_channels_per_layer | 200 | int | runtime | 每层最大频道数 |
+| snowball.max_channels_total | 500 | int | runtime | 总最大频道数 |
+| tme_validator.enabled | true | bool | runtime | 启用t.me死号预检 |
+| tme_validator.rate_per_min | 60 | int | runtime | 预检限速 |
+| tme_validator.concurrency | 10 | int | runtime | 预检并发数 |
+
+---
+
+## 十五、相比原 Python 系统的改进点
+
+### 15.1 架构层面
+1. **Go 单二进制部署** — 无 Python venv 依赖地狱
+2. **MySQL 替代 SQLite** — 支持并发写入、不会锁库
+3. **Redis 任务队列** — 替代文件 pipeline_state.json,支持分布式锁
+4. **结构化配置** — YAML + DB managed_settings,热加载
+
+### 15.2 TG 限速改进
+5. **独立节流器** — cleaner verify 有自己的 rate limiter,不和 scraper 共享,避免互相干扰
+6. **冷却状态持久化** — FloodWait 状态存 Redis,服务重启不丢失,不会重复触发限速
+7. **采集时实时去重** — Redis dedup key,同一 username 不会重复写入 raw 表,减少后续清洗压力
+
+### 15.3 采集改进
+8. **GitHub 配 token** — 从 10 req/min 提升到 30 req/min
+9. **chromedp 替代 playwright** — 纯 Go 方案,无需额外安装 Node/浏览器依赖
+10. **LLM 可切换** — 不绑定 GLM,支持 OpenAI/Claude/GLM 随时切换
+
+### 15.4 数据质量
+11. **merchant_name 清洗** — 入库前 strip HTML 标签,修复原系统的污染问题
+12. **采集层去重前置** — Redis 布隆过滤器或 set,在写 merchants_raw 之前就去重
+
+### 15.5 运维
+13. **WebSocket 实时日志** — 替代轮询日志文件
+14. **Docker Compose 一键部署** — 不依赖 systemd 手动配置

+ 295 - 0
docs/implementation-plan.md

@@ -0,0 +1,295 @@
+# 商户查找系统 - 实现计划
+
+> 基于 design-spec.md,按依赖顺序拆分为 12 个步骤
+> 每个步骤产出可编译/可运行的代码
+
+---
+
+## Step 1: 项目初始化 + 基础骨架
+
+**目标:** Go module 初始化、目录结构、配置加载、数据库连接
+
+**具体任务:**
+1. `go mod init spider` 并安装核心依赖:
+   - gin, gorm, gorm/driver/mysql, go-redis/v9, asynq
+   - gotd/td, colly, chromedp, go-openai
+2. 创建 `configs/config.yaml` (参考 design-spec 第十二节)
+3. 实现 `internal/config/config.go` — 用 viper 加载 YAML 配置
+4. 实现 `internal/model/` 下所有表结构 (GORM model),参考 design-spec 第四节:
+   - seed.go, keyword.go, setting.go, channel.go, nav_site.go
+   - merchant_raw.go, merchant_clean.go, task.go, config_revision.go
+5. 实现 `cmd/server/main.go` — 启动时连接 MySQL + Redis,AutoMigrate 建表
+6. 创建 `deploy/docker-compose.yml`, `deploy/Dockerfile.api`
+
+**验收:** `go run cmd/server/main.go` 启动成功,MySQL 中建好所有表
+
+---
+
+## Step 2: HTTP API 基础框架 + CRUD 接口
+
+**目标:** Gin 路由注册,实现管理类 CRUD
+
+**具体任务:**
+1. 实现 `internal/handler/seed.go` — 种子 CRUD (GET/POST/PUT/DELETE /api/v1/seeds)
+2. 实现 `internal/handler/keyword.go` — 关键词 CRUD (支持批量添加)
+3. 实现 `internal/handler/config.go` — 配置读取/修改 (写审计日志到 config_revisions)
+4. 实现 `internal/handler/dashboard.go` — 仪表盘统计
+5. 实现 `internal/handler/merchant.go` — 商户列表 (raw + clean, 分页/过滤/排序)
+6. 实现 `internal/handler/channel.go` — 频道列表 + 统计
+7. 实现 `internal/handler/nav_site.go` — 导航网页列表
+8. 在 main.go 注册所有路由,统一错误处理和分页中间件
+
+**验收:** 用 curl 测试所有 CRUD 接口正常
+
+---
+
+## Step 3: 任务系统 (asynq Worker)
+
+**目标:** 实现任务创建、调度、进度上报、停止机制
+
+**具体任务:**
+1. 实现 `internal/worker/worker.go`:
+   - 初始化 asynq server + mux
+   - 注册 7 种 task handler
+2. 实现 `internal/service/task_service.go`:
+   - StartTask: 创建 task 记录 → 推入 Redis 队列
+   - StopTask: 通过 context cancel 停止
+   - GetProgress: 从 Redis 读实时进度
+3. 实现 `internal/handler/task.go`:
+   - POST /tasks/start, POST /tasks/:id/stop
+   - GET /tasks, GET /tasks/:id
+4. 实现 Redis 分布式锁 (同类型任务互斥, full 全局互斥)
+5. 实现进度上报: 各 phase 循环体内定期写 Redis hash
+6. 在 main.go 里同时启动 Gin server 和 asynq worker
+
+**验收:** 能启动一个 mock 任务,看到状态从 pending→running→completed,进度实时更新
+
+---
+
+## Step 4: Pipeline 调度器
+
+**目标:** 实现 pipeline 主逻辑和 7 阶段框架
+
+**具体任务:**
+1. 实现 `internal/pipeline/pipeline.go`:
+   - Run(ctx, taskID, taskType, params) — 根据 taskType 调度对应 phase
+   - full 类型按顺序串行 Phase 1→7,skip_phases 跳过指定阶段
+   - 每个 phase 执行前后更新 task progress
+   - context cancel 检查 (停止机制)
+2. 每个 phase 文件先实现空框架 (interface + stub):
+   ```go
+   type Phase interface {
+       Name() string
+       Run(ctx context.Context, task *model.Task) error
+   }
+   ```
+3. 实现 managed_settings 热加载:
+   - 启动时从 DB 加载到 Redis 缓存
+   - runtime 级别参数从缓存读取
+   - 修改参数时更新缓存
+
+**验收:** 启动 full pipeline 任务,能看到 7 个 phase 依次执行 (stub 直接 pass)
+
+---
+
+## Step 5: LLM 统一接口 + 联系方式提取器
+
+**目标:** 实现 LLM 调用和正则/LLM 联系方式提取
+
+**具体任务:**
+1. 实现 `internal/llm/client.go`:
+   - 封装 go-openai client,支持配置 baseURL/model 切换
+   - EvalChannelRelevance(name, about, memberCount) → score
+   - ParseMerchant(message) → MerchantInfo
+   - ClassifyIndustry(name, about) → industry string
+   - IsNavSite(url) → (bool, confidence)
+   - 统一 timeout 和错误处理
+2. 实现 `internal/extractor/regex.go`:
+   - TG 用户名正则: @[a-zA-Z][a-zA-Z0-9_]{4,31}
+   - TG 链接: t\.me/[a-zA-Z0-9_]{5,32}
+   - 变体: t点me, t . me, tg:
+   - 邮箱、电话、网址标准正则
+   - 微信变体: 加V/vx/wx/微信
+3. 实现 `internal/extractor/llm_extractor.go`:
+   - 正则无结果时 fallback 到 LLM 提取
+4. 中文检测函数: ContainsChinese(text, threshold)
+
+**验收:** 单元测试覆盖各种格式的联系方式提取
+
+---
+
+## Step 6: TG 客户端封装 + 账号管理
+
+**目标:** 封装 gotd/td,实现多账号管理和 FloodWait 处理
+
+**具体任务:**
+1. 实现 `internal/telegram/client.go`:
+   - 封装 gotd/td 连接/认证
+   - GetEntity(username) — 解析频道/用户
+   - GetChannelMessages(channel, limit, offsetID) — 读历史消息
+   - GetChannelInfo(channel) — 读简介+成员数
+   - GetPinnedMessages(channel, limit) — 读置顶消息
+2. 实现 `internal/telegram/account_manager.go`:
+   - Acquire(ctx) → 获取可用账号
+   - Release(acc, floodWait) → 归还+标记冷却
+   - 冷却状态存 Redis `spider:tg:floodwait:{phone}`
+   - FloodWait 策略: ≤60s 等待重试, >60s 且 ≤300s 切换账号, >300s 整轮 break
+3. 配置文件读取 TG 账号列表
+
+**验收:** 能连接 TG,获取一个公开频道的信息和消息
+
+---
+
+## Step 7: Phase 1 (discover) + Phase 4 (scrape)
+
+**目标:** 实现 TG 频道裂变和消息采集
+
+**具体任务:**
+1. 实现 `internal/pipeline/phase1_discover.go`:
+   - 从 managed_seeds 拿 active 种子
+   - BFS 裂变: 读消息 → 提取 forward_from + t.me 链接
+   - max_depth=3, 每层 max_channels_per_layer, 总数 max_channels_total
+   - 写入 channels 表 (去重: username UNIQUE)
+   - 频道间 sleep, context cancel 检查
+2. 实现 `internal/pipeline/phase4_scrape.go`:
+   - 取 channels status=pending
+   - LLM 相关性评估 → 不相关标 skipped
+   - 读简介 + 置顶 + 历史消息
+   - 正则快速判 → 有联系方式则 LLM 精准解析
+   - 写入 merchants_raw (Redis dedup 去重)
+   - 断点续传: 更新 channels.last_message_id
+
+**验收:** 从一个种子裂变出频道,并从频道采集到商户
+
+---
+
+## Step 8: Phase 2 (search) + Phase 3 (github)
+
+**目标:** 实现搜索引擎和 GitHub 采集
+
+**具体任务:**
+1. 实现 `internal/search/serper.go`:
+   - 调 Serper API,支持翻页
+   - 返回结构化结果 (url, title, snippet)
+2. 实现 `internal/pipeline/phase2_search.go`:
+   - 从 managed_keywords 拿 active 关键词
+   - 调 Serper 搜索
+   - 分拣: t.me → channels, 导航站 → nav_sites
+   - 关键词间 sleep
+3. 实现 `internal/pipeline/phase3_github.go`:
+   - GitHub Search API 搜 repo (支持 token)
+   - 下载 README,过滤非中文
+   - 正则提取 t.me 链接 (仅保留链接前后 200 字范围内含中文的)
+   - 写入 channels 表
+
+**验收:** 用几个关键词搜索到频道和导航站,从 GitHub 挖到 TG 链接
+
+---
+
+## Step 9: Phase 5 (crawl) — 网页爬取
+
+**目标:** 实现导航站爬取和商户提取
+
+**具体任务:**
+1. 实现 `internal/crawler/static.go` — colly 静态爬取
+2. 实现 `internal/crawler/dynamic.go` — chromedp JS 渲染 fallback
+3. 实现预过滤规则引擎:
+   - 黑名单域名/扩展名/路径
+   - 正向信号检测
+   - LLM 二次过滤
+4. 实现 t.me 死号预检 (HTTP 抓 t.me 网页检查头像)
+5. 实现 `internal/pipeline/phase5_crawl.go`:
+   - 取 nav_sites status=pending
+   - 预过滤 → 爬取 → 解析商户链接
+   - 死号预检 → 写入 merchants_raw
+   - 商户官网子页爬取 (/contact, /about)
+
+**验收:** 从导航站提取出商户信息
+
+---
+
+## Step 10: Phase 6 (clean) + Phase 7 (score)
+
+**目标:** 实现清洗三关和评分
+
+**具体任务:**
+1. 实现 `internal/pipeline/phase6_clean.go`:
+   - 第一关: 黑名单过滤 (bot 名单 + 邀请链接 + 非中文)
+   - 第二关: 去重 (同 username 按丰富度保留最优, 合并 source)
+   - 第三关: TG 验证 (get_entity, 有独立 rate limiter)
+   - 结果写入 merchants_clean
+2. 实现 `internal/pipeline/phase7_score.go`:
+   - 6 维度加权打分
+   - 更新 merchants_clean.quality_score
+
+**验收:** raw 商户经过清洗后进入 clean 表,valid 商户有评分
+
+---
+
+## Step 11: React 前端
+
+**目标:** 实现完整管理后台
+
+**具体任务:**
+1. `npm create vite@latest web -- --template react-ts`
+2. 安装 antd, zustand, axios, react-router-dom
+3. 实现 Layout (侧边栏导航 + 顶栏)
+4. 实现页面:
+   - Dashboard — 各表计数卡片 + 运行中任务 + 最近任务列表
+   - Tasks — 7阶段独立启动按钮 + full pipeline 按钮 + 任务列表 + 进度条 + 停止按钮
+   - MerchantsRaw — Ant Design Table (分页/过滤/默认 created_at desc)
+   - MerchantsClean — Table (分页/过滤/按 quality_score 排序)
+   - Channels — Table (分页/状态过滤/来源过滤) + 统计
+   - NavSites — Table (分页/状态过滤)
+   - Seeds — CRUD Table (新增/编辑/删除)
+   - Keywords — CRUD Table (支持批量添加)
+   - Settings — 三个 Tab (种子/关键词/流水线参数)
+   - Logs — WebSocket 实时日志展示
+5. API 层 (`web/src/api/index.ts`) — axios 封装所有后端接口
+6. 状态管理 (`web/src/store/`) — zustand
+
+**验收:** 前端能展示所有数据,能启动/停止任务,能管理种子和关键词
+
+---
+
+## Step 12: Docker 部署 + 联调
+
+**目标:** Docker Compose 完整部署,前后端联调
+
+**具体任务:**
+1. `deploy/Dockerfile.api` — 多阶段构建 Go 二进制
+2. `deploy/Dockerfile.web` — 构建 React + Nginx 静态文件
+3. `deploy/nginx.conf` — 前端静态 + /api 反代到 Go 服务
+4. `deploy/docker-compose.yml` — api + web 两个服务,连接外部 MySQL/Redis 网络
+5. 补充 managed_settings 初始数据 (参考 design-spec 第十四节)
+6. 完整流程联调: 添加种子 → 启动 full pipeline → 查看商户结果
+
+**验收:** `docker-compose up` 一键启动,浏览器访问管理后台,全流程可跑通
+
+---
+
+## 依赖关系
+
+```
+Step 1 (骨架)
+  ↓
+Step 2 (API CRUD)
+  ↓
+Step 3 (任务系统) → Step 4 (Pipeline 调度)
+  ↓                      ↓
+Step 5 (LLM+提取器)   Step 6 (TG 封装)
+  ↓                      ↓
+Step 7 (Phase 1+4, TG 采集)
+  ↓
+Step 8 (Phase 2+3, 搜索+GitHub)
+  ↓
+Step 9 (Phase 5, 网页爬取)
+  ↓
+Step 10 (Phase 6+7, 清洗+评分)
+  ↓
+Step 11 (React 前端)
+  ↓
+Step 12 (Docker 部署+联调)
+```
+
+**注意:** Step 5 和 Step 6 相互独立,可以并行实现。Step 11 (前端) 只依赖 Step 2 的 CRUD API 和 Step 3 的任务 API,因此可以在 Step 3 完成之后就开始与后续步骤并行开发。

+ 120 - 0
go.mod

@@ -0,0 +1,120 @@
+module spider
+
+go 1.26
+
+require (
+	github.com/chromedp/chromedp v0.15.1
+	github.com/gin-gonic/gin v1.12.0
+	github.com/gocolly/colly/v2 v2.3.0
+	github.com/gorilla/websocket v1.5.3
+	github.com/gotd/td v0.143.0
+	github.com/hibiken/asynq v0.26.0
+	github.com/redis/go-redis/v9 v9.14.1
+	github.com/sashabaranov/go-openai v1.41.2
+	github.com/spf13/viper v1.17.0
+	golang.org/x/net v0.52.0
+	gorm.io/datatypes v1.2.7
+	gorm.io/driver/mysql v1.6.0
+	gorm.io/gorm v1.31.1
+)
+
+require (
+	filippo.io/edwards25519 v1.1.0 // indirect
+	github.com/PuerkitoBio/goquery v1.11.0 // indirect
+	github.com/andybalholm/cascadia v1.3.3 // indirect
+	github.com/antchfx/htmlquery v1.3.5 // indirect
+	github.com/antchfx/xmlquery v1.5.0 // indirect
+	github.com/antchfx/xpath v1.3.5 // indirect
+	github.com/bits-and-blooms/bitset v1.24.4 // indirect
+	github.com/bytedance/gopkg v0.1.3 // indirect
+	github.com/bytedance/sonic v1.15.0 // indirect
+	github.com/bytedance/sonic/loader v0.5.0 // indirect
+	github.com/cenkalti/backoff/v4 v4.3.0 // indirect
+	github.com/cespare/xxhash/v2 v2.3.0 // indirect
+	github.com/chromedp/cdproto v0.0.0-20260321001828-e3e3800016bc // indirect
+	github.com/chromedp/sysutil v1.1.0 // indirect
+	github.com/cloudwego/base64x v0.1.6 // indirect
+	github.com/coder/websocket v1.8.14 // indirect
+	github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
+	github.com/dlclark/regexp2 v1.11.5 // indirect
+	github.com/fatih/color v1.18.0 // indirect
+	github.com/fsnotify/fsnotify v1.6.0 // indirect
+	github.com/gabriel-vasile/mimetype v1.4.12 // indirect
+	github.com/ghodss/yaml v1.0.0 // indirect
+	github.com/gin-contrib/sse v1.1.0 // indirect
+	github.com/go-faster/errors v0.7.1 // indirect
+	github.com/go-faster/jx v1.2.0 // indirect
+	github.com/go-faster/xor v1.0.0 // indirect
+	github.com/go-faster/yaml v0.4.6 // indirect
+	github.com/go-json-experiment/json v0.0.0-20260214004413-d219187c3433 // indirect
+	github.com/go-playground/locales v0.14.1 // indirect
+	github.com/go-playground/universal-translator v0.18.1 // indirect
+	github.com/go-playground/validator/v10 v10.30.1 // indirect
+	github.com/go-sql-driver/mysql v1.8.1 // indirect
+	github.com/gobwas/glob v0.2.3 // indirect
+	github.com/gobwas/httphead v0.1.0 // indirect
+	github.com/gobwas/pool v0.2.1 // indirect
+	github.com/gobwas/ws v1.4.0 // indirect
+	github.com/goccy/go-json v0.10.5 // indirect
+	github.com/goccy/go-yaml v1.19.2 // indirect
+	github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect
+	github.com/golang/protobuf v1.5.4 // indirect
+	github.com/google/uuid v1.6.0 // indirect
+	github.com/gotd/ige v0.2.2 // indirect
+	github.com/gotd/neo v0.1.5 // indirect
+	github.com/hashicorp/hcl v1.0.0 // indirect
+	github.com/jinzhu/inflection v1.0.0 // indirect
+	github.com/jinzhu/now v1.1.5 // indirect
+	github.com/json-iterator/go v1.1.12 // indirect
+	github.com/kennygrant/sanitize v1.2.4 // indirect
+	github.com/klauspost/compress v1.18.5 // indirect
+	github.com/klauspost/cpuid/v2 v2.3.0 // indirect
+	github.com/leodido/go-urn v1.4.0 // indirect
+	github.com/magiconair/properties v1.8.7 // indirect
+	github.com/mattn/go-colorable v0.1.14 // indirect
+	github.com/mattn/go-isatty v0.0.20 // indirect
+	github.com/mitchellh/mapstructure v1.5.0 // indirect
+	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
+	github.com/modern-go/reflect2 v1.0.2 // indirect
+	github.com/nlnwa/whatwg-url v0.6.2 // indirect
+	github.com/ogen-go/ogen v1.19.0 // indirect
+	github.com/pelletier/go-toml/v2 v2.2.4 // indirect
+	github.com/quic-go/qpack v0.6.0 // indirect
+	github.com/quic-go/quic-go v0.59.0 // indirect
+	github.com/robfig/cron/v3 v3.0.1 // indirect
+	github.com/sagikazarmark/locafero v0.3.0 // indirect
+	github.com/sagikazarmark/slog-shim v0.1.0 // indirect
+	github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d // indirect
+	github.com/segmentio/asm v1.2.1 // indirect
+	github.com/shopspring/decimal v1.4.0 // indirect
+	github.com/sourcegraph/conc v0.3.0 // indirect
+	github.com/spf13/afero v1.10.0 // indirect
+	github.com/spf13/cast v1.10.0 // indirect
+	github.com/spf13/pflag v1.0.5 // indirect
+	github.com/subosito/gotenv v1.6.0 // indirect
+	github.com/temoto/robotstxt v1.1.2 // indirect
+	github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
+	github.com/ugorji/go/codec v1.3.1 // indirect
+	go.mongodb.org/mongo-driver/v2 v2.5.0 // indirect
+	go.opentelemetry.io/otel v1.42.0 // indirect
+	go.opentelemetry.io/otel/metric v1.42.0 // indirect
+	go.opentelemetry.io/otel/trace v1.42.0 // indirect
+	go.uber.org/atomic v1.11.0 // indirect
+	go.uber.org/multierr v1.11.0 // indirect
+	go.uber.org/zap v1.27.1 // indirect
+	golang.org/x/arch v0.22.0 // indirect
+	golang.org/x/crypto v0.49.0 // indirect
+	golang.org/x/exp v0.0.0-20230905200255-921286631fa9 // indirect
+	golang.org/x/mod v0.34.0 // indirect
+	golang.org/x/sync v0.20.0 // indirect
+	golang.org/x/sys v0.42.0 // indirect
+	golang.org/x/text v0.35.0 // indirect
+	golang.org/x/time v0.14.0 // indirect
+	golang.org/x/tools v0.43.0 // indirect
+	google.golang.org/appengine v1.6.8 // indirect
+	google.golang.org/protobuf v1.36.10 // indirect
+	gopkg.in/ini.v1 v1.67.0 // indirect
+	gopkg.in/yaml.v2 v2.4.0 // indirect
+	gopkg.in/yaml.v3 v3.0.1 // indirect
+	rsc.io/qr v0.2.0 // indirect
+)

+ 795 - 0
go.sum

@@ -0,0 +1,795 @@
+cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
+cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
+cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU=
+cloud.google.com/go v0.44.1/go.mod h1:iSa0KzasP4Uvy3f1mN/7PiObzGgflwredwwASm/v6AU=
+cloud.google.com/go v0.44.2/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY=
+cloud.google.com/go v0.44.3/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY=
+cloud.google.com/go v0.45.1/go.mod h1:RpBamKRgapWJb87xiFSdk4g1CME7QZg3uwTez+TSTjc=
+cloud.google.com/go v0.46.3/go.mod h1:a6bKKbmY7er1mI7TEI4lsAkts/mkhTSZK8w33B4RAg0=
+cloud.google.com/go v0.50.0/go.mod h1:r9sluTvynVuxRIOHXQEHMFffphuXHOMZMycpNR5e6To=
+cloud.google.com/go v0.52.0/go.mod h1:pXajvRH/6o3+F9jDHZWQ5PbGhn+o8w9qiu/CffaVdO4=
+cloud.google.com/go v0.53.0/go.mod h1:fp/UouUEsRkN6ryDKNW/Upv/JBKnv6WDthjR6+vze6M=
+cloud.google.com/go v0.54.0/go.mod h1:1rq2OEkV3YMf6n/9ZvGWI3GWw0VoqH/1x2nd8Is/bPc=
+cloud.google.com/go v0.56.0/go.mod h1:jr7tqZxxKOVYizybht9+26Z/gUq7tiRzu+ACVAMbKVk=
+cloud.google.com/go v0.57.0/go.mod h1:oXiQ6Rzq3RAkkY7N6t3TcE6jE+CIBBbA36lwQ1JyzZs=
+cloud.google.com/go v0.62.0/go.mod h1:jmCYTdRCQuc1PHIIJ/maLInMho30T/Y0M4hTdTShOYc=
+cloud.google.com/go v0.65.0/go.mod h1:O5N8zS7uWy9vkA9vayVHs65eM1ubvY4h553ofrNHObY=
+cloud.google.com/go v0.72.0/go.mod h1:M+5Vjvlc2wnp6tjzE102Dw08nGShTscUx2nZMufOKPI=
+cloud.google.com/go v0.74.0/go.mod h1:VV1xSbzvo+9QJOxLDaJfTjx5e+MePCpCWwvftOeQmWk=
+cloud.google.com/go v0.75.0/go.mod h1:VGuuCn7PG0dwsd5XPVm2Mm3wlh3EL55/79EKB6hlPTY=
+cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o=
+cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE=
+cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc=
+cloud.google.com/go/bigquery v1.5.0/go.mod h1:snEHRnqQbz117VIFhE8bmtwIDY80NLUZUMb4Nv6dBIg=
+cloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4gLoIoXIAPc=
+cloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ=
+cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE=
+cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk=
+cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I=
+cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw=
+cloud.google.com/go/pubsub v1.2.0/go.mod h1:jhfEVHT8odbXTkndysNHCcx0awwzvfOlguIAii9o8iA=
+cloud.google.com/go/pubsub v1.3.1/go.mod h1:i+ucay31+CNRpDW4Lu78I4xXG+O1r/MAHgjpRVR+TSU=
+cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw=
+cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0ZeosJ0Rtdos=
+cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk=
+cloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RXyy7KQOVs=
+cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9ullr3+Kg0=
+cloud.google.com/go/storage v1.14.0/go.mod h1:GrKmX003DSIwi9o29oFT7YDnHYwZoctc3fOKtUw0Xmo=
+dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
+filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA=
+filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4=
+github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
+github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
+github.com/PuerkitoBio/goquery v1.11.0 h1:jZ7pwMQXIITcUXNH83LLk+txlaEy6NVOfTuP43xxfqw=
+github.com/PuerkitoBio/goquery v1.11.0/go.mod h1:wQHgxUOU3JGuj3oD/QFfxUdlzW6xPHfqyHre6VMY4DQ=
+github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM=
+github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA=
+github.com/antchfx/htmlquery v1.3.5 h1:aYthDDClnG2a2xePf6tys/UyyM/kRcsFRm+ifhFKoU0=
+github.com/antchfx/htmlquery v1.3.5/go.mod h1:5oyIPIa3ovYGtLqMPNjBF2Uf25NPCKsMjCnQ8lvjaoA=
+github.com/antchfx/xmlquery v1.5.0 h1:uAi+mO40ZWfyU6mlUBxRVvL6uBNZ6LMU4M3+mQIBV4c=
+github.com/antchfx/xmlquery v1.5.0/go.mod h1:lJfWRXzYMK1ss32zm1GQV3gMIW/HFey3xDZmkP1SuNc=
+github.com/antchfx/xpath v1.3.5 h1:PqbXLC3TkfeZyakF5eeh3NTWEbYl4VHNVeufANzDbKQ=
+github.com/antchfx/xpath v1.3.5/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs=
+github.com/bits-and-blooms/bitset v1.20.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
+github.com/bits-and-blooms/bitset v1.24.4 h1:95H15Og1clikBrKr/DuzMXkQzECs1M6hhoGXLwLQOZE=
+github.com/bits-and-blooms/bitset v1.24.4/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
+github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs=
+github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c=
+github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA=
+github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=
+github.com/bytedance/gopkg v0.1.3 h1:TPBSwH8RsouGCBcMBktLt1AymVo2TVsBVCY4b6TnZ/M=
+github.com/bytedance/gopkg v0.1.3/go.mod h1:576VvJ+eJgyCzdjS+c4+77QF3p7ubbtiKARP3TxducM=
+github.com/bytedance/sonic v1.15.0 h1:/PXeWFaR5ElNcVE84U0dOHjiMHQOwNIx3K4ymzh/uSE=
+github.com/bytedance/sonic v1.15.0/go.mod h1:tFkWrPz0/CUCLEF4ri4UkHekCIcdnkqXw9VduqpJh0k=
+github.com/bytedance/sonic/loader v0.5.0 h1:gXH3KVnatgY7loH5/TkeVyXPfESoqSBSBEiDd5VjlgE=
+github.com/bytedance/sonic/loader v0.5.0/go.mod h1:AR4NYCk5DdzZizZ5djGqQ92eEhCCcdf5x77udYiSJRo=
+github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8=
+github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
+github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
+github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
+github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
+github.com/chromedp/cdproto v0.0.0-20260321001828-e3e3800016bc h1:wkN/LMi5vc60pBRWx6qpbk/aEvq3/ZVNpnMvsw8PVVU=
+github.com/chromedp/cdproto v0.0.0-20260321001828-e3e3800016bc/go.mod h1:cbyjALe67vDvlvdiG9369P8w5U2w6IshwtyD2f2Tvag=
+github.com/chromedp/chromedp v0.15.1 h1:EJWiPm7BNqDqjYy6U0lTSL5wNH+iNt9GjC3a4gfjNyQ=
+github.com/chromedp/chromedp v0.15.1/go.mod h1:CdTHtUqD/dqaFw/cvFWtTydoEQS44wLBuwbMR9EkOY4=
+github.com/chromedp/sysutil v1.1.0 h1:PUFNv5EcprjqXZD9nJb9b/c9ibAbxiYo4exNWZyipwM=
+github.com/chromedp/sysutil v1.1.0/go.mod h1:WiThHUdltqCNKGc4gaU50XgYjwjYIhKWoHGPTUfWTJ8=
+github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
+github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
+github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
+github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
+github.com/cloudwego/base64x v0.1.6 h1:t11wG9AECkCDk5fMSoxmufanudBtJ+/HemLstXDLI2M=
+github.com/cloudwego/base64x v0.1.6/go.mod h1:OFcloc187FXDaYHvrNIjxSe8ncn0OOM8gEHfghB2IPU=
+github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
+github.com/cncf/udpa/go v0.0.0-20200629203442-efcf912fb354/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk=
+github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk=
+github.com/coder/websocket v1.8.14 h1:9L0p0iKiNOibykf283eHkKUHHrpG7f65OE3BhhO7v9g=
+github.com/coder/websocket v1.8.14/go.mod h1:NX3SzP+inril6yawo5CQXx8+fk145lPDC6pumgx0mVg=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
+github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
+github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
+github.com/dlclark/regexp2 v1.11.5 h1:Q/sSnsKerHeCkc/jSTNq1oCm7KiVgUMZRDUoRu0JQZQ=
+github.com/dlclark/regexp2 v1.11.5/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
+github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
+github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
+github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
+github.com/envoyproxy/go-control-plane v0.9.7/go.mod h1:cwu0lG7PUMfa9snN8LXBig5ynNVH9qI8YYLbd1fK2po=
+github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk=
+github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
+github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM=
+github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU=
+github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
+github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
+github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY=
+github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw=
+github.com/gabriel-vasile/mimetype v1.4.12 h1:e9hWvmLYvtp846tLHam2o++qitpguFiYCKbn0w9jyqw=
+github.com/gabriel-vasile/mimetype v1.4.12/go.mod h1:d+9Oxyo1wTzWdyVUPMmXFvp4F9tea18J8ufA774AB3s=
+github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk=
+github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
+github.com/gin-contrib/sse v1.1.0 h1:n0w2GMuUpWDVp7qSpvze6fAu9iRxJY4Hmj6AmBOU05w=
+github.com/gin-contrib/sse v1.1.0/go.mod h1:hxRZ5gVpWMT7Z0B0gSNYqqsSCNIJMjzvm6fqCz9vjwM=
+github.com/gin-gonic/gin v1.12.0 h1:b3YAbrZtnf8N//yjKeU2+MQsh2mY5htkZidOM7O0wG8=
+github.com/gin-gonic/gin v1.12.0/go.mod h1:VxccKfsSllpKshkBWgVgRniFFAzFb9csfngsqANjnLc=
+github.com/go-faster/errors v0.7.1 h1:MkJTnDoEdi9pDabt1dpWf7AA8/BaSYZqibYyhZ20AYg=
+github.com/go-faster/errors v0.7.1/go.mod h1:5ySTjWFiphBs07IKuiL69nxdfd5+fzh1u7FPGZP2quo=
+github.com/go-faster/jx v1.2.0 h1:T2YHJPrFaYu21fJtUxC9GzmluKu8rVIFDwwGBKTDseI=
+github.com/go-faster/jx v1.2.0/go.mod h1:UWLOVDmMG597a5tBFPLIWJdUxz5/2emOpfsj9Neg0PE=
+github.com/go-faster/xor v0.3.0/go.mod h1:x5CaDY9UKErKzqfRfFZdfu+OSTfoZny3w5Ak7UxcipQ=
+github.com/go-faster/xor v1.0.0 h1:2o8vTOgErSGHP3/7XwA5ib1FTtUsNtwCoLLBjl31X38=
+github.com/go-faster/xor v1.0.0/go.mod h1:x5CaDY9UKErKzqfRfFZdfu+OSTfoZny3w5Ak7UxcipQ=
+github.com/go-faster/yaml v0.4.6 h1:lOK/EhI04gCpPgPhgt0bChS6bvw7G3WwI8xxVe0sw9I=
+github.com/go-faster/yaml v0.4.6/go.mod h1:390dRIvV4zbnO7qC9FGo6YYutc+wyyUSHBgbXL52eXk=
+github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
+github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
+github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
+github.com/go-json-experiment/json v0.0.0-20260214004413-d219187c3433 h1:vymEbVwYFP/L05h5TKQxvkXoKxNvTpjxYKdF1Nlwuao=
+github.com/go-json-experiment/json v0.0.0-20260214004413-d219187c3433/go.mod h1:tphK2c80bpPhMOI4v6bIc2xWywPfbqi1Z06+RcrMkDg=
+github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
+github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
+github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
+github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
+github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
+github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
+github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
+github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
+github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY=
+github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
+github.com/go-playground/validator/v10 v10.30.1 h1:f3zDSN/zOma+w6+1Wswgd9fLkdwy06ntQJp0BBvFG0w=
+github.com/go-playground/validator/v10 v10.30.1/go.mod h1:oSuBIQzuJxL//3MelwSLD5hc2Tu889bF0Idm9Dg26cM=
+github.com/go-sql-driver/mysql v1.8.1 h1:LedoTUt/eveggdHS9qUFC1EFSa8bU2+1pZjSRpvNJ1Y=
+github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg=
+github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y=
+github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8=
+github.com/gobwas/httphead v0.1.0 h1:exrUm0f4YX0L7EBwZHuCF4GDp8aJfVeBrlLQrs6NqWU=
+github.com/gobwas/httphead v0.1.0/go.mod h1:O/RXo79gxV8G+RqlR/otEwx4Q36zl9rqC5u12GKvMCM=
+github.com/gobwas/pool v0.2.1 h1:xfeeEhW7pwmX8nuLVlqbzVc7udMDrwetjEv+TZIz1og=
+github.com/gobwas/pool v0.2.1/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw=
+github.com/gobwas/ws v1.4.0 h1:CTaoG1tojrh4ucGPcoJFiAQUAsEWekEWvLy7GsVNqGs=
+github.com/gobwas/ws v1.4.0/go.mod h1:G3gNqMNtPppf5XUz7O4shetPpcZ1VJ7zt18dlUeakrc=
+github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4=
+github.com/goccy/go-json v0.10.5/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M=
+github.com/goccy/go-yaml v1.19.2 h1:PmFC1S6h8ljIz6gMRBopkjP1TVT7xuwrButHID66PoM=
+github.com/goccy/go-yaml v1.19.2/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA=
+github.com/gocolly/colly/v2 v2.3.0 h1:HSFh0ckbgVd2CSGRE+Y/iA4goUhGROJwyQDCMXGFBWM=
+github.com/gocolly/colly/v2 v2.3.0/go.mod h1:Qp54s/kQbwCQvFVx8KzKCSTXVJ1wWT4QeAKEu33x1q8=
+github.com/golang-sql/civil v0.0.0-20220223132316-b832511892a9 h1:au07oEsX2xN0ktxqI+Sida1w446QrXBRJ0nee3SNZlA=
+github.com/golang-sql/civil v0.0.0-20220223132316-b832511892a9/go.mod h1:8vg3r2VgvsThLBIFL93Qb5yWzgyZWhEmBwUJWevAkK0=
+github.com/golang-sql/sqlexp v0.1.0 h1:ZCD6MBpcuOVfGVqsEmY5/4FtYiKz6tSyUv9LPEDei6A=
+github.com/golang-sql/sqlexp v0.1.0/go.mod h1:J4ad9Vo8ZCWQ2GMrC4UCQy1JpCbwU9m3EOqtpKwwwHI=
+github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
+github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
+github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
+github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
+github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
+github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 h1:f+oWsMOmNPc8JmEHVZIycC7hBoQxHH9pNKQORJNozsQ=
+github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8/go.mod h1:wcDNUvekVysuuOpQKo3191zZyTpiI6se1N1ULghS0sw=
+github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
+github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
+github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y=
+github.com/golang/mock v1.4.0/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw=
+github.com/golang/mock v1.4.1/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw=
+github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw=
+github.com/golang/mock v1.4.4/go.mod h1:l3mdAwkq5BuhzHwde/uurv3sEJeZMXNpwsxVWU71h+4=
+github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
+github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
+github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
+github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
+github.com/golang/protobuf v1.3.4/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
+github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk=
+github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
+github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
+github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
+github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
+github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
+github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
+github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
+github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
+github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
+github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
+github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
+github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
+github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
+github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
+github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
+github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
+github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
+github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.4.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
+github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
+github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
+github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
+github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=
+github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0=
+github.com/google/martian/v3 v3.1.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0=
+github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
+github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
+github.com/google/pprof v0.0.0-20191218002539-d4f498aebedc/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
+github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
+github.com/google/pprof v0.0.0-20200229191704-1ebb73c60ed3/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
+github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
+github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
+github.com/google/pprof v0.0.0-20201023163331-3e6fc7fc9c4c/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
+github.com/google/pprof v0.0.0-20201203190320-1bf35d6f28c2/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
+github.com/google/pprof v0.0.0-20201218002935-b9804c9f04c2/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
+github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
+github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
+github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg=
+github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk=
+github.com/googleapis/google-cloud-go-testing v0.0.0-20200911160855-bcd43fbb19e8/go.mod h1:dvDLG8qkwmyD9a/MJJN3XJcT3xFxOKAvTZGvuZmac9g=
+github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
+github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
+github.com/gotd/ige v0.2.2 h1:XQ9dJZwBfDnOGSTxKXBGP4gMud3Qku2ekScRjDWWfEk=
+github.com/gotd/ige v0.2.2/go.mod h1:tuCRb+Y5Y3eNTo3ypIfNpQ4MFjrnONiL2jN2AKZXmb0=
+github.com/gotd/neo v0.1.5 h1:oj0iQfMbGClP8xI59x7fE/uHoTJD7NZH9oV1WNuPukQ=
+github.com/gotd/neo v0.1.5/go.mod h1:9A2a4bn9zL6FADufBdt7tZt+WMhvZoc5gWXihOPoiBQ=
+github.com/gotd/td v0.143.0 h1:p0U/Nn92zXmAsahDn5CIVzay2kQ36lBBENT/FlWR2nQ=
+github.com/gotd/td v0.143.0/go.mod h1:8GA5ecTI5iswLwBAlqf0u6/+j+BqSWUARSrX2Xk1usQ=
+github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
+github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
+github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4=
+github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
+github.com/hibiken/asynq v0.26.0 h1:1Zxr92MlDnb1Zt/QR5g2vSCqUS03i95lUfqx5X7/wrw=
+github.com/hibiken/asynq v0.26.0/go.mod h1:Qk4e57bTnWDoyJ67VkchuV6VzSM9IQW2nPvAGuDyw58=
+github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
+github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
+github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
+github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
+github.com/jackc/pgservicefile v0.0.0-20231201235250-de7065d80cb9 h1:L0QtFUgDarD7Fpv9jeVMgy/+Ec0mtnmYuImjTz6dtDA=
+github.com/jackc/pgservicefile v0.0.0-20231201235250-de7065d80cb9/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM=
+github.com/jackc/pgx/v5 v5.5.5 h1:amBjrZVmksIdNjxGW/IiIMzxMKZFelXbUoPNb+8sjQw=
+github.com/jackc/pgx/v5 v5.5.5/go.mod h1:ez9gk+OAat140fv9ErkZDYFWmXLfV+++K0uAOiwgm1A=
+github.com/jackc/puddle/v2 v2.2.1 h1:RhxXJtFG022u4ibrCSMSiu5aOq1i77R3OHKNJj77OAk=
+github.com/jackc/puddle/v2 v2.2.1/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
+github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E=
+github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc=
+github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ=
+github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8=
+github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
+github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
+github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
+github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
+github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o=
+github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak=
+github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
+github.com/klauspost/compress v1.18.5 h1:/h1gH5Ce+VWNLSWqPzOVn6XBO+vJbCNGvjoaGBFW2IE=
+github.com/klauspost/compress v1.18.5/go.mod h1:cwPg85FWrGar70rWktvGQj8/hthj3wpl0PGDogxkrSQ=
+github.com/klauspost/cpuid/v2 v2.3.0 h1:S4CRMLnYUhGeDFDqkGriYKdfoFlDnMtqTiI/sFzhA9Y=
+github.com/klauspost/cpuid/v2 v2.3.0/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0=
+github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg=
+github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
+github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
+github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
+github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
+github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
+github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
+github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80 h1:6Yzfa6GP0rIo/kULo2bwGEkFvCePZ3qHDDTC3/J9Swo=
+github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80/go.mod h1:imJHygn/1yfhB7XSJJKlFZKl/J+dCPAknuiaGOshXAs=
+github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
+github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
+github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0VQdvPDY=
+github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0=
+github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE=
+github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8=
+github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
+github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
+github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU=
+github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
+github.com/microsoft/go-mssqldb v1.7.2 h1:CHkFJiObW7ItKTJfHo1QX7QBBD1iV+mn1eOyRP3b/PA=
+github.com/microsoft/go-mssqldb v1.7.2/go.mod h1:kOvZKUdrhhFQmxLZqbwUV0rHkNkZpthMITIb2Ko1IoA=
+github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
+github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
+github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
+github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
+github.com/nlnwa/whatwg-url v0.6.2 h1:jU61lU2ig4LANydbEJmA2nPrtCGiKdtgT0rmMd2VZ/Q=
+github.com/nlnwa/whatwg-url v0.6.2/go.mod h1:x0FPXJzzOEieQtsBT/AKvbiBbQ46YlL6Xa7m02M1ECk=
+github.com/ogen-go/ogen v1.19.0 h1:YvdNpeQJ8A8dLLpS6Vs4WxXL53BT6tBPxH0VSjfALhA=
+github.com/ogen-go/ogen v1.19.0/go.mod h1:DeShwO+TEpLYXNCuZliSAedphphXsJaTGGbmSomWUjE=
+github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde h1:x0TT0RDC7UhAVbbWWBzr41ElhJx5tXPWkIHA2HWPRuw=
+github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde/go.mod h1:nZgzbfBr3hhjoZnS66nKrHmduYNpc34ny7RK4z5/HM0=
+github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4=
+github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY=
+github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pkg/sftp v1.13.1/go.mod h1:3HaPG6Dq1ILlpPZRO0HVMrsydcdLt6HRDccSgb87qRg=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
+github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
+github.com/quic-go/qpack v0.6.0 h1:g7W+BMYynC1LbYLSqRt8PBg5Tgwxn214ZZR34VIOjz8=
+github.com/quic-go/qpack v0.6.0/go.mod h1:lUpLKChi8njB4ty2bFLX2x4gzDqXwUpaO1DP9qMDZII=
+github.com/quic-go/quic-go v0.59.0 h1:OLJkp1Mlm/aS7dpKgTc6cnpynnD2Xg7C1pwL6vy/SAw=
+github.com/quic-go/quic-go v0.59.0/go.mod h1:upnsH4Ju1YkqpLXC305eW3yDZ4NfnNbmQRCMWS58IKU=
+github.com/redis/go-redis/v9 v9.14.1 h1:nDCrEiJmfOWhD76xlaw+HXT0c9hfNWeXgl0vIRYSDvQ=
+github.com/redis/go-redis/v9 v9.14.1/go.mod h1:huWgSWd8mW6+m0VPhJjSSQ+d6Nh1VICQ6Q5lHuCH/Iw=
+github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs=
+github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro=
+github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
+github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
+github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
+github.com/sagikazarmark/locafero v0.3.0 h1:zT7VEGWC2DTflmccN/5T1etyKvxSxpHsjb9cJvm4SvQ=
+github.com/sagikazarmark/locafero v0.3.0/go.mod h1:w+v7UsPNFwzF1cHuOajOOzoq4U7v/ig1mpRjqV+Bu1U=
+github.com/sagikazarmark/slog-shim v0.1.0 h1:diDBnUNK9N/354PgrxMywXnAwEr1QZcOr6gto+ugjYE=
+github.com/sagikazarmark/slog-shim v0.1.0/go.mod h1:SrcSrq8aKtyuqEI1uvTDTK1arOWRIczQRv+GVI1AkeQ=
+github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d h1:hrujxIzL1woJ7AwssoOcM/tq5JjjG2yYOc8odClEiXA=
+github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU=
+github.com/sashabaranov/go-openai v1.41.2 h1:vfPRBZNMpnqu8ELsclWcAvF19lDNgh1t6TVfFFOPiSM=
+github.com/sashabaranov/go-openai v1.41.2/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
+github.com/segmentio/asm v1.2.1 h1:DTNbBqs57ioxAD4PrArqftgypG4/qNpXoJx8TVXxPR0=
+github.com/segmentio/asm v1.2.1/go.mod h1:BqMnlJP91P8d+4ibuonYZw9mfnzI9HfxselHZr5aAcs=
+github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
+github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME=
+github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo=
+github.com/sourcegraph/conc v0.3.0/go.mod h1:Sdozi7LEKbFPqYX2/J+iBAM6HpqSLTASQIKqDmF7Mt0=
+github.com/spf13/afero v1.10.0 h1:EaGW2JJh15aKOejeuJ+wpFSHnbd7GE6Wvp3TsNhb6LY=
+github.com/spf13/afero v1.10.0/go.mod h1:UBogFpq8E9Hx+xc5CNTTEpTnuHVmXDwZcZcE1eb/UhQ=
+github.com/spf13/cast v1.10.0 h1:h2x0u2shc1QuLHfxi+cTJvs30+ZAHOGRic8uyGTDWxY=
+github.com/spf13/cast v1.10.0/go.mod h1:jNfB8QC9IA6ZuY2ZjDp0KtFO2LZZlg4S/7bzP6qqeHo=
+github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
+github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
+github.com/spf13/viper v1.17.0 h1:I5txKw7MJasPL/BrfkbA0Jyo/oELqVmux4pR/UxOMfI=
+github.com/spf13/viper v1.17.0/go.mod h1:BmMMMLQXSbcHK6KAOiFLz0l5JHrU89OdIRHvsk0+yVI=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
+github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
+github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
+github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
+github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
+github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
+github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
+github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
+github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
+github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
+github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8=
+github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU=
+github.com/temoto/robotstxt v1.1.2 h1:W2pOjSJ6SWvldyEuiFXNxz3xZ8aiWX5LbfDiOFd7Fxg=
+github.com/temoto/robotstxt v1.1.2/go.mod h1:+1AmkuG3IYkh1kv0d2qEB9Le88ehNO0zwOr3ujewlOo=
+github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
+github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
+github.com/ugorji/go/codec v1.3.1 h1:waO7eEiFDwidsBN6agj1vJQ4AG7lh2yqXyOXqhgQuyY=
+github.com/ugorji/go/codec v1.3.1/go.mod h1:pRBVtBSKl77K30Bv8R2P+cLSGaTtex6fsA2Wjqmfxj4=
+github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
+go.mongodb.org/mongo-driver/v2 v2.5.0 h1:yXUhImUjjAInNcpTcAlPHiT7bIXhshCTL3jVBkF3xaE=
+go.mongodb.org/mongo-driver/v2 v2.5.0/go.mod h1:yOI9kBsufol30iFsl1slpdq1I0eHPzybRWdyYUs8K/0=
+go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
+go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
+go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
+go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
+go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
+go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk=
+go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64=
+go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y=
+go.opentelemetry.io/otel v1.42.0 h1:lSQGzTgVR3+sgJDAU/7/ZMjN9Z+vUip7leaqBKy4sho=
+go.opentelemetry.io/otel v1.42.0/go.mod h1:lJNsdRMxCUIWuMlVJWzecSMuNjE7dOYyWlqOXWkdqCc=
+go.opentelemetry.io/otel/metric v1.42.0 h1:2jXG+3oZLNXEPfNmnpxKDeZsFI5o4J+nz6xUlaFdF/4=
+go.opentelemetry.io/otel/metric v1.42.0/go.mod h1:RlUN/7vTU7Ao/diDkEpQpnz3/92J9ko05BIwxYa2SSI=
+go.opentelemetry.io/otel/trace v1.42.0 h1:OUCgIPt+mzOnaUTpOQcBiM/PLQ/Op7oq6g4LenLmOYY=
+go.opentelemetry.io/otel/trace v1.42.0/go.mod h1:f3K9S+IFqnumBkKhRJMeaZeNk9epyhnCmQh/EysQCdc=
+go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE=
+go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0=
+go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
+go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
+go.uber.org/mock v0.6.0 h1:hyF9dfmbgIX5EfOdasqLsWD6xqpNZlXblLB/Dbnwv3Y=
+go.uber.org/mock v0.6.0/go.mod h1:KiVJ4BqZJaMj4svdfmHM0AUx4NJYO8ZNpPnZn1Z+BBU=
+go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
+go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
+go.uber.org/zap v1.27.1 h1:08RqriUEv8+ArZRYSTXy1LeBScaMpVSTBhCeaZYfMYc=
+go.uber.org/zap v1.27.1/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E=
+golang.org/x/arch v0.22.0 h1:c/Zle32i5ttqRXjdLyyHZESLD/bB90DCU1g9l/0YBDI=
+golang.org/x/arch v0.22.0/go.mod h1:dNHoOeKiyja7GTvF9NJS1l3Z2yntpQNzgrjh1cU103A=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
+golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
+golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
+golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
+golang.org/x/crypto v0.0.0-20210421170649-83a5a9bb288b/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4=
+golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
+golang.org/x/crypto v0.0.0-20220722155217-630584e8d5aa/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
+golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc=
+golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
+golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
+golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
+golang.org/x/crypto v0.32.0/go.mod h1:ZnnJkOaASj8g0AjIduWNlq2NRxL0PlBrbKVyZ6V/Ugc=
+golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4=
+golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA=
+golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
+golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
+golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
+golang.org/x/exp v0.0.0-20190829153037-c13cbed26979/go.mod h1:86+5VVa7VpoJ4kLfm080zCjGlMRFzhUhsZKEZO7MGek=
+golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE3MuO9GYsAcnJvJ4vnMwN/5qkY=
+golang.org/x/exp v0.0.0-20191129062945-2f5052295587/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
+golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
+golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
+golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM=
+golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU=
+golang.org/x/exp v0.0.0-20230905200255-921286631fa9 h1:GoHiUyI/Tp2nVkLI2mCxVkOjsbSXD66ic0XW0js0R9g=
+golang.org/x/exp v0.0.0-20230905200255-921286631fa9/go.mod h1:S2oDrQGGwySpoQPVqRShND87VCbxmc6bL1Yd2oYrm6k=
+golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
+golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
+golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
+golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
+golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
+golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
+golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
+golang.org/x/lint v0.0.0-20190909230951-414d861bb4ac/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
+golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
+golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod h1:5qLYkcX4OjUUV8bRuDixDT3tpyyb+LUpUlRWLxfhWrs=
+golang.org/x/lint v0.0.0-20200130185559-910be7a94367/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
+golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
+golang.org/x/lint v0.0.0-20201208152925-83fdc39ff7b5/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
+golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE=
+golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o=
+golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc=
+golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY=
+golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
+golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
+golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/mod v0.4.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
+golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
+golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
+golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
+golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
+golang.org/x/mod v0.34.0 h1:xIHgNUUnW6sYkcM5Jleh05DvLOtwc6RitGHbDk4akRI=
+golang.org/x/mod v0.34.0/go.mod h1:ykgH52iCZe79kzLLMhyCUzhMci+nQj+0XkbXpNYtVjY=
+golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
+golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
+golang.org/x/net v0.0.0-20200506145744-7e3656a0809f/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
+golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
+golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
+golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
+golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
+golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
+golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
+golang.org/x/net v0.0.0-20201031054903-ff519b6c9102/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
+golang.org/x/net v0.0.0-20201209123823-ac852fbbde11/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
+golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
+golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
+golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
+golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
+golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
+golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
+golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk=
+golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
+golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
+golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
+golang.org/x/net v0.34.0/go.mod h1:di0qlW3YNM5oh6GqDGQr92MyTozJPmybPK4Ev/Gm31k=
+golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0=
+golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw=
+golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
+golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
+golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
+golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
+golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
+golang.org/x/oauth2 v0.0.0-20200902213428-5d25da1a8d43/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
+golang.org/x/oauth2 v0.0.0-20201109201403-9fd604954f58/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
+golang.org/x/oauth2 v0.0.0-20201208152858-08078c50e5b5/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
+golang.org/x/oauth2 v0.0.0-20210218202405-ba52d332ba99/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
+golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
+golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4=
+golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0=
+golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200331124033-c3d80250170d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200501052902-10377860bb8e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200511232937-7e40ca221e25/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200515095857-1151b9dac4a9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200905004654-be1d3432aa8f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20201201145000-ef89a241ccb3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210104204734-6f8348627aad/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210225134936-a50acf3fe073/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210423185535-09eb48e85fd7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
+golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
+golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
+golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
+golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
+golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU=
+golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk=
+golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY=
+golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM=
+golang.org/x/term v0.28.0/go.mod h1:Sw/lC2IAUZ92udQNf3WodGtn4k/XoLyZoh8v/8uiwek=
+golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
+golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
+golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
+golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
+golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
+golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
+golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
+golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
+golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8=
+golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA=
+golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
+golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
+golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
+golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI=
+golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
+golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
+golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
+golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
+golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
+golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
+golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
+golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
+golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
+golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
+golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.0.0-20191130070609-6e064ea0cf2d/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.0.0-20191216173652-a0e659d51361/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
+golang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
+golang.org/x/tools v0.0.0-20200117161641-43d50277825c/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
+golang.org/x/tools v0.0.0-20200122220014-bf1340f18c4a/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
+golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
+golang.org/x/tools v0.0.0-20200204074204-1cc6d1ef6c74/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
+golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
+golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
+golang.org/x/tools v0.0.0-20200224181240-023911ca70b2/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
+golang.org/x/tools v0.0.0-20200227222343-706bc42d1f0d/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
+golang.org/x/tools v0.0.0-20200304193943-95d2e580d8eb/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw=
+golang.org/x/tools v0.0.0-20200312045724-11d5b4c81c7d/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw=
+golang.org/x/tools v0.0.0-20200331025713-a30bf2db82d4/go.mod h1:Sl4aGygMT6LrqrWclx+PTx3U+LnKx/seiNR+3G19Ar8=
+golang.org/x/tools v0.0.0-20200501065659-ab2804fb9c9d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
+golang.org/x/tools v0.0.0-20200512131952-2bc93b1c0c88/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
+golang.org/x/tools v0.0.0-20200515010526-7d3b6ebf133d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
+golang.org/x/tools v0.0.0-20200618134242-20370b0cb4b2/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
+golang.org/x/tools v0.0.0-20200729194436-6467de6f59a7/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
+golang.org/x/tools v0.0.0-20200804011535-6c149bb5ef0d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
+golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
+golang.org/x/tools v0.0.0-20200904185747-39188db58858/go.mod h1:Cj7w3i3Rnn0Xh82ur9kSqwfTHTeVxaDqrfMjpcNT6bE=
+golang.org/x/tools v0.0.0-20201110124207-079ba7bd75cd/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
+golang.org/x/tools v0.0.0-20201201161351-ac6f37ff4c2a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
+golang.org/x/tools v0.0.0-20201208233053-a543418bbed2/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
+golang.org/x/tools v0.0.0-20210105154028-b0ab187a4818/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
+golang.org/x/tools v0.0.0-20210108195828-e2f9c7f1fc8e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
+golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0=
+golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
+golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
+golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58=
+golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
+golang.org/x/tools v0.43.0 h1:12BdW9CeB3Z+J/I/wj34VMl8X+fEXBxVR90JeMX5E7s=
+golang.org/x/tools v0.43.0/go.mod h1:uHkMso649BX2cZK6+RpuIPXS3ho2hZo4FVwfoy1vIk0=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE=
+google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M=
+google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg=
+google.golang.org/api v0.9.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg=
+google.golang.org/api v0.13.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI=
+google.golang.org/api v0.14.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI=
+google.golang.org/api v0.15.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI=
+google.golang.org/api v0.17.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
+google.golang.org/api v0.18.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
+google.golang.org/api v0.19.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
+google.golang.org/api v0.20.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
+google.golang.org/api v0.22.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
+google.golang.org/api v0.24.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE=
+google.golang.org/api v0.28.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE=
+google.golang.org/api v0.29.0/go.mod h1:Lcubydp8VUV7KeIHD9z2Bys/sm/vGKnG1UHuDBSrHWM=
+google.golang.org/api v0.30.0/go.mod h1:QGmEvQ87FHZNiUVJkT14jQNYJ4ZJjdRF23ZXz5138Fc=
+google.golang.org/api v0.35.0/go.mod h1:/XrVsuzM0rZmrsbjJutiuftIzeuTQcEeaYcSk/mQ1dg=
+google.golang.org/api v0.36.0/go.mod h1:+z5ficQTmoYpPn8LCUNVpK5I7hwkpjbcgqA7I34qYtE=
+google.golang.org/api v0.40.0/go.mod h1:fYKFpnQN0DsDSKRVRcQSDQNtqWPfM9i+zNPxepjRCQ8=
+google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
+google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
+google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
+google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0=
+google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
+google.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
+google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
+google.golang.org/appengine v1.6.8 h1:IhEN5q69dyKagZPYMSdIjS2HqprW324FRQZJcGqPAsM=
+google.golang.org/appengine v1.6.8/go.mod h1:1jJ3jBArFh5pcgW8gCtRJnepW8FzD1V44FJffLiz/Ds=
+google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
+google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
+google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
+google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
+google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
+google.golang.org/genproto v0.0.0-20190801165951-fa694d86fc64/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
+google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
+google.golang.org/genproto v0.0.0-20190911173649-1774047e7e51/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8=
+google.golang.org/genproto v0.0.0-20191108220845-16a3f7862a1a/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
+google.golang.org/genproto v0.0.0-20191115194625-c23dd37a84c9/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
+google.golang.org/genproto v0.0.0-20191216164720-4f79533eabd1/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
+google.golang.org/genproto v0.0.0-20191230161307-f3c370f40bfb/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
+google.golang.org/genproto v0.0.0-20200115191322-ca5a22157cba/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
+google.golang.org/genproto v0.0.0-20200122232147-0452cf42e150/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
+google.golang.org/genproto v0.0.0-20200204135345-fa8e72b47b90/go.mod h1:GmwEX6Z4W5gMy59cAlVYjN9JhxgbQH6Gn+gFDQe2lzA=
+google.golang.org/genproto v0.0.0-20200212174721-66ed5ce911ce/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
+google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
+google.golang.org/genproto v0.0.0-20200228133532-8c2c7df3a383/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
+google.golang.org/genproto v0.0.0-20200305110556-506484158171/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
+google.golang.org/genproto v0.0.0-20200312145019-da6875a35672/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
+google.golang.org/genproto v0.0.0-20200331122359-1ee6d9798940/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
+google.golang.org/genproto v0.0.0-20200430143042-b979b6f78d84/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
+google.golang.org/genproto v0.0.0-20200511104702-f5ebc3bea380/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
+google.golang.org/genproto v0.0.0-20200515170657-fc4c6c6a6587/go.mod h1:YsZOwe1myG/8QRHRsmBRE1LrgQY60beZKjly0O1fX9U=
+google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
+google.golang.org/genproto v0.0.0-20200618031413-b414f8b61790/go.mod h1:jDfRM7FcilCzHH/e9qn6dsT145K34l5v+OpcnNgKAAA=
+google.golang.org/genproto v0.0.0-20200729003335-053ba62fc06f/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
+google.golang.org/genproto v0.0.0-20200804131852-c06518451d9c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
+google.golang.org/genproto v0.0.0-20200825200019-8632dd797987/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
+google.golang.org/genproto v0.0.0-20200904004341-0bd0a958aa1d/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
+google.golang.org/genproto v0.0.0-20201109203340-2640f1f9cdfb/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
+google.golang.org/genproto v0.0.0-20201201144952-b05cb90ed32e/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
+google.golang.org/genproto v0.0.0-20201210142538-e3217bee35cc/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
+google.golang.org/genproto v0.0.0-20201214200347-8c77b98c765d/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
+google.golang.org/genproto v0.0.0-20210108203827-ffc7fda8c3d7/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
+google.golang.org/genproto v0.0.0-20210226172003-ab064af71705/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
+google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
+google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38=
+google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM=
+google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
+google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY=
+google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
+google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
+google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
+google.golang.org/grpc v1.28.0/go.mod h1:rpkK4SK4GF4Ach/+MFLZUBavHOvF2JJB5uozKKal+60=
+google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk=
+google.golang.org/grpc v1.30.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak=
+google.golang.org/grpc v1.31.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak=
+google.golang.org/grpc v1.31.1/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak=
+google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc=
+google.golang.org/grpc v1.34.0/go.mod h1:WotjhfgOW/POjDeRt8vscBtXq+2VjORFy659qA51WJ8=
+google.golang.org/grpc v1.35.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU=
+google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
+google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
+google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
+google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
+google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
+google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
+google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
+google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
+google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4=
+google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
+google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
+google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
+google.golang.org/protobuf v1.36.10 h1:AYd7cD/uASjIL6Q9LiTjz8JLcrh/88q5UObnmY3aOOE=
+google.golang.org/protobuf v1.36.10/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
+gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
+gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA=
+gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=
+gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
+gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gorm.io/datatypes v1.2.7 h1:ww9GAhF1aGXZY3EB3cJPJ7//JiuQo7DlQA7NNlVaTdk=
+gorm.io/datatypes v1.2.7/go.mod h1:M2iO+6S3hhi4nAyYe444Pcb0dcIiOMJ7QHaUXxyiNZY=
+gorm.io/driver/mysql v1.6.0 h1:eNbLmNTpPpTOVZi8MMxCi2aaIm0ZpInbORNXDwyLGvg=
+gorm.io/driver/mysql v1.6.0/go.mod h1:D/oCC2GWK3M/dqoLxnOlaNKmXz8WNTfcS9y5ovaSqKo=
+gorm.io/driver/postgres v1.5.0 h1:u2FXTy14l45qc3UeCJ7QaAXZmZfDDv0YrthvmRq1l0U=
+gorm.io/driver/postgres v1.5.0/go.mod h1:FUZXzO+5Uqg5zzwzv4KK49R8lvGIyscBOqYrtI1Ce9A=
+gorm.io/driver/sqlite v1.6.0 h1:WHRRrIiulaPiPFmDcod6prc4l2VGVWHz80KspNsxSfQ=
+gorm.io/driver/sqlite v1.6.0/go.mod h1:AO9V1qIQddBESngQUKWL9yoH93HIeA1X6V633rBwyT8=
+gorm.io/driver/sqlserver v1.6.0 h1:VZOBQVsVhkHU/NzNhRJKoANt5pZGQAS1Bwc6m6dgfnc=
+gorm.io/driver/sqlserver v1.6.0/go.mod h1:WQzt4IJo/WHKnckU9jXBLMJIVNMVeTu25dnOzehntWw=
+gorm.io/gorm v1.31.1 h1:7CA8FTFz/gRfgqgpeKIBcervUn3xSyPUmr6B2WXJ7kg=
+gorm.io/gorm v1.31.1/go.mod h1:XyQVbO2k6YkOis7C2437jSit3SsDK72s7n7rsSHd+Gs=
+honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
+honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
+honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
+honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
+honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg=
+honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=
+honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=
+nhooyr.io/websocket v1.8.17 h1:KEVeLJkUywCKVsnLIDlD/5gtayKp8VoCkksHCGGfT9Y=
+nhooyr.io/websocket v1.8.17/go.mod h1:rN9OFWIUwuxg4fR5tELlYC04bXYowCP9GX47ivo2l+c=
+rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8=
+rsc.io/qr v0.2.0 h1:6vBLea5/NRMVTz8V66gipeLycZMl/+UlFmk8DvqQ6WY=
+rsc.io/qr v0.2.0/go.mod h1:IF+uZjkb9fqyeF/4tlBoynqmQxUoPfWEKh921coOuXs=
+rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0=
+rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA=

+ 89 - 0
internal/config/config.go

@@ -0,0 +1,89 @@
+package config
+
+import (
+	"fmt"
+
+	"github.com/spf13/viper"
+)
+
+type Config struct { // Config is the root configuration object that Load decodes the config file into.
+	Server   ServerConfig   // server section
+	MySQL    MySQLConfig    // MySQL connection section
+	Redis    RedisConfig    // Redis connection section
+	Telegram TelegramConfig // Telegram credentials and accounts
+	LLM      LLMConfig      // LLM provider section
+	Serper   SerperConfig   // Serper API section
+	GitHub   GitHubConfig   // GitHub section
+}
+
+type ServerConfig struct { // ServerConfig holds the server section of the configuration.
+	Port int // port number; presumably the port the API server listens on — confirm against cmd/server
+}
+
+type MySQLConfig struct { // MySQLConfig holds the MySQL connection parameters.
+	Host     string // server host
+	Port     int    // server port
+	User     string // login user
+	Password string // login password
+	Database string // database (schema) name
+}
+
+type RedisConfig struct { // RedisConfig holds the Redis connection parameters.
+	Host     string // server host
+	Port     int    // server port
+	Password string // password; presumably empty when Redis auth is disabled — confirm
+	DB       int    // Redis database index
+}
+
+type TelegramConfig struct { // TelegramConfig holds the Telegram application credentials and accounts.
+	AppID    int        `mapstructure:"app_id"`   // read from the "app_id" key
+	AppHash  string     `mapstructure:"app_hash"` // read from the "app_hash" key
+	Accounts []TGAccount // configured accounts
+}
+
+type TGAccount struct { // TGAccount describes one entry of TelegramConfig.Accounts.
+	Phone       string // account phone number
+	SessionFile string `mapstructure:"session_file"` // read from the "session_file" key; presumably a path to the stored session — confirm
+}
+
+type LLMConfig struct { // LLMConfig holds the LLM provider settings.
+	Provider string // provider identifier
+	BaseURL  string `mapstructure:"base_url"` // read from the "base_url" key
+	APIKey   string `mapstructure:"api_key"`  // read from the "api_key" key
+	Model    string // model name
+	Timeout  string // kept as a string here; NOTE(review): presumably a duration such as "30s" parsed by the consumer — confirm
+}
+
+type SerperConfig struct { // SerperConfig holds the Serper API settings.
+	APIKey         string `mapstructure:"api_key"`          // read from the "api_key" key
+	ResultsPerPage int    `mapstructure:"results_per_page"` // read from the "results_per_page" key
+	MaxPages       int    `mapstructure:"max_pages"`        // read from the "max_pages" key
+}
+
+type GitHubConfig struct { // GitHubConfig holds the GitHub section of the configuration.
+	Token string // access token; presumably used for GitHub API calls — confirm against the consumer
+}
+
+// global holds the configuration produced by the most recent successful Load.
+var global *Config
+
+// Load reads the YAML file at path, decodes it into a Config, stores the
+// result for Get and returns it. Read and decode failures are returned
+// wrapped; in that case the previously stored configuration is left untouched.
+func Load(path string) (*Config, error) {
+	reader := viper.New()
+	reader.SetConfigFile(path)
+	reader.SetConfigType("yaml")
+
+	err := reader.ReadInConfig()
+	if err != nil {
+		return nil, fmt.Errorf("read config file: %w", err)
+	}
+
+	parsed := new(Config)
+	err = reader.Unmarshal(parsed)
+	if err != nil {
+		return nil, fmt.Errorf("unmarshal config: %w", err)
+	}
+
+	global = parsed
+	return parsed, nil
+}
+
+func Get() *Config { // Get returns the configuration stored by the last successful Load, or nil if Load has not succeeded yet.
+	return global
+}

+ 0 - 0
internal/crawler/.gitkeep


+ 66 - 0
internal/crawler/dynamic.go

@@ -0,0 +1,66 @@
+package crawler
+
+import (
+	"context"
+	"strings"
+	"time"
+
+	"github.com/chromedp/chromedp"
+)
+
+// DynamicCrawler crawls pages dynamically through chromedp.
+type DynamicCrawler struct{}
+
+// NewDynamicCrawler creates a DynamicCrawler.
+func NewDynamicCrawler() *DynamicCrawler { return &DynamicCrawler{} }
+
+// Crawl loads targetURL in a headless browser (for JS-rendered pages) and
+// returns the page's outer HTML together with every anchor href found on it.
+// Links containing "t.me/" or "telegram.me/" are additionally collected in
+// TgLinks. The browser session is capped at 30 seconds. On failure the
+// returned result carries only Error.
+func (c *DynamicCrawler) Crawl(ctx context.Context, targetURL string) *CrawlResult {
+	// Allocator context: describes how the browser process is launched.
+	// NOTE(review): "disable-web-security" and NoSandbox weaken browser
+	// isolation while visiting arbitrary sites — confirm they are required.
+	allocatorCtx, stopAllocator := chromedp.NewExecAllocator(ctx,
+		chromedp.Headless,
+		chromedp.DisableGPU,
+		chromedp.NoSandbox,
+		chromedp.Flag("disable-web-security", true),
+	)
+	defer stopAllocator()
+
+	// Upper bound for the whole session.
+	boundedCtx, stopTimer := context.WithTimeout(allocatorCtx, 30*time.Second)
+	defer stopTimer()
+
+	browserCtx, closeBrowser := chromedp.NewContext(boundedCtx)
+	defer closeBrowser()
+
+	var (
+		pageHTML string
+		hrefs    []interface{}
+	)
+	actions := []chromedp.Action{
+		chromedp.Navigate(targetURL),
+		chromedp.Sleep(3 * time.Second), // give client-side JS time to render
+		chromedp.OuterHTML("html", &pageHTML),
+		chromedp.Evaluate(`Array.from(document.querySelectorAll('a[href]')).map(a => a.href)`, &hrefs),
+	}
+	if err := chromedp.Run(browserCtx, actions...); err != nil {
+		return &CrawlResult{Error: err}
+	}
+
+	out := &CrawlResult{HTML: pageHTML}
+
+	// The evaluated JS array arrives as []interface{}; keep the string entries.
+	for _, raw := range hrefs {
+		href, isString := raw.(string)
+		if !isString {
+			continue
+		}
+		out.Links = append(out.Links, href)
+
+		isTelegram := strings.Contains(href, "t.me/") || strings.Contains(href, "telegram.me/")
+		if isTelegram {
+			out.TgLinks = append(out.TgLinks, href)
+		}
+	}
+
+	return out
+}

+ 107 - 0
internal/crawler/filter.go

@@ -0,0 +1,107 @@
+package crawler
+
+import (
+	"net/url"
+	"regexp"
+	"strings"
+)
+
+// FilterResult is the verdict of URL filtering.
+type FilterResult int
+
+const (
+	FilterDiscard   FilterResult = iota // discard outright
+	FilterValid                         // definitely a navigation site
+	FilterUncertain                     // undecided; needs an LLM judgement
+)
+
+// Blacklisted domains: RuleFilter discards URLs whose host matches one of these.
+var blacklistDomains = []string{
+	"t.me", "telegram.me", "twitter.com", "x.com", "facebook.com",
+	"instagram.com", "youtube.com", "google.com", "baidu.com",
+	"weibo.com", "zhihu.com", "github.com", "stackoverflow.com",
+	"wikipedia.org", "amazon.com", "taobao.com", "jd.com", "tmall.com",
+	"qq.com", "163.com", "126.com", "sina.com.cn", "sohu.com",
+	"tencent.com", "alipay.com", "wechat.com", "apple.com",
+	"microsoft.com", "windows.com", "android.com",
+}
+
+// Blacklisted extensions: RuleFilter discards URLs whose lowercased path ends with one of these.
+var blacklistExtensions = []string{
+	".apk", ".zip", ".pdf", ".exe", ".dmg", ".ipa", ".rar", ".7z",
+	".mp4", ".mp3", ".avi", ".jpg", ".png", ".gif", ".svg",
+	".css", ".js", ".json", ".xml",
+}
+
+// Blacklisted path fragments: RuleFilter looks for these in the whole lowercased URL, so query-string fragments such as "?ref=" are covered too.
+var blacklistPaths = []string{
+	"/api/", "/login/", "/logout/", "/register/", "/signup/",
+	"/wp-admin/", "/admin/", "?ref=", "?utm_", "/cdn-cgi/",
+}
+
+// Positive signals: RuleFilter returns FilterValid when the lowercased URL contains any of these.
+var navSignals = []string{
+	"nav", "directory", "catalog", "daohang", "dh", "list",
+	"导航", "目录", "聚合", "推荐", "收录",
+}
+
+// RuleFilter classifies rawURL using the static rule lists of this package.
+//
+// It returns FilterDiscard when the URL cannot be parsed, its host is a
+// blacklisted domain, its path ends in a blacklisted extension, or the URL
+// contains a blacklisted path fragment. Otherwise it returns FilterValid when
+// the URL contains a navigation-site signal, and FilterUncertain when no rule
+// applies. Rules are checked in that order; the first match wins.
+func RuleFilter(rawURL string) FilterResult {
+	u, err := url.Parse(rawURL)
+	if err != nil {
+		return FilterDiscard
+	}
+
+	host := strings.TrimSuffix(strings.ToLower(u.Hostname()), ".")
+	path := strings.ToLower(u.Path)
+	fullURL := strings.ToLower(rawURL)
+
+	// Blacklisted domains. Match only on whole DNS labels: a plain substring
+	// test also discards unrelated hosts such as "netflix.com" (contains
+	// "x.com") or "internet.media" (contains "t.me"). Padding both sides with
+	// dots still catches subdomains ("www.x.com") and regional variants
+	// ("google.com.hk").
+	paddedHost := "." + host + "."
+	for _, d := range blacklistDomains {
+		if strings.Contains(paddedHost, "."+d+".") {
+			return FilterDiscard
+		}
+	}
+
+	// Blacklisted extensions.
+	for _, ext := range blacklistExtensions {
+		if strings.HasSuffix(path, ext) {
+			return FilterDiscard
+		}
+	}
+
+	// Blacklisted path fragments. Checked against the whole URL because some
+	// entries ("?ref=", "?utm_") live in the query string.
+	for _, p := range blacklistPaths {
+		if strings.Contains(fullURL, p) {
+			return FilterDiscard
+		}
+	}
+
+	// Positive signals.
+	for _, sig := range navSignals {
+		if strings.Contains(fullURL, sig) {
+			return FilterValid
+		}
+	}
+
+	return FilterUncertain
+}
+
+// ExtractDomain returns the host name of rawURL (without any port), or an
+// empty string when rawURL cannot be parsed.
+func ExtractDomain(rawURL string) string {
+	parsed, err := url.Parse(rawURL)
+	if err == nil {
+		return parsed.Hostname()
+	}
+	return ""
+}
+
+// ExtractTGUsername 从 URL 提取 TG 用户名
+func ExtractTGUsername(rawURL string) string {
+	re := regexp.MustCompile(`t(?:elegram)?\.me/([a-zA-Z][a-zA-Z0-9_]{4,31})`)
+	m := re.FindStringSubmatch(rawURL)
+	if len(m) > 1 {
+		return m[1]
+	}
+	return ""
+}

+ 78 - 0
internal/crawler/static.go

@@ -0,0 +1,78 @@
+package crawler
+
+import (
+	"context"
+	"strings"
+	"time"
+
+	"github.com/gocolly/colly/v2"
+)
+
+// StaticCrawler crawls static web pages (colly).
+type StaticCrawler struct{}
+
+// NewStaticCrawler creates a StaticCrawler.
+func NewStaticCrawler() *StaticCrawler { return &StaticCrawler{} }
+
+// CrawlResult is the outcome of a crawl.
+type CrawlResult struct {
+	Links   []string // links discovered on the page
+	TgLinks []string // t.me links
+	Emails  []string // NOTE(review): not filled by the crawlers in this package — confirm who populates it
+	HTML    string   // page HTML
+	Error   error    // set when the crawl failed
+}
+
+// Crawl fetches targetURL with colly (15 second request timeout) and returns
+// the response body plus every <a href> link resolved to an absolute URL.
+// Links containing "t.me/" or "telegram.me/" are additionally collected in
+// TgLinks.
+//
+// The fetch runs in a goroutine so that ctx cancellation can return early.
+// When ctx is cancelled first, a fresh result carrying only ctx.Err() is
+// returned: the goroutine may still be writing to its own result, so handing
+// that one to the caller would be a data race. The goroutine ends by itself
+// once colly's request timeout elapses.
+func (c *StaticCrawler) Crawl(ctx context.Context, targetURL string) *CrawlResult {
+	// collected is written only by the colly callbacks (inside the goroutine
+	// below) and read only after done is closed.
+	collected := &CrawlResult{}
+
+	collector := colly.NewCollector(
+		colly.MaxDepth(1),
+		colly.Async(false),
+	)
+	collector.SetRequestTimeout(15 * time.Second)
+
+	// Collect every <a href> link.
+	collector.OnHTML("a[href]", func(e *colly.HTMLElement) {
+		href := e.Attr("href")
+		if href == "" {
+			return
+		}
+
+		// Resolve to an absolute URL.
+		absURL := e.Request.AbsoluteURL(href)
+		if absURL == "" {
+			return
+		}
+
+		collected.Links = append(collected.Links, absURL)
+
+		if strings.Contains(absURL, "t.me/") || strings.Contains(absURL, "telegram.me/") {
+			collected.TgLinks = append(collected.TgLinks, absURL)
+		}
+	})
+
+	collector.OnResponse(func(r *colly.Response) {
+		collected.HTML = string(r.Body)
+	})
+
+	collector.OnError(func(r *colly.Response, err error) {
+		collected.Error = err
+	})
+
+	done := make(chan struct{})
+	go func() {
+		defer close(done)
+		// Keep errors Visit reports without going through OnError, instead of
+		// silently returning an empty result.
+		if err := collector.Visit(targetURL); err != nil && collected.Error == nil {
+			collected.Error = err
+		}
+	}()
+
+	select {
+	case <-ctx.Done():
+		return &CrawlResult{Error: ctx.Err()}
+	case <-done:
+		return collected
+	}
+}

+ 62 - 0
internal/crawler/tme_validator.go

@@ -0,0 +1,62 @@
+package crawler
+
+import (
+	"context"
+	"fmt"
+	"net/http"
+	"strings"
+	"time"
+)
+
+// TMeValidator pre-checks t.me usernames to weed out dead accounts.
+type TMeValidator struct {
+	http *http.Client // client used for the t.me page requests
+}
+
+// NewTMeValidator creates a TMeValidator whose HTTP client gives up after 10
+// seconds. Redirects are followed with net/http's default policy (CheckRedirect
+// left nil), which stops after 10 consecutive requests. A CheckRedirect that
+// always returns nil would remove that cap and chase redirect loops until the
+// timeout fires.
+func NewTMeValidator() *TMeValidator {
+	return &TMeValidator{
+		http: &http.Client{
+			Timeout: 10 * time.Second,
+		},
+	}
+}
+
+// IsAlive reports whether the Telegram username looks active.
+// It fetches https://t.me/{username} and searches the first 10KB of the page
+// for profile markers (photo, og:image, description). Request errors and a
+// 404 status count as not alive.
+func (v *TMeValidator) IsAlive(ctx context.Context, username string) bool {
+	pageURL := fmt.Sprintf("https://t.me/%s", username)
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, pageURL, nil)
+	if err != nil {
+		return false
+	}
+
+	req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; Googlebot/2.1)")
+
+	resp, err := v.http.Do(req)
+	if err != nil {
+		return false
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode == http.StatusNotFound {
+		return false
+	}
+
+	// Read up to the first 10KB, which is enough to decide. A single Read may
+	// return fewer bytes than requested even when more data follows, so keep
+	// reading until the buffer is full or the body ends or fails.
+	buf := make([]byte, 10240)
+	filled := 0
+	for filled < len(buf) {
+		n, readErr := resp.Body.Read(buf[filled:])
+		filled += n
+		if readErr != nil {
+			break
+		}
+	}
+	body := string(buf[:filled])
+
+	// Pages of valid accounts contain avatar-related HTML elements.
+	// NOTE(review): "og:image" may also be present on pages of non-existent
+	// usernames, which would make this check too permissive — confirm.
+	if strings.Contains(body, "tgme_page_photo") || strings.Contains(body, "og:image") {
+		return true
+	}
+	return strings.Contains(body, "tgme_page_description")
+}

+ 0 - 0
internal/extractor/.gitkeep


+ 21 - 0
internal/extractor/llm_extractor.go

@@ -0,0 +1,21 @@
+package extractor
+
+import "context"
+
+// LLMClientInterface is declared here as an interface to avoid a circular dependency.
+type LLMClientInterface interface {
+	ParseMerchant(ctx context.Context, message string) (*MerchantInfo, error) // parses merchant details out of message
+}
+
+// LLMExtract asks the LLM client to parse merchant details out of text; it is
+// meant for the case where regex extraction found nothing.
+// It never reports an error: a nil client, an empty text or a failed LLM call
+// all yield nil.
+func LLMExtract(ctx context.Context, llmClient LLMClientInterface, text string) *MerchantInfo {
+	if text == "" || llmClient == nil {
+		return nil
+	}
+	if parsed, err := llmClient.ParseMerchant(ctx, text); err == nil {
+		return parsed
+	}
+	return nil
+}

+ 137 - 0
internal/extractor/regex.go

@@ -0,0 +1,137 @@
+package extractor
+
+import (
+	"regexp"
+	"strings"
+	"unicode"
+)
+
+// Regular-expression patterns for Telegram usernames:
+// standard forms: @username or t.me/username
+// variants: t点me/xxx, t . me/xxx, t.me/xxx, tg:xxx, telegram.me/xxx
+
+var (
+	reTgAt    = regexp.MustCompile(`@([a-zA-Z][a-zA-Z0-9_]{4,31})`) // @username
+	reTgLink  = regexp.MustCompile(`(?:https?://)?t(?:elegram)?\.me/([a-zA-Z][a-zA-Z0-9_]{4,31})`) // t.me/ or telegram.me/ link, scheme optional
+	reTgDot   = regexp.MustCompile(`t[点.·\s]*me[/\s]*([a-zA-Z][a-zA-Z0-9_]{4,31})`) // obfuscated "t me" spellings
+	reTgColon = regexp.MustCompile(`(?i)tg[::\s]+([a-zA-Z][a-zA-Z0-9_]{4,31})`) // "tg:" style prefix, case-insensitive
+	reEmail   = regexp.MustCompile(`[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}`) // e-mail address
+	rePhone   = regexp.MustCompile(`\+?[0-9]{7,15}`) // optional "+" followed by 7-15 digits
+	reWebsite = regexp.MustCompile(`https?://[^\s<>"'` + "\u4e00-\u9fa5" + `]+`) // http(s) URL, ending at whitespace, quotes, angle brackets or a CJK character
+	reWeChat  = regexp.MustCompile(`(?i)(?:微信|vx|wx|加v|加V|weixin)[::\s]*([a-zA-Z0-9_\-]{5,20})`) // WeChat ID after a WeChat keyword
+
+	reHTMLTag = regexp.MustCompile(`<[^>]+>`) // any HTML tag
+)
+
+// Extract pulls contact details out of text using regular expressions (fast).
+//
+// The Telegram username is taken from the most precise pattern that matches:
+// a t.me / telegram.me link, then an obfuscated "t点me"-style link, then a
+// "tg:" prefix, and finally a bare @username. E-mail, website, phone and
+// WeChat ID are each the first match of their pattern. HasContact is true
+// when at least one field was filled.
+func Extract(text string) *ContactInfo {
+	info := &ContactInfo{}
+
+	// 1-3. Link-like Telegram patterns, most precise first; first hit wins.
+	for _, re := range []*regexp.Regexp{reTgLink, reTgDot, reTgColon} {
+		if m := re.FindStringSubmatch(text); m != nil {
+			info.TgUsername = m[1]
+			break
+		}
+	}
+
+	// 4. Bare @username as a last resort. E-mail addresses are blanked out
+	// first; otherwise the domain part of "user@example.com" would be taken
+	// for the Telegram username "example".
+	if info.TgUsername == "" {
+		withoutEmails := reEmail.ReplaceAllString(text, " ")
+		if m := reTgAt.FindStringSubmatch(withoutEmails); m != nil {
+			info.TgUsername = m[1]
+		}
+	}
+
+	if info.TgUsername != "" {
+		info.TgLink = "t.me/" + info.TgUsername
+	}
+
+	// 5. E-mail.
+	info.Email = reEmail.FindString(text)
+
+	// 6. Website: the first URL that is not a Telegram link. Checking every
+	// URL, not just the first one, keeps a leading t.me link from hiding a
+	// real website that follows it.
+	for _, candidate := range reWebsite.FindAllString(text, -1) {
+		lowered := strings.ToLower(candidate)
+		if strings.Contains(lowered, "t.me/") || strings.Contains(lowered, "telegram.me/") {
+			continue
+		}
+		info.Website = strings.TrimRight(candidate, ".,;)")
+		break
+	}
+
+	// 7. Phone. rePhone already requires at least 7 digits, so no further
+	// length check is needed.
+	info.Phone = rePhone.FindString(text)
+
+	// 8. WeChat.
+	if m := reWeChat.FindStringSubmatch(text); m != nil {
+		info.WeChat = m[1]
+	}
+
+	info.HasContact = info.TgUsername != "" || info.Email != "" ||
+		info.Website != "" || info.Phone != "" || info.WeChat != ""
+
+	return info
+}
+
+// HasContact quickly reports whether text contains any contact detail, without
+// running the full extraction. It checks the Telegram, e-mail, website and
+// WeChat patterns.
+// NOTE(review): rePhone is not consulted here although Extract counts a phone
+// number as a contact — confirm whether that is intentional.
+func HasContact(text string) bool {
+	patterns := []*regexp.Regexp{
+		reTgAt, reTgLink, reTgDot, reTgColon, reEmail, reWebsite, reWeChat,
+	}
+	for _, re := range patterns {
+		if re.MatchString(text) {
+			return true
+		}
+	}
+	return false
+}
+
+// ContainsChinese reports whether text contains Chinese (Han) characters.
+// threshold is the minimum share of Han characters among all characters
+// (0-1); with a threshold of 0 or less a single Han character is enough.
+func ContainsChinese(text string, threshold float64) bool {
+	if anyHanSuffices := threshold <= 0; !anyHanSuffices {
+		return ChineseRatio(text) >= threshold
+	}
+	isHan := func(r rune) bool { return unicode.Is(unicode.Han, r) }
+	return strings.IndexFunc(text, isHan) >= 0
+}
+
+// ChineseRatio 返回中文字符比例
+func ChineseRatio(text string) float64 {
+	runes := []rune(text)
+	if len(runes) == 0 {
+		return 0
+	}
+	var count int
+	for _, r := range runes {
+		if unicode.Is(unicode.Han, r) {
+			count++
+		}
+	}
+	return float64(count) / float64(len(runes))
+}
+
+// CleanMerchantName cleans a merchant name: HTML tags are removed and leading
+// and trailing whitespace is trimmed.
+func CleanMerchantName(name string) string {
+	return strings.TrimSpace(reHTMLTag.ReplaceAllString(name, ""))
+}

+ 23 - 0
internal/extractor/types.go

@@ -0,0 +1,23 @@
+package extractor
+
+// ContactInfo holds the contact details extracted from a text.
+type ContactInfo struct {
+	TgUsername string `json:"tg_username"` // Telegram username (without the @)
+	TgLink     string `json:"tg_link"`     // t.me/xxx link
+	Website    string `json:"website"`
+	Email      string `json:"email"`
+	Phone      string `json:"phone"`
+	WeChat     string `json:"wechat"`
+	HasContact bool   `json:"has_contact"` // whether any contact detail is present
+}
+
+// MerchantInfo is the merchant information parsed by the LLM.
+type MerchantInfo struct {
+	MerchantName string `json:"merchant_name"`
+	TgUsername   string `json:"tg_username"`
+	Website      string `json:"website"`
+	Email        string `json:"email"`
+	Phone        string `json:"phone"`
+	Industry     string `json:"industry"`
+	Description  string `json:"description"`
+}

+ 0 - 0
internal/handler/.gitkeep


+ 82 - 0
internal/handler/channel.go

@@ -0,0 +1,82 @@
+package handler
+
+import (
+	"spider/internal/model"
+
+	"github.com/gin-gonic/gin"
+	"gorm.io/gorm"
+)
+
+// ChannelHandler serves the channel query endpoints (List and Stats).
+type ChannelHandler struct {
+	db *gorm.DB // database handle used for the channel queries
+}
+
+// List responds with one page of channels, newest id first. The optional
+// "status" and "source" query parameters narrow the result by exact match.
+// The total in the response counts all rows matching the filters.
+// GET /channels?status=&source=&page=&page_size=
+func (h *ChannelHandler) List(c *gin.Context) {
+	page, pageSize, offset := parsePage(c)
+
+	q := h.db.Model(&model.Channel{})
+	// A filter is applied only when its query parameter is non-empty.
+	for _, column := range []string{"status", "source"} {
+		if value := c.Query(column); value != "" {
+			q = q.Where(column+" = ?", value)
+		}
+	}
+
+	var total int64
+	err := q.Count(&total).Error
+	if err != nil {
+		Fail(c, 500, err.Error())
+		return
+	}
+
+	var channels []model.Channel
+	err = q.Order("id DESC").Limit(pageSize).Offset(offset).Find(&channels).Error
+	if err != nil {
+		Fail(c, 500, err.Error())
+		return
+	}
+
+	PageOK(c, channels, total, page, pageSize)
+}
+
+// Stats responds with the total number of channels and the channel counts
+// grouped by status and by source. A failing query aborts the request with a
+// 500 response instead of silently reporting zero counts.
+// GET /channels/stats
+func (h *ChannelHandler) Stats(c *gin.Context) {
+	var statusRows []struct {
+		Status string `json:"status"`
+		Cnt    int64  `json:"count"`
+	}
+	if err := h.db.Model(&model.Channel{}).
+		Select("status, count(*) as cnt").
+		Group("status").
+		Scan(&statusRows).Error; err != nil {
+		Fail(c, 500, err.Error())
+		return
+	}
+
+	byStatus := map[string]int64{}
+	for _, r := range statusRows {
+		byStatus[r.Status] = r.Cnt
+	}
+
+	var sourceRows []struct {
+		Source string `json:"source"`
+		Cnt    int64  `json:"count"`
+	}
+	if err := h.db.Model(&model.Channel{}).
+		Select("source, count(*) as cnt").
+		Group("source").
+		Scan(&sourceRows).Error; err != nil {
+		Fail(c, 500, err.Error())
+		return
+	}
+
+	bySource := map[string]int64{}
+	for _, r := range sourceRows {
+		bySource[r.Source] = r.Cnt
+	}
+
+	var total int64
+	if err := h.db.Model(&model.Channel{}).Count(&total).Error; err != nil {
+		Fail(c, 500, err.Error())
+		return
+	}
+
+	OK(c, gin.H{
+		"total":     total,
+		"by_status": byStatus,
+		"by_source": bySource,
+	})
+}

+ 77 - 0
internal/handler/config.go

@@ -0,0 +1,77 @@
+package handler
+
+import (
+	"net/http"
+
+	"spider/internal/model"
+
+	"github.com/gin-gonic/gin"
+	"gorm.io/gorm"
+)
+
+// ConfigHandler serves the managed-settings endpoints (list and update).
+type ConfigHandler struct {
+	db *gorm.DB // database handle used for settings and their revisions
+}
+
+// ListSettings responds with every managed setting, ordered by key name.
+// GET /config/settings
+func (h *ConfigHandler) ListSettings(c *gin.Context) {
+	var rows []model.ManagedSetting
+
+	result := h.db.Order("key_name ASC").Find(&rows)
+	if result.Error != nil {
+		Fail(c, 500, result.Error.Error())
+		return
+	}
+
+	OK(c, rows)
+}
+
+// UpdateSetting changes the value (and optionally the description) of the
+// setting named by the :key path parameter and records an audit revision with
+// the old and new value. The update and the revision are written in one
+// transaction, so a setting never changes without its audit record.
+// It responds with the reloaded setting, 400 for an invalid request, 404 when
+// the setting cannot be loaded, and 500 when writing or reloading fails.
+// PUT /config/settings/:key
+func (h *ConfigHandler) UpdateSetting(c *gin.Context) {
+	key := c.Param("key")
+	if key == "" {
+		Fail(c, http.StatusBadRequest, "key is required")
+		return
+	}
+
+	var body struct {
+		Value       string `json:"value" binding:"required"`
+		Description string `json:"description"`
+	}
+	if err := c.ShouldBindJSON(&body); err != nil {
+		Fail(c, http.StatusBadRequest, err.Error())
+		return
+	}
+
+	var setting model.ManagedSetting
+	if err := h.db.Where("key_name = ?", key).First(&setting).Error; err != nil {
+		Fail(c, 404, "setting not found")
+		return
+	}
+
+	oldValue := setting.Value
+
+	updates := map[string]interface{}{
+		"value": body.Value,
+	}
+	// An empty description leaves the stored description untouched.
+	if body.Description != "" {
+		updates["description"] = body.Description
+	}
+
+	err := h.db.Transaction(func(tx *gorm.DB) error {
+		if err := tx.Model(&setting).Updates(updates).Error; err != nil {
+			return err
+		}
+
+		// Record audit revision.
+		revision := model.ConfigRevision{
+			SettingKey: key,
+			OldValue:   oldValue,
+			NewValue:   body.Value,
+			ChangedBy:  "admin",
+		}
+		return tx.Create(&revision).Error
+	})
+	if err != nil {
+		Fail(c, 500, err.Error())
+		return
+	}
+
+	// Reload so the response reflects what is stored.
+	if err := h.db.Where("key_name = ?", key).First(&setting).Error; err != nil {
+		Fail(c, 500, err.Error())
+		return
+	}
+	OK(c, setting)
+}

+ 61 - 0
internal/handler/dashboard.go

@@ -0,0 +1,61 @@
+package handler
+
+import (
+	"spider/internal/model"
+
+	"github.com/gin-gonic/gin"
+	"gorm.io/gorm"
+)
+
+// DashboardHandler serves the dashboard summary endpoint.
+type DashboardHandler struct {
+	db *gorm.DB // database handle used for the summary queries
+}
+
+// Get responds with aggregated dashboard statistics: row counts for channels,
+// raw and clean merchants, valid merchants, nav sites, seeds and keywords,
+// the five most recently created tasks, and the first task in status
+// "running" (null when none is found). A failing count or task-list query
+// aborts the request with a 500 response instead of reporting zeros.
+// GET /dashboard
+func (h *DashboardHandler) Get(c *gin.Context) {
+	// count runs q.Count and remembers the first error; once an error has
+	// occurred the remaining counts are skipped.
+	var firstErr error
+	count := func(q *gorm.DB) int64 {
+		var n int64
+		if firstErr == nil {
+			firstErr = q.Count(&n).Error
+		}
+		return n
+	}
+
+	channelsTotal := count(h.db.Model(&model.Channel{}))
+	merchantsRawTotal := count(h.db.Model(&model.MerchantRaw{}))
+	merchantsCleanTotal := count(h.db.Model(&model.MerchantClean{}))
+	merchantsValid := count(h.db.Model(&model.MerchantClean{}).Where("status = ?", "valid"))
+	navSitesTotal := count(h.db.Model(&model.NavSite{}))
+	seedsTotal := count(h.db.Model(&model.ManagedSeed{}))
+	keywordsTotal := count(h.db.Model(&model.ManagedKeyword{}))
+
+	// Recent 5 tasks.
+	var recentTasks []model.Task
+	if firstErr == nil {
+		firstErr = h.db.Order("created_at DESC").Limit(5).Find(&recentTasks).Error
+	}
+	if firstErr != nil {
+		Fail(c, 500, firstErr.Error())
+		return
+	}
+
+	// Currently running task (first one). Any lookup error, including "no
+	// such row", leaves it nil.
+	var runningTask *model.Task
+	var rt model.Task
+	if err := h.db.Where("status = ?", "running").First(&rt).Error; err == nil {
+		runningTask = &rt
+	}
+
+	OK(c, gin.H{
+		"channels_total":        channelsTotal,
+		"merchants_raw_total":   merchantsRawTotal,
+		"merchants_clean_total": merchantsCleanTotal,
+		"merchants_valid":       merchantsValid,
+		"nav_sites_total":       navSitesTotal,
+		"seeds_total":           seedsTotal,
+		"keywords_total":        keywordsTotal,
+		"recent_tasks":          recentTasks,
+		"running_task":          runningTask,
+	})
+}

+ 138 - 0
internal/handler/keyword.go

@@ -0,0 +1,138 @@
+package handler
+
+import (
+	"net/http"
+	"strconv"
+
+	"spider/internal/model"
+
+	"github.com/gin-gonic/gin"
+	"gorm.io/gorm"
+)
+
+// KeywordHandler handles managed keyword CRUD.
+// All of its queries go through the wrapped GORM handle.
+type KeywordHandler struct {
+	db *gorm.DB
+}
+
+// List returns one page of managed keywords, newest ID first.
+// GET /keywords?page=1&page_size=20&category=&status=
+//
+// The category and status query parameters are optional equality filters;
+// an empty value means "no filter". The total reflects the filtered set.
+func (h *KeywordHandler) List(c *gin.Context) {
+	page, pageSize, offset := parsePage(c)
+
+	q := h.db.Model(&model.ManagedKeyword{})
+	// The query parameter name doubles as the column name for both filters.
+	for _, column := range []string{"category", "status"} {
+		if value := c.Query(column); value != "" {
+			q = q.Where(column+" = ?", value)
+		}
+	}
+
+	var total int64
+	countErr := q.Count(&total).Error
+	if countErr != nil {
+		Fail(c, 500, countErr.Error())
+		return
+	}
+
+	var keywords []model.ManagedKeyword
+	findErr := q.Order("id DESC").Limit(pageSize).Offset(offset).Find(&keywords).Error
+	if findErr != nil {
+		Fail(c, 500, findErr.Error())
+		return
+	}
+
+	PageOK(c, keywords, total, page, pageSize)
+}
+
+// Create creates one or more keywords in batch.
+// POST /keywords  body: {keywords:["k1","k2"], category:"机场"}
+//
+// Empty keyword strings are skipped. A keyword that already exists is
+// returned as stored (FirstOrCreate matches on the keyword text), so the
+// response may mix new and pre-existing rows. The first database error
+// aborts the batch with a 500; rows created before it are not rolled back.
+//
+// The response data is always a JSON array: when every submitted keyword is
+// empty it is `[]` rather than `null`.
+func (h *KeywordHandler) Create(c *gin.Context) {
+	var body struct {
+		Keywords []string `json:"keywords" binding:"required,min=1"`
+		Category string   `json:"category"`
+	}
+	if err := c.ShouldBindJSON(&body); err != nil {
+		Fail(c, http.StatusBadRequest, err.Error())
+		return
+	}
+
+	// Start from a non-nil slice so an all-empty batch serialises as [].
+	created := make([]model.ManagedKeyword, 0, len(body.Keywords))
+	for _, kw := range body.Keywords {
+		if kw == "" {
+			continue
+		}
+		k := model.ManagedKeyword{
+			Keyword:  kw,
+			Category: body.Category,
+			Status:   "active",
+		}
+		// Use FirstOrCreate to avoid duplicate errors.
+		if err := h.db.Where(model.ManagedKeyword{Keyword: kw}).FirstOrCreate(&k).Error; err != nil {
+			Fail(c, 500, err.Error())
+			return
+		}
+		created = append(created, k)
+	}
+
+	OK(c, created)
+}
+
+// Update modifies a keyword.
+// PUT /keywords/:id
+//
+// Only the non-empty fields of the JSON body (keyword, category, status)
+// are written. Responds 400 for a malformed id or body, 404 when the row
+// does not exist, and returns the freshly reloaded row on success.
+func (h *KeywordHandler) Update(c *gin.Context) {
+	id, parseErr := strconv.ParseUint(c.Param("id"), 10, 64)
+	if parseErr != nil {
+		Fail(c, http.StatusBadRequest, "invalid id")
+		return
+	}
+
+	var body struct {
+		Keyword  string `json:"keyword"`
+		Category string `json:"category"`
+		Status   string `json:"status"`
+	}
+	if bindErr := c.ShouldBindJSON(&body); bindErr != nil {
+		Fail(c, http.StatusBadRequest, bindErr.Error())
+		return
+	}
+
+	var kw model.ManagedKeyword
+	if lookupErr := h.db.First(&kw, id).Error; lookupErr != nil {
+		Fail(c, 404, "keyword not found")
+		return
+	}
+
+	// Collect only the columns the caller actually supplied.
+	supplied := map[string]string{
+		"keyword":  body.Keyword,
+		"category": body.Category,
+		"status":   body.Status,
+	}
+	updates := map[string]interface{}{}
+	for column, value := range supplied {
+		if value != "" {
+			updates[column] = value
+		}
+	}
+
+	if updateErr := h.db.Model(&kw).Updates(updates).Error; updateErr != nil {
+		Fail(c, 500, updateErr.Error())
+		return
+	}
+
+	// Reload so the response reflects what is stored.
+	h.db.First(&kw, id)
+	OK(c, kw)
+}
+
+// Delete removes a keyword by ID.
+// DELETE /keywords/:id
+//
+// Responds 400 for a non-numeric id and 500 on a database error. A missing
+// row is not treated as an error: the call still answers OK.
+func (h *KeywordHandler) Delete(c *gin.Context) {
+	id, parseErr := strconv.ParseUint(c.Param("id"), 10, 64)
+	if parseErr != nil {
+		Fail(c, http.StatusBadRequest, "invalid id")
+		return
+	}
+
+	result := h.db.Delete(&model.ManagedKeyword{}, id)
+	if result.Error != nil {
+		Fail(c, 500, result.Error.Error())
+		return
+	}
+	OK(c, nil)
+}

+ 186 - 0
internal/handler/merchant.go

@@ -0,0 +1,186 @@
+package handler
+
+import (
+	"net/http"
+	"strconv"
+
+	"spider/internal/model"
+
+	"github.com/gin-gonic/gin"
+	"gorm.io/gorm"
+)
+
+// MerchantHandler handles merchant queries.
+// All of its queries go through the wrapped GORM handle.
+type MerchantHandler struct {
+	db *gorm.DB
+}
+
+// Stats returns aggregate statistics for merchants.
+// GET /merchants/stats
+//
+// The payload contains the raw and clean table totals, the clean-table
+// counts for the statuses valid/invalid/bot/duplicate/group (0 when a status
+// has no rows), a by_source map (raw table grouped by source_type) and a
+// by_industry map (clean table grouped by industry).
+//
+// Any failing query aborts the request with a 500 instead of silently
+// reporting zeroed numbers.
+func (h *MerchantHandler) Stats(c *gin.Context) {
+	// groupCount runs "SELECT <column>, count(*) ... GROUP BY <column>" on
+	// the given model and returns the result as value -> row count.
+	// column is always a hard-coded identifier, never request input.
+	groupCount := func(table interface{}, column string) (map[string]int64, error) {
+		var rows []struct {
+			Grp string
+			Cnt int64
+		}
+		err := h.db.Model(table).
+			Select(column + " as grp, count(*) as cnt").
+			Group(column).
+			Scan(&rows).Error
+		if err != nil {
+			return nil, err
+		}
+		out := make(map[string]int64, len(rows))
+		for _, r := range rows {
+			out[r.Grp] = r.Cnt
+		}
+		return out, nil
+	}
+
+	var rawTotal int64
+	if err := h.db.Model(&model.MerchantRaw{}).Count(&rawTotal).Error; err != nil {
+		Fail(c, 500, err.Error())
+		return
+	}
+
+	var cleanTotal int64
+	if err := h.db.Model(&model.MerchantClean{}).Count(&cleanTotal).Error; err != nil {
+		Fail(c, 500, err.Error())
+		return
+	}
+
+	// Count by status in clean table.
+	statusCounts, err := groupCount(&model.MerchantClean{}, "status")
+	if err != nil {
+		Fail(c, 500, err.Error())
+		return
+	}
+
+	// Count by source_type in raw table.
+	bySource, err := groupCount(&model.MerchantRaw{}, "source_type")
+	if err != nil {
+		Fail(c, 500, err.Error())
+		return
+	}
+
+	// Count by industry in clean table.
+	byIndustry, err := groupCount(&model.MerchantClean{}, "industry")
+	if err != nil {
+		Fail(c, 500, err.Error())
+		return
+	}
+
+	OK(c, gin.H{
+		"raw_total":   rawTotal,
+		"clean_total": cleanTotal,
+		"valid":       statusCounts["valid"],
+		"invalid":     statusCounts["invalid"],
+		"bot":         statusCounts["bot"],
+		"duplicate":   statusCounts["duplicate"],
+		"group":       statusCounts["group"],
+		"by_source":   bySource,
+		"by_industry": byIndustry,
+	})
+}
+
+// ListRaw returns one page of raw merchants, most recently created first.
+// GET /merchants/raw?status=&source_type=&page=&page_size=
+//
+// status and source_type are optional equality filters; an empty value
+// means "no filter". The total reflects the filtered set.
+func (h *MerchantHandler) ListRaw(c *gin.Context) {
+	page, pageSize, offset := parsePage(c)
+
+	q := h.db.Model(&model.MerchantRaw{})
+	// The query parameter name doubles as the column name for both filters.
+	for _, column := range []string{"status", "source_type"} {
+		if value := c.Query(column); value != "" {
+			q = q.Where(column+" = ?", value)
+		}
+	}
+
+	var total int64
+	countErr := q.Count(&total).Error
+	if countErr != nil {
+		Fail(c, 500, countErr.Error())
+		return
+	}
+
+	var items []model.MerchantRaw
+	findErr := q.Order("created_at DESC").Limit(pageSize).Offset(offset).Find(&items).Error
+	if findErr != nil {
+		Fail(c, 500, findErr.Error())
+		return
+	}
+
+	PageOK(c, items, total, page, pageSize)
+}
+
+// ListClean returns one page of clean merchants.
+// GET /merchants/clean?status=&industry=&min_score=&sort=quality_score&order=desc&page=&page_size=
+//
+// status and industry are optional equality filters. min_score, when it
+// parses as a float, keeps rows with quality_score >= min_score; an
+// unparsable value is ignored. sort is restricted to a fixed set of columns
+// and order to asc/desc; anything else falls back to quality_score / desc.
+func (h *MerchantHandler) ListClean(c *gin.Context) {
+	page, pageSize, offset := parsePage(c)
+
+	q := h.db.Model(&model.MerchantClean{})
+	for _, column := range []string{"status", "industry"} {
+		if value := c.Query(column); value != "" {
+			q = q.Where(column+" = ?", value)
+		}
+	}
+	if rawScore := c.Query("min_score"); rawScore != "" {
+		score, scoreErr := strconv.ParseFloat(rawScore, 64)
+		if scoreErr == nil {
+			q = q.Where("quality_score >= ?", score)
+		}
+	}
+
+	// Sort column and direction end up in raw SQL, so both are checked
+	// against fixed allow-lists to prevent SQL injection.
+	sortField := c.DefaultQuery("sort", "quality_score")
+	switch sortField {
+	case "quality_score", "created_at", "updated_at", "member_count":
+		// allowed as-is
+	default:
+		sortField = "quality_score"
+	}
+	order := c.DefaultQuery("order", "desc")
+	if !(order == "asc" || order == "desc") {
+		order = "desc"
+	}
+
+	var total int64
+	countErr := q.Count(&total).Error
+	if countErr != nil {
+		Fail(c, 500, countErr.Error())
+		return
+	}
+
+	var items []model.MerchantClean
+	orderBy := sortField + " " + order
+	findErr := q.Order(orderBy).Limit(pageSize).Offset(offset).Find(&items).Error
+	if findErr != nil {
+		Fail(c, 500, findErr.Error())
+		return
+	}
+
+	PageOK(c, items, total, page, pageSize)
+}
+
+// GetByID fetches a merchant by ID, checking clean table first then raw.
+// GET /merchants/:id
+//
+// The response names the table that matched in "source" ("clean" or "raw")
+// next to the row in "data". Any lookup error in both tables yields a 404.
+func (h *MerchantHandler) GetByID(c *gin.Context) {
+	id, parseErr := strconv.ParseUint(c.Param("id"), 10, 64)
+	if parseErr != nil {
+		Fail(c, http.StatusBadRequest, "invalid id")
+		return
+	}
+
+	var clean model.MerchantClean
+	cleanErr := h.db.First(&clean, id).Error
+	if cleanErr == nil {
+		OK(c, gin.H{"source": "clean", "data": clean})
+		return
+	}
+
+	var raw model.MerchantRaw
+	rawErr := h.db.First(&raw, id).Error
+	if rawErr != nil {
+		Fail(c, 404, "merchant not found")
+		return
+	}
+	OK(c, gin.H{"source": "raw", "data": raw})
+}

+ 38 - 0
internal/handler/nav_site.go

@@ -0,0 +1,38 @@
+package handler
+
+import (
+	"spider/internal/model"
+
+	"github.com/gin-gonic/gin"
+	"gorm.io/gorm"
+)
+
+// NavSiteHandler handles nav site queries.
+// All of its queries go through the wrapped GORM handle.
+type NavSiteHandler struct {
+	db *gorm.DB
+}
+
+// List returns one page of nav sites, newest ID first.
+// GET /nav-sites?status=&page=&page_size=
+//
+// status is an optional equality filter; the total reflects the filtered set.
+func (h *NavSiteHandler) List(c *gin.Context) {
+	page, pageSize, offset := parsePage(c)
+
+	q := h.db.Model(&model.NavSite{})
+	status := c.Query("status")
+	if status != "" {
+		q = q.Where("status = ?", status)
+	}
+
+	var total int64
+	countErr := q.Count(&total).Error
+	if countErr != nil {
+		Fail(c, 500, countErr.Error())
+		return
+	}
+
+	var items []model.NavSite
+	findErr := q.Order("id DESC").Limit(pageSize).Offset(offset).Find(&items).Error
+	if findErr != nil {
+		Fail(c, 500, findErr.Error())
+		return
+	}
+
+	PageOK(c, items, total, page, pageSize)
+}

+ 104 - 0
internal/handler/response.go

@@ -0,0 +1,104 @@
+package handler
+
+import (
+	"net/http"
+
+	"github.com/gin-gonic/gin"
+)
+
+// Response is the unified response structure.
+// OK and PageOK send Code 0 with Message "ok"; Fail sends the caller's
+// code and message with nil Data.
+type Response struct {
+	Code    int         `json:"code"`
+	Message string      `json:"message"`
+	Data    interface{} `json:"data"`
+}
+
+// PageResponse wraps paginated results.
+// PageOK places it in Response.Data: Items is the current page, Total the
+// count passed by the handler, Page and PageSize the paging values used.
+type PageResponse struct {
+	Items    interface{} `json:"items"`
+	Total    int64       `json:"total"`
+	Page     int         `json:"page"`
+	PageSize int         `json:"page_size"`
+}
+
+// OK writes an HTTP 200 success envelope (code 0, message "ok") around data.
+func OK(c *gin.Context, data interface{}) {
+	// Code is left at its zero value, which is the success code 0.
+	envelope := Response{Message: "ok", Data: data}
+	c.JSON(http.StatusOK, envelope)
+}
+
+// Fail sends an error response.
+//
+// code is echoed in the JSON envelope and, when it is a registered HTTP
+// 4xx/5xx status (400, 404, 409, 500, 501, ...), is also used as the HTTP
+// status of the reply. Any other value falls back to HTTP 400 so that
+// custom business codes still produce a client-error response.
+//
+// Previously only 404, 500 and 501 were mapped, so e.g. Fail(c, 409, ...)
+// was delivered as HTTP 400.
+func Fail(c *gin.Context, code int, msg string) {
+	httpStatus := http.StatusBadRequest
+	// http.StatusText returns "" for numbers that are not known statuses.
+	if code >= 400 && code <= 599 && http.StatusText(code) != "" {
+		httpStatus = code
+	}
+	c.JSON(httpStatus, Response{
+		Code:    code,
+		Message: msg,
+		Data:    nil,
+	})
+}
+
+// PageOK writes an HTTP 200 success envelope whose data is a PageResponse
+// built from the given items, total and paging values.
+func PageOK(c *gin.Context, items interface{}, total int64, page, pageSize int) {
+	pageData := PageResponse{
+		Items:    items,
+		Total:    total,
+		Page:     page,
+		PageSize: pageSize,
+	}
+	envelope := Response{Code: 0, Message: "ok", Data: pageData}
+	c.JSON(http.StatusOK, envelope)
+}
+
+// parsePage reads the page and page_size query parameters.
+// page defaults to 1 and page_size to 20; page_size is capped at 100.
+// A missing, non-numeric or non-positive value keeps the default.
+// offset is the number of rows to skip: (page-1)*pageSize.
+func parsePage(c *gin.Context) (page int, pageSize int, offset int) {
+	// positive returns the query value for key when it is a number > 0,
+	// otherwise fallback.
+	positive := func(key string, fallback int) int {
+		raw := c.Query(key)
+		if raw == "" {
+			return fallback
+		}
+		if n := parseInt(raw, fallback); n > 0 {
+			return n
+		}
+		return fallback
+	}
+
+	page = positive("page", 1)
+	pageSize = positive("page_size", 20)
+	if pageSize > 100 {
+		pageSize = 100
+	}
+	return page, pageSize, (page - 1) * pageSize
+}
+
+func parseInt(s string, def int) int {
+	n := def
+	for _, ch := range s {
+		if ch < '0' || ch > '9' {
+			return def
+		}
+		n = n*10 + int(ch-'0')
+	}
+	_ = n
+	// simple atoi
+	result := 0
+	for _, ch := range s {
+		if ch < '0' || ch > '9' {
+			return def
+		}
+		result = result*10 + int(ch-'0')
+	}
+	return result
+}

+ 78 - 0
internal/handler/router.go

@@ -0,0 +1,78 @@
+package handler
+
+import (
+	"fmt"
+	"net/http"
+
+	"github.com/gin-gonic/gin"
+	"github.com/redis/go-redis/v9"
+	"gorm.io/gorm"
+
+	"spider/internal/service"
+)
+
+// SetupRouter creates a default Gin engine, registers the /ping health
+// check plus every /api/v1 route, and returns the engine.
+//
+// db is handed to every handler, while rdb and taskSvc are only used by the
+// task handler.
+func SetupRouter(db *gorm.DB, rdb *redis.Client, taskSvc *service.TaskService) *gin.Engine {
+	engine := gin.Default()
+
+	// Health check.
+	engine.GET("/ping", func(ctx *gin.Context) {
+		ctx.JSON(http.StatusOK, gin.H{"message": "pong"})
+	})
+
+	v1 := engine.Group("/api/v1")
+
+	// Dashboard.
+	dashboard := &DashboardHandler{db: db}
+	v1.GET("/dashboard", dashboard.Get)
+
+	// Channels.
+	channels := &ChannelHandler{db: db}
+	v1.GET("/channels", channels.List)
+	v1.GET("/channels/stats", channels.Stats)
+
+	// Keywords.
+	keywords := &KeywordHandler{db: db}
+	v1.GET("/keywords", keywords.List)
+	v1.POST("/keywords", keywords.Create)
+	v1.PUT("/keywords/:id", keywords.Update)
+	v1.DELETE("/keywords/:id", keywords.Delete)
+
+	// Seeds.
+	seeds := &SeedHandler{db: db}
+	v1.GET("/seeds", seeds.List)
+	v1.POST("/seeds", seeds.Create)
+	v1.PUT("/seeds/:id", seeds.Update)
+	v1.DELETE("/seeds/:id", seeds.Delete)
+
+	// Merchants.
+	merchants := &MerchantHandler{db: db}
+	v1.GET("/merchants/stats", merchants.Stats)
+	v1.GET("/merchants/raw", merchants.ListRaw)
+	v1.GET("/merchants/clean", merchants.ListClean)
+	v1.GET("/merchants/:id", merchants.GetByID)
+
+	// Nav sites.
+	navSites := &NavSiteHandler{db: db}
+	v1.GET("/nav-sites", navSites.List)
+
+	// Config / Settings.
+	settings := &ConfigHandler{db: db}
+	v1.GET("/config/settings", settings.ListSettings)
+	v1.PUT("/config/settings/:key", settings.UpdateSetting)
+
+	// Tasks.
+	tasks := NewTaskHandler(db, taskSvc, rdb)
+	v1.GET("/tasks", tasks.List)
+	v1.POST("/tasks/start", tasks.Start)
+	v1.GET("/tasks/:id", tasks.Get)
+	v1.POST("/tasks/:id/stop", tasks.Stop)
+	v1.GET("/tasks/:id/logs", tasks.Logs)
+
+	return engine
+}
+
+// ServerAddr formats port as a listen address of the form ":<port>",
+// i.e. with no host part.
+func ServerAddr(port int) string {
+	// Sprint inserts no space here because one operand is a string.
+	return fmt.Sprint(":", port)
+}

+ 120 - 0
internal/handler/seed.go

@@ -0,0 +1,120 @@
+package handler
+
+import (
+	"net/http"
+	"strconv"
+
+	"spider/internal/model"
+
+	"github.com/gin-gonic/gin"
+	"gorm.io/gorm"
+)
+
+// SeedHandler handles managed seed CRUD.
+// All of its queries go through the wrapped GORM handle.
+type SeedHandler struct {
+	db *gorm.DB
+}
+
+// List returns one page of managed seeds, newest ID first.
+// GET /seeds?page=1&page_size=20&status=active
+//
+// status is an optional equality filter; the total reflects the filtered set.
+func (h *SeedHandler) List(c *gin.Context) {
+	page, pageSize, offset := parsePage(c)
+
+	q := h.db.Model(&model.ManagedSeed{})
+	status := c.Query("status")
+	if status != "" {
+		q = q.Where("status = ?", status)
+	}
+
+	var total int64
+	countErr := q.Count(&total).Error
+	if countErr != nil {
+		Fail(c, 500, countErr.Error())
+		return
+	}
+
+	var seeds []model.ManagedSeed
+	findErr := q.Order("id DESC").Limit(pageSize).Offset(offset).Find(&seeds).Error
+	if findErr != nil {
+		Fail(c, 500, findErr.Error())
+		return
+	}
+
+	PageOK(c, seeds, total, page, pageSize)
+}
+
+// Create stores a new seed with status "active" and returns it.
+// POST /seeds  body: {channel_name, note}
+//
+// channel_name is required; a body that fails binding yields a 400 and a
+// database error yields a 500.
+func (h *SeedHandler) Create(c *gin.Context) {
+	var body struct {
+		ChannelName string `json:"channel_name" binding:"required"`
+		Note        string `json:"note"`
+	}
+	bindErr := c.ShouldBindJSON(&body)
+	if bindErr != nil {
+		Fail(c, http.StatusBadRequest, bindErr.Error())
+		return
+	}
+
+	var seed model.ManagedSeed
+	seed.ChannelName = body.ChannelName
+	seed.Note = body.Note
+	seed.Status = "active"
+
+	createErr := h.db.Create(&seed).Error
+	if createErr != nil {
+		Fail(c, 500, createErr.Error())
+		return
+	}
+	OK(c, seed)
+}
+
+// Update modifies a seed's status and/or note.
+// PUT /seeds/:id  body: {status, note}
+//
+// Both fields are optional. status is written only when non-empty. note is
+// written only when the key is present in the body, so a status-only
+// request no longer wipes the stored note; sending "note": "" still clears
+// it on purpose.
+//
+// Responds 400 for a malformed id or body, 404 when the seed does not
+// exist, 500 on a database error, and otherwise returns the reloaded row.
+func (h *SeedHandler) Update(c *gin.Context) {
+	id, err := strconv.ParseUint(c.Param("id"), 10, 64)
+	if err != nil {
+		Fail(c, http.StatusBadRequest, "invalid id")
+		return
+	}
+
+	var body struct {
+		Status string `json:"status"`
+		// Pointer so an absent key (nil) differs from an explicit "".
+		Note *string `json:"note"`
+	}
+	if err := c.ShouldBindJSON(&body); err != nil {
+		Fail(c, http.StatusBadRequest, err.Error())
+		return
+	}
+
+	var seed model.ManagedSeed
+	if err := h.db.First(&seed, id).Error; err != nil {
+		Fail(c, 404, "seed not found")
+		return
+	}
+
+	updates := map[string]interface{}{}
+	if body.Status != "" {
+		updates["status"] = body.Status
+	}
+	if body.Note != nil {
+		updates["note"] = *body.Note
+	}
+
+	if err := h.db.Model(&seed).Updates(updates).Error; err != nil {
+		Fail(c, 500, err.Error())
+		return
+	}
+
+	// Reload so the response reflects what is stored.
+	if err := h.db.First(&seed, id).Error; err != nil {
+		Fail(c, 500, err.Error())
+		return
+	}
+	OK(c, seed)
+}
+
+// Delete removes a seed by ID.
+// DELETE /seeds/:id
+//
+// Responds 400 for a non-numeric id and 500 on a database error. A missing
+// row is not treated as an error: the call still answers OK.
+func (h *SeedHandler) Delete(c *gin.Context) {
+	id, parseErr := strconv.ParseUint(c.Param("id"), 10, 64)
+	if parseErr != nil {
+		Fail(c, http.StatusBadRequest, "invalid id")
+		return
+	}
+
+	result := h.db.Delete(&model.ManagedSeed{}, id)
+	if result.Error != nil {
+		Fail(c, 500, result.Error.Error())
+		return
+	}
+	OK(c, nil)
+}

+ 250 - 0
internal/handler/task.go

@@ -0,0 +1,250 @@
+package handler
+
+import (
+	"fmt"
+	"net/http"
+	"strconv"
+	"time"
+
+	"github.com/gin-gonic/gin"
+	"github.com/gorilla/websocket"
+	"github.com/redis/go-redis/v9"
+	"gorm.io/gorm"
+
+	"spider/internal/model"
+	"spider/internal/service"
+)
+
+// validTaskTypes is the set of accepted task_type values.
+// Start rejects any task_type that is not a key of this map with a 400.
+var validTaskTypes = map[string]bool{
+	"full":     true,
+	"discover": true,
+	"search":   true,
+	"github":   true,
+	"scrape":   true,
+	"crawl":    true,
+	"clean":    true,
+	"score":    true,
+}
+
+// TaskHandler handles task-related HTTP and WebSocket requests.
+type TaskHandler struct {
+	db          *gorm.DB             // task rows are read directly through this handle
+	taskService *service.TaskService // starts/stops tasks and reports progress
+	redis       *redis.Client        // source of the task log list and progress hash used by Logs
+	upgrader    websocket.Upgrader   // upgrades GET /tasks/:id/logs to a WebSocket
+}
+
+// NewTaskHandler wires a TaskHandler from its database handle, task service
+// and Redis client, and prepares the WebSocket upgrader used by Logs.
+//
+// NOTE(review): the upgrader accepts every Origin, which leaves the log
+// stream open to cross-site WebSocket connections — confirm this is intended
+// before exposing the API beyond a trusted network.
+func NewTaskHandler(db *gorm.DB, svc *service.TaskService, rdb *redis.Client) *TaskHandler {
+	allowAnyOrigin := func(r *http.Request) bool { return true }
+
+	handler := &TaskHandler{db: db, taskService: svc, redis: rdb}
+	handler.upgrader = websocket.Upgrader{CheckOrigin: allowAnyOrigin}
+	return handler
+}
+
+// List handles GET /tasks
+// Query params: status, page, page_size
+//
+// Returns one page of tasks, most recently created first. status is an
+// optional equality filter. Paging goes through the shared parsePage helper
+// so this endpoint behaves like every other list endpoint: page defaults to
+// 1, page_size defaults to 20 and is capped at 100 (it used to fall back to
+// 20 when above 100). A failing count now yields a 500 instead of a silent
+// total of 0.
+func (h *TaskHandler) List(c *gin.Context) {
+	page, pageSize, offset := parsePage(c)
+
+	query := h.db.Model(&model.Task{})
+	if status := c.Query("status"); status != "" {
+		query = query.Where("status = ?", status)
+	}
+
+	var total int64
+	if err := query.Count(&total).Error; err != nil {
+		Fail(c, 500, err.Error())
+		return
+	}
+
+	// Ordering is applied only to the row query; the count does not need it.
+	var tasks []model.Task
+	if err := query.Order("created_at DESC").Limit(pageSize).Offset(offset).Find(&tasks).Error; err != nil {
+		Fail(c, 500, err.Error())
+		return
+	}
+
+	PageOK(c, tasks, total, page, pageSize)
+}
+
+// Start handles POST /tasks/start
+//
+// The JSON body is bound to service.StartTaskRequest. A body that fails
+// binding or names a task_type outside validTaskTypes is rejected with code
+// 400; an error from the task service is reported with code 409. On success
+// the created task is returned with HTTP 201.
+func (h *TaskHandler) Start(c *gin.Context) {
+	var req service.StartTaskRequest
+	bindErr := c.ShouldBindJSON(&req)
+	if bindErr != nil {
+		Fail(c, 400, bindErr.Error())
+		return
+	}
+
+	if known := validTaskTypes[req.TaskType]; !known {
+		Fail(c, 400, fmt.Sprintf("invalid task_type: %s", req.TaskType))
+		return
+	}
+
+	task, startErr := h.taskService.StartTask(req)
+	if startErr != nil {
+		Fail(c, 409, startErr.Error())
+		return
+	}
+
+	created := Response{Code: 0, Message: "ok", Data: task}
+	c.JSON(http.StatusCreated, created)
+}
+
+// Get handles GET /tasks/:id
+//
+// Responds with the task row under "task" and the task service's progress
+// for it under "progress". A non-numeric id yields code 400 and a failed
+// lookup code 404.
+func (h *TaskHandler) Get(c *gin.Context) {
+	id, parseErr := strconv.ParseUint(c.Param("id"), 10, 64)
+	if parseErr != nil {
+		Fail(c, 400, "invalid id")
+		return
+	}
+
+	var task model.Task
+	lookupErr := h.db.First(&task, id).Error
+	if lookupErr != nil {
+		Fail(c, 404, "task not found")
+		return
+	}
+
+	payload := gin.H{
+		"task":     task,
+		"progress": h.taskService.GetProgress(&task),
+	}
+	OK(c, payload)
+}
+
+// Stop handles POST /tasks/:id/stop
+//
+// The optional JSON body {"force": bool} is passed on to the task service;
+// a missing or malformed body is tolerated and treated as force=false.
+// A non-numeric id yields code 400 and a service error code 500.
+func (h *TaskHandler) Stop(c *gin.Context) {
+	id, parseErr := strconv.ParseUint(c.Param("id"), 10, 64)
+	if parseErr != nil {
+		Fail(c, 400, "invalid id")
+		return
+	}
+
+	var body struct {
+		Force bool `json:"force"`
+	}
+	// Binding errors are ignored on purpose: the body is optional.
+	_ = c.ShouldBindJSON(&body)
+
+	stopErr := h.taskService.StopTask(uint(id), body.Force)
+	if stopErr != nil {
+		Fail(c, 500, stopErr.Error())
+		return
+	}
+
+	OK(c, gin.H{"message": "stop signal sent"})
+}
+
+// Logs handles GET /tasks/:id/logs via WebSocket.
+// On connect it immediately sends history logs from Redis, then streams live progress
+// until the task finishes or the client disconnects.
+//
+// A non-numeric id is rejected with a plain JSON 400 before the upgrade (this
+// reply does not use the Response envelope). After the upgrade every message
+// is a text frame; the connection is closed when this function returns.
+func (h *TaskHandler) Logs(c *gin.Context) {
+	id, err := strconv.ParseUint(c.Param("id"), 10, 64)
+	if err != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid id"})
+		return
+	}
+
+	// If the upgrade fails there is nothing more to write here.
+	conn, err := h.upgrader.Upgrade(c.Writer, c.Request, nil)
+	if err != nil {
+		return
+	}
+	defer conn.Close()
+
+	ctx := c.Request.Context()
+	// The live loop below polls once per second.
+	ticker := time.NewTicker(time.Second)
+	defer ticker.Stop()
+
+	// send writes one text frame and reports whether the write succeeded.
+	send := func(msg string) bool {
+		err := conn.WriteMessage(websocket.TextMessage, []byte(msg))
+		return err == nil
+	}
+
+	// Fetch task record immediately.
+	var task model.Task
+	if err := h.db.First(&task, id).Error; err != nil {
+		send(fmt.Sprintf("[错误] 任务 #%d 不存在", id))
+		return
+	}
+
+	// Send history logs from Redis list first.
+	// A Redis error is ignored and treated the same as an empty history.
+	logKey := fmt.Sprintf("spider:task:logs:%d", id)
+	historyLogs, _ := h.redis.LRange(ctx, logKey, 0, -1).Result()
+	for _, line := range historyLogs {
+		if !send(line) {
+			return
+		}
+	}
+
+	// If no history logs, send current task status summary.
+	if len(historyLogs) == 0 {
+		send(fmt.Sprintf("[信息] 任务 #%d (%s) 状态: %s", task.ID, task.TaskType, task.Status))
+
+		// Also send current Redis progress if available.
+		progressKey := fmt.Sprintf("spider:task:progress:%d", id)
+		vals, _ := h.redis.HGetAll(ctx, progressKey).Result()
+		if len(vals) > 0 {
+			msg := fmt.Sprintf("[进度] 阶段: %s | 进度: %s/%s | %s",
+				vals["phase"], vals["current"], vals["total"], vals["message"])
+			send(msg)
+		}
+	}
+
+	// If the task has already finished, send completion message and close.
+	if task.Status == "completed" || task.Status == "failed" || task.Status == "stopped" {
+		statusLabel := map[string]string{
+			"completed": "完成",
+			"failed":    "失败",
+			"stopped":   "停止",
+		}[task.Status]
+		send(fmt.Sprintf("[完成] 任务已%s", statusLabel))
+		return
+	}
+
+	// Task is still running — handle client close messages in the background.
+	// The reader discards anything the client sends; its only job is to
+	// notice a read error (e.g. the client closing) and signal clientGone.
+	clientGone := make(chan struct{})
+	go func() {
+		for {
+			if _, _, err := conn.ReadMessage(); err != nil {
+				close(clientGone)
+				return
+			}
+		}
+	}()
+
+	progressKey := fmt.Sprintf("spider:task:progress:%d", id)
+
+	for {
+		select {
+		case <-clientGone:
+			return
+		case <-ticker.C:
+			// Re-read the task each tick to pick up status changes.
+			var t model.Task
+			if err := h.db.First(&t, id).Error; err != nil {
+				return
+			}
+
+			vals, _ := h.redis.HGetAll(ctx, progressKey).Result()
+			if len(vals) > 0 {
+				msg := fmt.Sprintf("[进度] 阶段: %s | %s/%s | %s",
+					vals["phase"], vals["current"], vals["total"], vals["message"])
+				if !send(msg) {
+					return
+				}
+			}
+
+			// The last progress line is sent before the final status line.
+			if t.Status == "completed" || t.Status == "failed" || t.Status == "stopped" {
+				send(fmt.Sprintf("[完成] 任务已结束,状态: %s", t.Status))
+				return
+			}
+		}
+	}
+}

+ 0 - 0
internal/llm/.gitkeep


+ 178 - 0
internal/llm/client.go

@@ -0,0 +1,178 @@
+package llm
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"strconv"
+	"strings"
+	"time"
+
+	openai "github.com/sashabaranov/go-openai"
+
+	"spider/internal/extractor"
+)
+
+// Client is an LLM client for OpenAI-compatible APIs.
+type Client struct {
+	client  *openai.Client // underlying go-openai client
+	model   string         // model name sent with every chat request
+	timeout time.Duration  // per-request timeout applied in chat
+}
+
+// New builds a Client for any OpenAI-compatible endpoint.
+// When baseURL is empty the go-openai default base URL is kept; model and
+// timeout are stored for use by every chat request.
+func New(baseURL, apiKey, model string, timeout time.Duration) *Client {
+	conf := openai.DefaultConfig(apiKey)
+	if len(baseURL) > 0 {
+		conf.BaseURL = baseURL
+	}
+
+	cli := new(Client)
+	cli.client = openai.NewClientWithConfig(conf)
+	cli.model = model
+	cli.timeout = timeout
+	return cli
+}
+
+// chat 内部封装:发送 system + user 消息,返回第一条回复文本
+func (c *Client) chat(ctx context.Context, system, user string) (string, error) {
+	ctx, cancel := context.WithTimeout(ctx, c.timeout)
+	defer cancel()
+
+	resp, err := c.client.CreateChatCompletion(ctx, openai.ChatCompletionRequest{
+		Model: c.model,
+		Messages: []openai.ChatCompletionMessage{
+			{Role: openai.ChatMessageRoleSystem, Content: system},
+			{Role: openai.ChatMessageRoleUser, Content: user},
+		},
+	})
+	if err != nil {
+		return "", fmt.Errorf("llm chat: %w", err)
+	}
+	if len(resp.Choices) == 0 {
+		return "", fmt.Errorf("llm chat: empty response")
+	}
+	return strings.TrimSpace(resp.Choices[0].Message.Content), nil
+}
+
+// EvalChannelRelevance asks the model how merchant-related a Telegram
+// channel is and returns a relevance score clamped to 0-1; scores below 0.5
+// are considered not relevant.
+//
+// When the call fails, or no number can be found in the reply, it returns
+// 0.5 ("uncertain") together with the error.
+func (c *Client) EvalChannelRelevance(ctx context.Context, name, about string, memberCount int) (float64, error) {
+	const system = `你是商户识别专家。请判断以下 Telegram 频道是否与商户/卖家/服务提供商相关。
+只关注是否有商品/服务在售。返回 0-1 的数字,1 表示高度相关,0 表示完全不相关。只返回数字,不要解释。`
+
+	prompt := fmt.Sprintf("频道名:%s\n简介:%s\n成员数:%d", name, about, memberCount)
+
+	reply, err := c.chat(ctx, system, prompt)
+	if err != nil {
+		return 0.5, err
+	}
+
+	// Try the whole reply first, then each whitespace-separated token, and
+	// accept the first candidate that parses as a float.
+	candidates := append([]string{reply}, strings.Fields(reply)...)
+	for _, candidate := range candidates {
+		value, parseErr := strconv.ParseFloat(candidate, 64)
+		if parseErr == nil {
+			return clamp01(value), nil
+		}
+	}
+	return 0.5, fmt.Errorf("llm eval: cannot parse score from %q", reply)
+}
+
+// ParseMerchant extracts merchant contact details from a message text.
+// It serves as the fallback when regex extraction fails, or for
+// non-standard formats such as "加V:xxx".
+//
+// The model is asked for a JSON object; a markdown code fence around the
+// reply is stripped before decoding. On a call or decode failure an empty
+// MerchantInfo is returned together with the error.
+func (c *Client) ParseMerchant(ctx context.Context, message string) (*extractor.MerchantInfo, error) {
+	const system = `你是信息提取专家。从以下文本中提取商户联系信息,返回 JSON 格式。
+字段:merchant_name, tg_username(不含@), website, email, phone, industry, description
+如果某字段没有信息则为空字符串。只返回 JSON,不要 markdown 代码块。`
+
+	reply, err := c.chat(ctx, system, message)
+	if err != nil {
+		return defaultMerchantInfo(), err
+	}
+
+	// Remove a possible markdown code fence around the JSON.
+	payload := stripMarkdownCode(reply)
+
+	parsed := new(extractor.MerchantInfo)
+	decodeErr := json.Unmarshal([]byte(payload), parsed)
+	if decodeErr != nil {
+		return defaultMerchantInfo(), fmt.Errorf("llm parse merchant: json unmarshal: %w (raw: %s)", decodeErr, payload)
+	}
+	return parsed, nil
+}
+
+// ClassifyIndustry asks the model for an industry label for a channel.
+// The prompt offers a fixed list of categories; the trimmed reply is
+// returned as-is without being checked against that list. When the call
+// fails it returns "其他" together with the error.
+func (c *Client) ClassifyIndustry(ctx context.Context, name, about string) (string, error) {
+	const system = `你是电商行业分类专家。根据频道信息,从以下类别中选择最匹配的一个:
+机场、发卡、成人、电商、游戏充值、金融、软件工具、其他
+只返回类别名称,不要解释。`
+
+	prompt := fmt.Sprintf("名称:%s,简介:%s", name, about)
+
+	label, err := c.chat(ctx, system, prompt)
+	if err != nil {
+		return "其他", err
+	}
+	label = strings.TrimSpace(label)
+	return label, nil
+}
+
+// IsNavSite asks the model whether a URL is a navigation/directory site.
+// It returns (is a nav site, confidence clamped to 0-1).
+//
+// The model is asked for a JSON object; a markdown code fence around the
+// reply is stripped before decoding. On a call or decode failure it returns
+// (false, 0) together with the error.
+func (c *Client) IsNavSite(ctx context.Context, url string) (bool, float64, error) {
+	const system = `判断以下 URL 是否是导航站、目录站或聚合站(收录多个商家/服务的网站)。
+返回 JSON: {"is_nav": true/false, "confidence": 0.0-1.0}`
+
+	reply, err := c.chat(ctx, system, url)
+	if err != nil {
+		return false, 0, err
+	}
+
+	payload := stripMarkdownCode(reply)
+
+	var verdict struct {
+		IsNav      bool    `json:"is_nav"`
+		Confidence float64 `json:"confidence"`
+	}
+	decodeErr := json.Unmarshal([]byte(payload), &verdict)
+	if decodeErr != nil {
+		return false, 0, fmt.Errorf("llm is_nav_site: json unmarshal: %w (raw: %s)", decodeErr, payload)
+	}
+	return verdict.IsNav, clamp01(verdict.Confidence), nil
+}
+
+// stripMarkdownCode removes a markdown code fence that an LLM may have
+// wrapped around its reply and returns the trimmed content.
+//
+// Handled shapes (after trimming surrounding whitespace):
+//   - "```json\n...\n```" or "```\n...\n```": the whole opening fence line
+//     (including any language tag) and the last closing fence are dropped;
+//   - "```...```" or "```json...```" on a single line: the opening fence, an
+//     optional "json" tag and the closing fence are dropped. The original
+//     code left the opening fence in place here, which broke JSON decoding.
+//
+// Text that does not start with a fence is returned trimmed but unchanged.
+func stripMarkdownCode(s string) string {
+	s = strings.TrimSpace(s)
+	if !strings.HasPrefix(s, "```") {
+		return s
+	}
+
+	if nl := strings.IndexByte(s, '\n'); nl >= 0 {
+		// Multi-line: drop the whole opening fence line.
+		s = s[nl+1:]
+	} else {
+		// Single line: drop the fence and an optional language tag.
+		s = strings.TrimPrefix(s, "```")
+		s = strings.TrimPrefix(s, "json")
+	}
+
+	if idx := strings.LastIndex(s, "```"); idx >= 0 {
+		s = s[:idx]
+	}
+	return strings.TrimSpace(s)
+}
+
+// clamp01 limits a float to the range [0, 1].
+//
+// NaN is mapped to 0: every ordered comparison with NaN is false, so it
+// would otherwise slip through both bounds checks. This matters because
+// strconv.ParseFloat accepts the text "NaN" without error, so a model reply
+// can produce one.
+func clamp01(v float64) float64 {
+	// v != v is true only for NaN (avoids importing math for IsNaN).
+	if v != v {
+		return 0
+	}
+	if v < 0 {
+		return 0
+	}
+	if v > 1 {
+		return 1
+	}
+	return v
+}
+
+// defaultMerchantInfo returns a pointer to a zero-valued MerchantInfo, the
+// value ParseMerchant hands back when the call or JSON decoding fails.
+func defaultMerchantInfo() *extractor.MerchantInfo {
+	return new(extractor.MerchantInfo)
+}

+ 18 - 0
internal/model/channel.go

@@ -0,0 +1,18 @@
+package model
+
+import "time"
+
+// Channel is the GORM model for a channel record.
+// Username carries a unique index; Source and Status are indexed enum columns.
+type Channel struct {
+	ID             uint      `gorm:"primaryKey;autoIncrement" json:"id"`
+	Username       string    `gorm:"uniqueIndex;size:255;not null" json:"username"`
+	Title          string    `gorm:"size:500" json:"title"`
+	MemberCount    int       `gorm:"default:0" json:"member_count"`
+	About          string    `gorm:"type:text" json:"about"`
+	Source         string    `gorm:"type:enum('seed','snowball','search','github');not null;index" json:"source"`
+	SourceDetail   string    `gorm:"size:500" json:"source_detail"`
+	Status         string    `gorm:"type:enum('pending','scraped','failed','skipped');default:'pending';index" json:"status"`
+	LastMessageID  int       `gorm:"default:0" json:"last_message_id"`
+	RelevanceScore float64   `json:"relevance_score"`
+	CreatedAt      time.Time `json:"created_at"`
+	UpdatedAt      time.Time `json:"updated_at"`
+}

+ 12 - 0
internal/model/config_revision.go

@@ -0,0 +1,12 @@
+package model
+
+import "time"
+
+// ConfigRevision is the GORM model for one recorded change of a setting:
+// the setting key, its old and new value, and who changed it.
+// The fields carry no json tags, so JSON output uses the Go field names.
+type ConfigRevision struct {
+	ID         uint      `gorm:"primaryKey;autoIncrement"`
+	SettingKey string    `gorm:"size:255;not null;index"`
+	OldValue   string    `gorm:"type:text"`
+	NewValue   string    `gorm:"type:text"`
+	ChangedBy  string    `gorm:"size:100;default:'admin'"`
+	CreatedAt  time.Time
+}

+ 11 - 0
internal/model/keyword.go

@@ -0,0 +1,11 @@
+package model
+
+import "time"
+
+// ManagedKeyword is the GORM model for a managed keyword.
+// Keyword carries a unique index; Status is an enum defaulting to 'active'.
+type ManagedKeyword struct {
+	ID        uint      `gorm:"primaryKey;autoIncrement" json:"id"`
+	Keyword   string    `gorm:"uniqueIndex;size:255;not null" json:"keyword"`
+	Category  string    `gorm:"size:100" json:"category"`
+	Status    string    `gorm:"type:enum('active','inactive');default:'active'" json:"status"`
+	CreatedAt time.Time `json:"created_at"`
+}

+ 30 - 0
internal/model/merchant_clean.go

@@ -0,0 +1,30 @@
+package model
+
+import (
+	"time"
+
+	"gorm.io/datatypes"
+)
+
+// MerchantClean is the GORM model for a row of the clean merchant table.
+// TgUsername carries a unique index; Industry, Status and QualityScore are
+// indexed. RawID is nullable — presumably the ID of the originating
+// MerchantRaw row; verify against the code that writes it.
+type MerchantClean struct {
+	ID           uint           `gorm:"primaryKey;autoIncrement" json:"id"`
+	RawID        *uint          `gorm:"index" json:"raw_id"`
+	MerchantName string         `gorm:"size:500" json:"merchant_name"`
+	TgUsername   string         `gorm:"uniqueIndex;size:255" json:"tg_username"`
+	Website      string         `gorm:"size:2048" json:"website"`
+	Email        string         `gorm:"size:255" json:"email"`
+	Phone        string         `gorm:"size:100" json:"phone"`
+	Industry     string         `gorm:"size:100;index" json:"industry"`
+	Status       string         `gorm:"type:enum('valid','invalid','bot','duplicate','group');not null;index" json:"status"`
+	TgFirstName  string         `gorm:"size:255" json:"tg_first_name"`
+	TgLastName   string         `gorm:"size:255" json:"tg_last_name"`
+	IsPremium    bool           `gorm:"default:false" json:"is_premium"`
+	LastOnline   *time.Time     `json:"last_online"`
+	ActiveLevel  string         `gorm:"type:enum('active','moderate','inactive')" json:"active_level"`
+	MemberCount  int            `gorm:"default:0" json:"member_count"`
+	QualityScore float64        `gorm:"default:0;index" json:"quality_score"`
+	SourceCount  int            `gorm:"default:1" json:"source_count"`
+	SourceLinks  datatypes.JSON `gorm:"type:json" json:"source_links"`
+	CreatedAt    time.Time      `json:"created_at"`
+	UpdatedAt    time.Time      `json:"updated_at"`
+}

+ 18 - 0
internal/model/merchant_raw.go

@@ -0,0 +1,18 @@
+package model
+
+import "time"
+
+// MerchantRaw is the GORM model for a row of the raw merchant table.
+// TgUsername and Status are indexed; SourceType is a required enum and
+// Status an enum defaulting to 'raw'.
+type MerchantRaw struct {
+	ID              uint      `gorm:"primaryKey;autoIncrement" json:"id"`
+	MerchantName    string    `gorm:"size:500" json:"merchant_name"`
+	TgUsername      string    `gorm:"size:255;index" json:"tg_username"`
+	Website         string    `gorm:"size:2048" json:"website"`
+	Email           string    `gorm:"size:255" json:"email"`
+	Phone           string    `gorm:"size:100" json:"phone"`
+	Industry        string    `gorm:"size:100" json:"industry"`
+	SourceType      string    `gorm:"type:enum('tg_scrape','web_crawl','github');not null" json:"source_type"`
+	SourceID        string    `gorm:"size:500" json:"source_id"`
+	OriginalMessage string    `gorm:"type:text" json:"original_message"`
+	Status          string    `gorm:"type:enum('raw','glm_parsed');default:'raw';index" json:"status"`
+	CreatedAt       time.Time `json:"created_at"`
+}

+ 14 - 0
internal/model/nav_site.go

@@ -0,0 +1,14 @@
+package model
+
+import "time"
+
+// NavSite is a candidate navigation/directory web page. Rows are created by the
+// search phase and consumed by the crawl phase.
+//
+// URL is unique (the index covers the first 500 characters). Source holds the
+// search keyword that surfaced the page. Status moves from 'pending' to
+// 'scraped', 'filtered' or 'failed'; when a site is filtered, FilterReason
+// records why (the crawl phase writes "blacklist", "llm_reject" or
+// "non_chinese"). MerchantCount is written by the crawl phase when a site is
+// marked scraped.
+type NavSite struct {
+	ID            uint      `gorm:"primaryKey;autoIncrement" json:"id"`
+	URL           string    `gorm:"uniqueIndex:idx_url,length:500;type:varchar(2048);not null" json:"url"`
+	Domain        string    `gorm:"size:255;index" json:"domain"`
+	Source        string    `gorm:"size:100" json:"source"`
+	Status        string    `gorm:"type:enum('pending','scraped','filtered','failed');default:'pending';index" json:"status"`
+	FilterReason  string    `gorm:"size:255" json:"filter_reason"`
+	MerchantCount int       `gorm:"default:0" json:"merchant_count"`
+	CreatedAt     time.Time `json:"created_at"`
+}

+ 12 - 0
internal/model/seed.go

@@ -0,0 +1,12 @@
+package model
+
+import "time"
+
+// ManagedSeed is a seed Telegram channel. The discover phase loads every seed
+// whose Status is 'active' and uses ChannelName as a starting point of its
+// breadth-first channel discovery.
+//
+// ChannelName is unique. Status is 'active' (the default) or 'inactive'.
+// Note is free-form text.
+type ManagedSeed struct {
+	ID          uint      `gorm:"primaryKey;autoIncrement" json:"id"`
+	ChannelName string    `gorm:"uniqueIndex;size:255;not null" json:"channel_name"`
+	Status      string    `gorm:"type:enum('active','inactive');default:'active'" json:"status"`
+	Note        string    `gorm:"size:500" json:"note"`
+	CreatedAt   time.Time `json:"created_at"`
+	UpdatedAt   time.Time `json:"updated_at"`
+}

+ 13 - 0
internal/model/setting.go

@@ -0,0 +1,13 @@
+package model
+
+import "time"
+
+// ManagedSetting is one key/value configuration entry.
+//
+// KeyName is unique and stored in the key_name column. Value is stored as text;
+// ValueType names its type and is one of 'int', 'float', 'bool', 'string' or
+// 'json'. EffectLevel is 'runtime' (the default) or 'new_task'.
+// NOTE(review): presumably EffectLevel says whether a change applies to running
+// tasks or only to tasks started afterwards — confirm against the settings service.
+type ManagedSetting struct {
+	ID          uint      `gorm:"primaryKey;autoIncrement" json:"id"`
+	KeyName     string    `gorm:"uniqueIndex;column:key_name;size:255;not null" json:"key_name"`
+	Value       string    `gorm:"type:text;not null" json:"value"`
+	ValueType   string    `gorm:"type:enum('int','float','bool','string','json');not null" json:"value_type"`
+	EffectLevel string    `gorm:"type:enum('runtime','new_task');default:'runtime'" json:"effect_level"`
+	Description string    `gorm:"size:500" json:"description"`
+	UpdatedAt   time.Time `json:"updated_at"`
+}

+ 20 - 0
internal/model/task.go

@@ -0,0 +1,20 @@
+package model
+
+import (
+	"time"
+
+	"gorm.io/datatypes"
+)
+
+// Task is a persisted pipeline job.
+//
+// TaskType is one of 'full', 'discover', 'search', 'github', 'scrape', 'crawl',
+// 'clean' or 'score'. Status is one of 'pending' (the default), 'running',
+// 'completed', 'failed' or 'stopped'. Params, Progress and Result are JSON
+// columns; ErrorMsg is free text. StartedAt and FinishedAt are pointers so they
+// can be NULL.
+type Task struct {
+	ID         uint           `gorm:"primaryKey;autoIncrement" json:"id"`
+	TaskType   string         `gorm:"type:enum('full','discover','search','github','scrape','crawl','clean','score');not null;index" json:"task_type"`
+	Status     string         `gorm:"type:enum('pending','running','completed','failed','stopped');default:'pending';index" json:"status"`
+	Params     datatypes.JSON `gorm:"type:json" json:"params"`
+	Progress   datatypes.JSON `gorm:"type:json" json:"progress"`
+	Result     datatypes.JSON `gorm:"type:json" json:"result"`
+	ErrorMsg   string         `gorm:"type:text" json:"error_msg"`
+	StartedAt  *time.Time     `json:"started_at"`
+	FinishedAt *time.Time     `json:"finished_at"`
+	CreatedAt  time.Time      `json:"created_at"`
+}

+ 0 - 0
internal/pipeline/.gitkeep


+ 46 - 0
internal/pipeline/phase.go

@@ -0,0 +1,46 @@
+package pipeline
+
+import (
+	"context"
+	"spider/internal/model"
+)
+
+// Settings is a minimal interface satisfied by *service.SettingsService.
+// Using an interface here avoids an import cycle (service → worker → pipeline → service).
+//
+// Each getter takes a setting key and a default value of the matching type and
+// returns a value of that type; phases call them with dotted keys such as
+// "snowball.max_channels_total".
+// NOTE(review): presumably defaultVal is returned when the key is missing or
+// cannot be parsed — confirm against the implementation.
+type Settings interface {
+	GetInt(ctx context.Context, key string, defaultVal int) int
+	GetFloat(ctx context.Context, key string, defaultVal float64) float64
+	GetBool(ctx context.Context, key string, defaultVal bool) bool
+}
+
+// Phase is the interface implemented by every collection stage of the pipeline.
+//
+// Name returns the stage identifier (for example "discover" or "scrape").
+// Run executes the stage for the given task using the supplied options and
+// returns an error on failure.
+type Phase interface {
+	Name() string
+	Run(ctx context.Context, task *model.Task, opts *Options) error
+}
+
+// Options holds pipeline execution options (taken from the task parameters).
+//
+// SkipPhases lists phase names to skip (see ShouldSkip). TestRun, when non-nil,
+// carries limits that phases apply to shorten a run. Target is not read by any
+// of the phases in this package's phaseN files shown alongside this type.
+type Options struct {
+	Target     string
+	SkipPhases []string
+	TestRun    *TestRun
+}
+
+// TestRun carries the limits applied during a test run. Phases only honour a
+// limit when it is greater than zero.
+//
+// ItemLimit caps how many items a phase loads (keywords, channels, nav sites,
+// raw merchants or GitHub repos, depending on the phase). MessageLimit
+// overrides the per-channel message limit of the scrape phase.
+type TestRun struct {
+	ItemLimit    int
+	MessageLimit int
+}
+
+// ProgressReporter is the function type used to report progress.
+// It is supplied by pipeline.Runner and called by the individual phases with
+// the phase name, the current and total counters, and a human-readable message.
+type ProgressReporter func(phase string, current, total int, message string)
+
+// ShouldSkip reports whether phaseName appears in skipPhases.
+// The comparison is an exact, case-sensitive string match; a nil or empty
+// list never skips anything.
+func ShouldSkip(phaseName string, skipPhases []string) bool {
+	for idx := 0; idx < len(skipPhases); idx++ {
+		if skipPhases[idx] != phaseName {
+			continue
+		}
+		return true
+	}
+	return false
+}

+ 183 - 0
internal/pipeline/phase1_discover.go

@@ -0,0 +1,183 @@
+package pipeline
+
+import (
+	"context"
+	"log"
+	"regexp"
+	"strings"
+	"time"
+
+	"spider/internal/model"
+	"spider/internal/telegram"
+
+	"gorm.io/gorm"
+)
+
+// DiscoverPhase is pipeline phase 1: snowball discovery of Telegram channels,
+// starting from the active managed seeds.
+//
+// reporter is optional; Run only calls it when it is non-nil.
+// NOTE(review): the constructor does not set reporter and no setter for this
+// phase is visible alongside it — confirm how progress reporting is wired.
+type DiscoverPhase struct {
+	db        *gorm.DB
+	tgManager *telegram.AccountManager
+	settings  Settings
+	reporter  ProgressReporter
+}
+
+// NewDiscoverPhase builds a DiscoverPhase from its database handle, Telegram
+// account manager and settings source. The progress reporter is left nil.
+func NewDiscoverPhase(db *gorm.DB, tgManager *telegram.AccountManager, settings Settings) *DiscoverPhase {
+	phase := new(DiscoverPhase)
+	phase.db = db
+	phase.tgManager = tgManager
+	phase.settings = settings
+	return phase
+}
+
+// Name returns the identifier of this phase, "discover".
+func (p *DiscoverPhase) Name() string {
+	return "discover"
+}
+
+// Run performs a breadth-first discovery of Telegram channels. It starts from
+// the active managed seeds, records each reachable channel in the channels
+// table, and, while below the maximum depth, reads up to 100 messages per
+// channel to enqueue forwarded-from channels and t.me links.
+//
+// The phase is a no-op when no Telegram account manager is configured. It
+// always returns nil; failures are logged and the affected channel is skipped.
+func (p *DiscoverPhase) Run(ctx context.Context, task *model.Task, opts *Options) error {
+	log.Printf("[discover] starting, task_id=%d", task.ID)
+
+	if p.tgManager == nil {
+		log.Printf("[discover] tgManager is nil, skipping")
+		return nil
+	}
+
+	// 1. Read configuration. The maximum depth is hard-coded; the per-layer
+	// and total channel caps come from settings.
+	maxDepth := 3
+	maxPerLayer := p.settings.GetInt(ctx, "snowball.max_channels_per_layer", 200)
+	maxTotal := p.settings.GetInt(ctx, "snowball.max_channels_total", 500)
+
+	// 2. Load every active seed from managed_seeds.
+	// NOTE(review): the query result (including any error) is discarded, so a
+	// failed query looks the same as "no seeds".
+	var seeds []model.ManagedSeed
+	p.db.Where("status = ?", "active").Find(&seeds)
+	log.Printf("[discover] found %d active seeds", len(seeds))
+
+	// 3. BFS queue
+	type QueueItem struct {
+		Username string
+		Depth    int
+		Source   string // "seed" or "snowball"
+	}
+
+	queue := make([]QueueItem, 0, len(seeds))
+	for _, s := range seeds {
+		queue = append(queue, QueueItem{Username: s.ChannelName, Depth: 0, Source: "seed"})
+	}
+
+	visited := map[string]bool{}
+	totalFound := 0
+
+	// 4. BFS processing: stop when the queue is empty or the total cap is hit.
+	for len(queue) > 0 && totalFound < maxTotal {
+		if isContextDone(ctx) {
+			break
+		}
+
+		item := queue[0]
+		queue = queue[1:]
+
+		// visited is keyed by the cleaned username exactly as written
+		// (no case normalisation is applied).
+		username := cleanUsername(item.Username)
+		if username == "" || visited[username] {
+			continue
+		}
+		visited[username] = true
+
+		// Acquire a Telegram account; give up on the whole phase if none is available.
+		acc, err := p.tgManager.Acquire(ctx)
+		if err != nil {
+			log.Printf("[discover] no available TG account: %v", err)
+			break
+		}
+
+		// Connect and fetch the channel info.
+		if err := acc.Client.Connect(ctx); err != nil {
+			log.Printf("[discover] connect failed for account: %v", err)
+			p.tgManager.Release(acc, 0)
+			continue
+		}
+
+		channelInfo, err := acc.Client.GetChannelInfo(ctx, username)
+		if err != nil {
+			if fw, ok := err.(*telegram.FloodWaitError); ok {
+				log.Printf("[discover] FloodWait %ds on @%s", fw.Seconds, username)
+				// NOTE(review): the account is not released on this path;
+				// presumably HandleFloodWait takes over ownership — confirm.
+				p.tgManager.HandleFloodWait(acc, fw.Seconds)
+			} else {
+				log.Printf("[discover] GetChannelInfo error @%s: %v", username, err)
+				p.tgManager.Release(acc, 0)
+			}
+			continue
+		}
+
+		// Write to the channels table; an existing row with the same username
+		// is left as it is. The result of the call is not inspected, so
+		// totalFound is incremented whether or not a new row was created.
+		ch := &model.Channel{
+			Username:    username,
+			Title:       channelInfo.Title,
+			MemberCount: channelInfo.MemberCount,
+			About:       channelInfo.About,
+			Source:      item.Source,
+			Status:      "pending",
+		}
+		p.db.Where(model.Channel{Username: username}).FirstOrCreate(ch)
+		totalFound++
+
+		if p.reporter != nil {
+			p.reporter("discover", totalFound, maxTotal, "发现频道: @"+username)
+		}
+
+		// Below the maximum depth: read messages and extract more channels.
+		if item.Depth < maxDepth {
+			msgs, err := acc.Client.GetMessages(ctx, username, 0, 100)
+			if err == nil {
+				// layerCount counts candidates enqueued from this channel.
+				// The cap is only checked once per message, so a message
+				// with many links can push the count past maxPerLayer.
+				layerCount := 0
+				for _, msg := range msgs {
+					if layerCount >= maxPerLayer {
+						break
+					}
+					// Channel the message was forwarded from.
+					if msg.ForwardFromChannel != "" {
+						fwdName := cleanUsername(msg.ForwardFromChannel)
+						if fwdName != "" && !visited[fwdName] {
+							queue = append(queue, QueueItem{fwdName, item.Depth + 1, "snowball"})
+							layerCount++
+						}
+					}
+					// t.me links found in the message.
+					for _, link := range msg.Links {
+						name := extractUsernameFromLink(link)
+						if name != "" && !visited[name] {
+							queue = append(queue, QueueItem{name, item.Depth + 1, "snowball"})
+							layerCount++
+						}
+					}
+				}
+			} else {
+				log.Printf("[discover] GetMessages @%s: %v", username, err)
+			}
+		}
+
+		p.tgManager.Release(acc, 0)
+
+		// Pause between channels; return early if the context is cancelled.
+		select {
+		case <-ctx.Done():
+			return nil
+		case <-time.After(5 * time.Second):
+		}
+	}
+
+	log.Printf("[discover] done, found %d channels", totalFound)
+	return nil
+}
+
+// cleanUsername normalises a username by trimming surrounding whitespace and
+// then removing a single leading "@".
+func cleanUsername(s string) string {
+	trimmed := strings.TrimSpace(s)
+	return strings.TrimPrefix(trimmed, "@")
+}
+
+// extractUsernameFromLink extracts the username from a t.me/xxx or
+// telegram.me/xxx link.
+//
+// It returns "" when the link contains no username-shaped path segment
+// (a letter followed by 4-31 letters, digits or underscores), and also when
+// that segment is one of Telegram's reserved path words such as "joinchat" or
+// "addstickers": those introduce invite or utility links and are not channel
+// usernames, so they must not be enqueued for discovery.
+func extractUsernameFromLink(link string) string {
+	re := regexp.MustCompile(`t(?:elegram)?\.me/([a-zA-Z][a-zA-Z0-9_]{4,31})`)
+	m := re.FindStringSubmatch(link)
+	if len(m) < 2 {
+		return ""
+	}
+
+	// Reserved first path segments of t.me links that are long enough to
+	// satisfy the username pattern above.
+	switch strings.ToLower(m[1]) {
+	case "joinchat", "addstickers", "addemoji", "addtheme", "addlist",
+		"share", "proxy", "socks", "setlanguage", "login",
+		"confirmphone", "invoice", "giftcode", "boost", "contact":
+		return ""
+	}
+	return m[1]
+}

+ 129 - 0
internal/pipeline/phase2_search.go

@@ -0,0 +1,129 @@
+package pipeline
+
+import (
+	"context"
+	"log"
+	"net/url"
+	"regexp"
+	"time"
+
+	"spider/internal/model"
+	"spider/internal/search"
+
+	"gorm.io/gorm"
+)
+
+// SearchPhase is pipeline phase 2: it runs the active managed keywords through
+// the Serper search client and stores the resulting Telegram channels and
+// navigation sites.
+//
+// serper may be nil, in which case Run skips the phase. reporter is optional
+// and only called when non-nil.
+type SearchPhase struct {
+	db       *gorm.DB
+	serper   *search.SerperClient
+	settings Settings
+	reporter ProgressReporter
+}
+
+// NewSearchPhase builds a SearchPhase from its database handle, Serper client
+// and settings source. The progress reporter is left nil.
+func NewSearchPhase(db *gorm.DB, serper *search.SerperClient, settings Settings) *SearchPhase {
+	phase := new(SearchPhase)
+	phase.db = db
+	phase.serper = serper
+	phase.settings = settings
+	return phase
+}
+
+// Name returns the identifier of this phase, "search".
+func (p *SearchPhase) Name() string {
+	return "search"
+}
+
+// Run searches every active managed keyword and classifies each result URL.
+// Telegram channel URLs become pending rows in the channels table and
+// navigation-site URLs become pending rows in the nav sites table; rows that
+// already exist are left alone and not counted.
+//
+// The phase is skipped when no Serper client is configured. A test run's
+// ItemLimit caps the number of keywords. Run always returns nil; a failed
+// search is logged and the keyword is skipped.
+func (p *SearchPhase) Run(ctx context.Context, task *model.Task, opts *Options) error {
+	if p.serper == nil {
+		log.Println("[search] no serper client configured, skipping")
+		return nil
+	}
+
+	// Load the active keywords, optionally limited for a test run.
+	query := p.db.Where("status = ?", "active")
+	if tr := opts.TestRun; tr != nil && tr.ItemLimit > 0 {
+		query = query.Limit(tr.ItemLimit)
+	}
+	var keywords []model.ManagedKeyword
+	query.Find(&keywords)
+
+	var newChannels, newNavSites int
+	keywordTotal := len(keywords)
+
+	for idx := 0; idx < keywordTotal; idx++ {
+		if isContextDone(ctx) {
+			break
+		}
+		keyword := keywords[idx].Keyword
+
+		if p.reporter != nil {
+			p.reporter("search", idx+1, keywordTotal, "搜索: "+keyword)
+		}
+
+		hits, searchErr := p.serper.Search(ctx, keyword)
+		if searchErr != nil {
+			log.Printf("[search] keyword=%s err=%v", keyword, searchErr)
+			continue
+		}
+
+		for _, hit := range hits {
+			kind := search.ClassifyURL(hit.URL)
+			if kind == "tg_channel" {
+				name := extractTGUsername(hit.URL)
+				if name == "" {
+					continue
+				}
+				channel := &model.Channel{
+					Username:     name,
+					Source:       "search",
+					SourceDetail: keyword,
+					Status:       "pending",
+				}
+				// RowsAffected is only positive when a new row was inserted.
+				if p.db.Where(model.Channel{Username: name}).FirstOrCreate(channel).RowsAffected > 0 {
+					newChannels++
+				}
+			} else if kind == "nav_site" {
+				navSite := &model.NavSite{
+					URL:    hit.URL,
+					Domain: extractDomain(hit.URL),
+					Source: keyword,
+					Status: "pending",
+				}
+				if p.db.Where("url = ?", hit.URL).FirstOrCreate(navSite).RowsAffected > 0 {
+					newNavSites++
+				}
+			}
+		}
+
+		// Pause 2s between keywords; stop early on cancellation.
+		select {
+		case <-time.After(2 * time.Second):
+		case <-ctx.Done():
+			return nil
+		}
+	}
+
+	log.Printf("[search] done: %d channels, %d nav_sites found", newChannels, newNavSites)
+	return nil
+}
+
+// extractTGUsername pulls the username out of a t.me/username or
+// telegram.me/username URL. It returns "" when the URL contains no
+// username-shaped segment (a letter followed by 4-31 letters, digits or
+// underscores).
+func extractTGUsername(rawURL string) string {
+	pattern := regexp.MustCompile(`(?:t(?:elegram)?\.me)/([a-zA-Z][a-zA-Z0-9_]{4,31})`)
+	if groups := pattern.FindStringSubmatch(rawURL); len(groups) >= 2 {
+		return groups[1]
+	}
+	return ""
+}
+
+// extractDomain returns the host name (without port) of rawURL, or "" when the
+// URL cannot be parsed.
+func extractDomain(rawURL string) string {
+	parsed, parseErr := url.Parse(rawURL)
+	if parseErr == nil {
+		return parsed.Hostname()
+	}
+	return ""
+}

+ 250 - 0
internal/pipeline/phase3_github.go

@@ -0,0 +1,250 @@
+package pipeline
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"log"
+	"net/http"
+	"net/url"
+	"regexp"
+	"strings"
+	"time"
+
+	"spider/internal/extractor"
+	"spider/internal/model"
+
+	"gorm.io/gorm"
+)
+
+// GithubPhase is pipeline phase 3: it mines GitHub repository READMEs for
+// Telegram channel links.
+//
+// token is an optional GitHub token; when non-empty it is sent in the
+// Authorization header of every request. http is the client used for all
+// GitHub requests. reporter is optional and only called when non-nil.
+type GithubPhase struct {
+	db       *gorm.DB
+	token    string // GitHub token (optional)
+	settings Settings
+	reporter ProgressReporter
+	http     *http.Client
+}
+
+// NewGithubPhase builds a GithubPhase. The token may be empty. The phase gets
+// its own HTTP client with a 15 second timeout; the progress reporter is left nil.
+func NewGithubPhase(db *gorm.DB, token string, settings Settings) *GithubPhase {
+	phase := new(GithubPhase)
+	phase.db = db
+	phase.token = token
+	phase.settings = settings
+	phase.http = &http.Client{Timeout: 15 * time.Second}
+	return phase
+}
+
+// Name returns the identifier of this phase, "github".
+func (p *GithubPhase) Name() string {
+	return "github"
+}
+
+// Run searches GitHub for repositories matching the first 10 active managed
+// keywords (each combined with "telegram"), downloads their READMEs and stores
+// every t.me link that appears in a Chinese-language context as a pending
+// channel.
+//
+// At most itemLimit repositories are processed in total: 50 by default, or the
+// test run's ItemLimit when one is set. The per-query page size is only an
+// estimate, so the total is enforced explicitly inside the loops.
+//
+// Run returns an error when the keywords cannot be loaded. Search and download
+// failures are logged or skipped and do not abort the phase. Cancellation of
+// ctx ends the phase with a nil error.
+func (p *GithubPhase) Run(ctx context.Context, task *model.Task, opts *Options) error {
+	// Build the GitHub search queries from the first 10 active keywords.
+	var keywords []model.ManagedKeyword
+	if err := p.db.Where("status = ?", "active").Limit(10).Find(&keywords).Error; err != nil {
+		return fmt.Errorf("[github] load keywords: %w", err)
+	}
+
+	queries := make([]string, 0, len(keywords))
+	for _, kw := range keywords {
+		queries = append(queries, fmt.Sprintf("%s telegram", kw.Keyword))
+	}
+
+	itemLimit := 50 // process 50 repos by default
+	if opts != nil && opts.TestRun != nil && opts.TestRun.ItemLimit > 0 {
+		itemLimit = opts.TestRun.ItemLimit
+	}
+
+	total := len(queries)
+	found := 0
+	processed := 0 // repositories handled so far, across all queries
+
+	reposPerQuery := 1
+	if len(queries) > 0 {
+		reposPerQuery = itemLimit/len(queries) + 1
+	}
+
+	for i, query := range queries {
+		if isContextDone(ctx) || processed >= itemLimit {
+			break
+		}
+		if p.reporter != nil {
+			p.reporter("github", i+1, total, "GitHub搜索: "+query)
+		}
+
+		repos, err := p.searchRepos(ctx, query, reposPerQuery)
+		if err != nil {
+			log.Printf("[github] search err: %v", err)
+			continue
+		}
+
+		for _, repo := range repos {
+			if isContextDone(ctx) || processed >= itemLimit {
+				break
+			}
+			processed++
+
+			readme, err := p.fetchReadme(ctx, repo)
+			if err != nil {
+				continue
+			}
+
+			// Filter: the first 5000 bytes of the README must contain Chinese.
+			preview := readme
+			if len(preview) > 5000 {
+				preview = preview[:5000]
+			}
+			if !extractor.ContainsChinese(preview, 0) {
+				continue
+			}
+
+			// Extract the t.me links. A link that occurs several times is
+			// handled once, because the context check below always looks at
+			// its first occurrence and would give the same answer each time.
+			seenLinks := map[string]bool{}
+			for _, link := range extractTMeLinks(readme) {
+				if seenLinks[link] {
+					continue
+				}
+				seenLinks[link] = true
+
+				// The 200 bytes before and after the link must contain Chinese.
+				idx := strings.Index(readme, link)
+				if idx < 0 {
+					continue
+				}
+				start := idx - 200
+				if start < 0 {
+					start = 0
+				}
+				end := idx + len(link) + 200
+				if end > len(readme) {
+					end = len(readme)
+				}
+				context200 := readme[start:end]
+				if !extractor.ContainsChinese(context200, 0) {
+					continue
+				}
+
+				username := extractTGUsernameFromLink(link)
+				if username == "" {
+					continue
+				}
+
+				ch := &model.Channel{
+					Username:     username,
+					Source:       "github",
+					SourceDetail: repo,
+					Status:       "pending",
+				}
+				result := p.db.Where(model.Channel{Username: username}).FirstOrCreate(ch)
+				if result.Error != nil {
+					log.Printf("[github] save channel @%s: %v", username, result.Error)
+					continue
+				}
+				if result.RowsAffected > 0 {
+					found++
+				}
+			}
+
+			// Pause 2s between repos.
+			select {
+			case <-ctx.Done():
+				return nil
+			case <-time.After(2 * time.Second):
+			}
+		}
+
+		if processed >= itemLimit {
+			break
+		}
+
+		// Pause 5s between queries.
+		select {
+		case <-ctx.Done():
+			return nil
+		case <-time.After(5 * time.Second):
+		}
+	}
+
+	log.Printf("[github] done: %d channels found", found)
+	return nil
+}
+
+// searchRepos queries the GitHub repository Search API and returns the
+// "owner/name" of the matching repositories, sorted by stars.
+//
+// limit is clamped to the range 1-30 and sent as per_page. An error is
+// returned when the request fails, when GitHub answers with a non-200 status
+// (for example a rate-limit response, which would otherwise decode to an empty
+// result), or when the response body cannot be decoded.
+func (p *GithubPhase) searchRepos(ctx context.Context, query string, limit int) ([]string, error) {
+	perPage := limit
+	if perPage > 30 {
+		perPage = 30
+	}
+	if perPage < 1 {
+		perPage = 1
+	}
+	apiURL := fmt.Sprintf("https://api.github.com/search/repositories?q=%s&sort=stars&per_page=%d",
+		url.QueryEscape(query), perPage)
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, apiURL, nil)
+	if err != nil {
+		return nil, err
+	}
+	req.Header.Set("Accept", "application/vnd.github.v3+json")
+	if p.token != "" {
+		req.Header.Set("Authorization", "token "+p.token)
+	}
+
+	resp, err := p.http.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+
+	// Error responses are JSON too, but without "items"; report them instead
+	// of silently returning zero repositories.
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("github search %q: unexpected status %s", query, resp.Status)
+	}
+
+	var result struct {
+		Items []struct {
+			FullName string `json:"full_name"`
+		} `json:"items"`
+	}
+	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
+		return nil, err
+	}
+
+	repos := make([]string, 0, len(result.Items))
+	for _, item := range result.Items {
+		repos = append(repos, item.FullName)
+	}
+	return repos, nil
+}
+
+// fetchReadme downloads README.md of the repository fullName ("owner/name")
+// from raw.githubusercontent.com, trying the main branch first and falling
+// back to master when main answers 404.
+//
+// It returns the README text only for a 200 response. An error is returned
+// when a request fails, when a branch answers with a status other than 200 or
+// 404, when the body cannot be read, or when neither branch has a README.md.
+func (p *GithubPhase) fetchReadme(ctx context.Context, fullName string) (string, error) {
+	for _, branch := range []string{"main", "master"} {
+		rawURL := fmt.Sprintf("https://raw.githubusercontent.com/%s/%s/README.md", fullName, branch)
+		req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
+		if err != nil {
+			return "", err
+		}
+		if p.token != "" {
+			req.Header.Set("Authorization", "token "+p.token)
+		}
+
+		resp, err := p.http.Do(req)
+		if err != nil {
+			return "", err
+		}
+
+		// The body is closed explicitly on every path: a defer inside the
+		// loop would keep the first response open during the second request.
+		if resp.StatusCode == http.StatusNotFound {
+			resp.Body.Close()
+			continue // try the next branch
+		}
+		if resp.StatusCode != http.StatusOK {
+			resp.Body.Close()
+			return "", fmt.Errorf("fetch README %s (%s): unexpected status %s", fullName, branch, resp.Status)
+		}
+
+		data, err := io.ReadAll(resp.Body)
+		resp.Body.Close()
+		if err != nil {
+			return "", err
+		}
+		return string(data), nil
+	}
+
+	return "", fmt.Errorf("fetch README %s: README.md not found on main or master", fullName)
+}
+
+// extractTMeLinks returns every http(s) t.me / telegram.me link found in text,
+// in order of appearance and including repeats. The result is nil when there
+// are no matches.
+func extractTMeLinks(text string) []string {
+	linkPattern := regexp.MustCompile(`https?://t(?:elegram)?\.me/[a-zA-Z][a-zA-Z0-9_]{4,31}`)
+	matches := linkPattern.FindAllString(text, -1)
+	return matches
+}
+
+// extractTGUsernameFromLink extracts the username from a t.me/xxx or
+// telegram.me/xxx link.
+//
+// It returns "" when the link contains no username-shaped path segment
+// (a letter followed by 4-31 letters, digits or underscores), and also when
+// that segment is one of Telegram's reserved path words such as "joinchat" or
+// "addstickers": those introduce invite or utility links and are not channel
+// usernames, so they must not be stored as channels.
+func extractTGUsernameFromLink(link string) string {
+	re := regexp.MustCompile(`t(?:elegram)?\.me/([a-zA-Z][a-zA-Z0-9_]{4,31})`)
+	m := re.FindStringSubmatch(link)
+	if len(m) < 2 {
+		return ""
+	}
+
+	// Reserved first path segments of t.me links that are long enough to
+	// satisfy the username pattern above.
+	switch strings.ToLower(m[1]) {
+	case "joinchat", "addstickers", "addemoji", "addtheme", "addlist",
+		"share", "proxy", "socks", "setlanguage", "login",
+		"confirmphone", "invoice", "giftcode", "boost", "contact":
+		return ""
+	}
+	return m[1]
+}

+ 220 - 0
internal/pipeline/phase4_scrape.go

@@ -0,0 +1,220 @@
+package pipeline
+
+import (
+	"context"
+	"log"
+	"strings"
+	"time"
+
+	"github.com/redis/go-redis/v9"
+	"gorm.io/gorm"
+
+	"spider/internal/extractor"
+	"spider/internal/llm"
+	"spider/internal/model"
+	"spider/internal/telegram"
+)
+
+// ScrapePhase is pipeline phase 4: it reads messages from pending Telegram
+// channels and extracts raw merchant records from them.
+//
+// tgManager may be nil, in which case Run skips the phase. llmClient and redis
+// are optional: when nil, the LLM relevance check / LLM parsing and the Redis
+// de-duplication are not performed. reporter is optional and only called when
+// non-nil.
+type ScrapePhase struct {
+	db        *gorm.DB
+	tgManager *telegram.AccountManager
+	llmClient *llm.Client
+	settings  Settings
+	redis     *redis.Client
+	reporter  ProgressReporter
+}
+
+// NewScrapePhase builds a ScrapePhase from its database handle, Telegram
+// account manager, LLM client, settings source and Redis client. The progress
+// reporter is left nil.
+func NewScrapePhase(db *gorm.DB, tgManager *telegram.AccountManager, llmClient *llm.Client, settings Settings, rdb *redis.Client) *ScrapePhase {
+	phase := new(ScrapePhase)
+	phase.db = db
+	phase.tgManager = tgManager
+	phase.llmClient = llmClient
+	phase.settings = settings
+	phase.redis = rdb
+	return phase
+}
+
+// Name returns the identifier of this phase, "scrape".
+func (p *ScrapePhase) Name() string {
+	return "scrape"
+}
+
+// Run scrapes every pending channel: it optionally asks the LLM whether the
+// channel is relevant, processes the pinned messages, then pages through the
+// message history in batches of up to 100, saving last_message_id after each
+// batch so that an interrupted channel can be resumed.
+//
+// Final channel status:
+//   - "scraped" only when the history loop finished normally;
+//   - "failed"  when connecting or fetching messages failed;
+//   - "skipped" when the LLM relevance score is below 0.5;
+//   - unchanged (still "pending") after a FloodWait or a cancelled context, so
+//     that the next run resumes from the saved last_message_id.
+//
+// The phase is a no-op when no Telegram account manager is configured. Run
+// returns an error only when the pending channels cannot be loaded.
+func (p *ScrapePhase) Run(ctx context.Context, task *model.Task, opts *Options) error {
+	log.Printf("[scrape] starting, task_id=%d", task.ID)
+
+	if p.tgManager == nil {
+		log.Printf("[scrape] tgManager is nil, skipping")
+		return nil
+	}
+
+	msgLimit := p.settings.GetInt(ctx, "tg_scraper.message_limit_per_channel", 500)
+	delayMsg := p.settings.GetFloat(ctx, "tg_scraper.delay_per_message", 1.0)
+	delayChannel := p.settings.GetFloat(ctx, "tg_scraper.delay_per_channel", 5.0)
+
+	var testRun *TestRun
+	if opts != nil {
+		testRun = opts.TestRun
+	}
+	if testRun != nil && testRun.MessageLimit > 0 {
+		msgLimit = testRun.MessageLimit
+	}
+
+	// Load the pending channels.
+	var channels []model.Channel
+	q := p.db.Where("status = ?", "pending")
+	if testRun != nil && testRun.ItemLimit > 0 {
+		q = q.Limit(testRun.ItemLimit)
+	}
+	if err := q.Find(&channels).Error; err != nil {
+		log.Printf("[scrape] load pending channels: %v", err)
+		return err
+	}
+
+	total := len(channels)
+	log.Printf("[scrape] found %d pending channels", total)
+
+	for i := range channels {
+		ch := &channels[i]
+		if isContextDone(ctx) {
+			break
+		}
+
+		if p.reporter != nil {
+			p.reporter("scrape", i+1, total, "采集频道: @"+ch.Username)
+		}
+
+		acc, err := p.tgManager.Acquire(ctx)
+		if err != nil {
+			log.Printf("[scrape] no available account: %v", err)
+			break
+		}
+
+		if err := acc.Client.Connect(ctx); err != nil {
+			log.Printf("[scrape] connect failed: %v", err)
+			p.tgManager.Release(acc, 0)
+			p.db.Model(ch).Update("status", "failed")
+			continue
+		}
+
+		// LLM relevance check; an LLM error does not block scraping.
+		if p.llmClient != nil {
+			score, err := p.llmClient.EvalChannelRelevance(ctx, ch.Title, ch.About, ch.MemberCount)
+			if err == nil && score < 0.5 {
+				log.Printf("[scrape] skipping @%s, relevance score=%.2f", ch.Username, score)
+				p.tgManager.Release(acc, 0)
+				p.db.Model(ch).Update("status", "skipped")
+				continue
+			}
+		}
+
+		// Pinned messages are best effort: a failure is logged and the
+		// history is still read.
+		pinnedMsgs, err := acc.Client.GetPinnedMessages(ctx, ch.Username)
+		if err != nil {
+			log.Printf("[scrape] GetPinnedMessages @%s: %v", ch.Username, err)
+		}
+		p.processMessages(ctx, pinnedMsgs, ch, delayMsg)
+
+		// Message history, resuming from the saved checkpoint.
+		// finalStatus is what the channel is marked as afterwards; "" means
+		// "leave it pending so it is picked up again".
+		finalStatus := "scraped"
+		offsetID := ch.LastMessageID
+		fetched := 0
+		for fetched < msgLimit {
+			if isContextDone(ctx) {
+				finalStatus = ""
+				break
+			}
+
+			batchSize := 100
+			if msgLimit-fetched < batchSize {
+				batchSize = msgLimit - fetched
+			}
+
+			msgs, err := acc.Client.GetMessages(ctx, ch.Username, offsetID, batchSize)
+			if err != nil {
+				if fw, ok := err.(*telegram.FloodWaitError); ok {
+					log.Printf("[scrape] FloodWait %ds on @%s", fw.Seconds, ch.Username)
+					p.tgManager.HandleFloodWait(acc, fw.Seconds)
+					acc = nil // handed over to HandleFloodWait; must not be released below
+					finalStatus = ""
+				} else {
+					log.Printf("[scrape] GetMessages @%s: %v", ch.Username, err)
+					finalStatus = "failed"
+				}
+				break
+			}
+			if len(msgs) == 0 {
+				break
+			}
+
+			p.processMessages(ctx, msgs, ch, delayMsg)
+
+			// Save the checkpoint.
+			lastID := msgs[len(msgs)-1].ID
+			p.db.Model(ch).Update("last_message_id", lastID)
+			offsetID = lastID
+			fetched += len(msgs)
+		}
+
+		if acc != nil {
+			p.tgManager.Release(acc, 0)
+		}
+		if finalStatus != "" {
+			p.db.Model(ch).Update("status", finalStatus)
+		}
+
+		// Pause between channels; stop early on cancellation.
+		select {
+		case <-ctx.Done():
+			return nil
+		case <-time.After(time.Duration(float64(time.Second) * delayChannel)):
+		}
+	}
+
+	log.Printf("[scrape] done")
+	return nil
+}
+
+// processMessages extracts merchants from a batch of messages and writes them
+// as raw merchant rows.
+//
+// A message is considered only when it is not a service message, has text,
+// contains Chinese and contains a contact. When Redis is configured, a
+// merchant whose Telegram username was already seen within the last 7 days is
+// skipped. A Redis error disables that check for the message instead of
+// dropping the merchant. The LLM parser is tried first when configured; the
+// regex extractor is the fallback. After each stored merchant the function
+// waits delayMsg seconds and returns early when ctx is cancelled.
+func (p *ScrapePhase) processMessages(ctx context.Context, msgs []telegram.Message, ch *model.Channel, delayMsg float64) {
+	for _, msg := range msgs {
+		if isContextDone(ctx) {
+			return
+		}
+		if msg.IsService || msg.Text == "" {
+			continue
+		}
+		if !extractor.ContainsChinese(msg.Text, 0) {
+			continue
+		}
+		if !extractor.HasContact(msg.Text) {
+			continue
+		}
+
+		// Regex extraction is done once and reused for both the
+		// de-duplication key and the fallback below.
+		info := extractor.Extract(msg.Text)
+
+		// Quick de-duplication (Redis SET NX key).
+		if p.redis != nil && info.TgUsername != "" {
+			dedupKey := "spider:dedup:merchant:" + info.TgUsername
+			set, err := p.redis.SetNX(ctx, dedupKey, "1", 7*24*time.Hour).Result()
+			if err != nil {
+				// Redis unavailable: keep the merchant rather than lose it.
+				log.Printf("[scrape] dedup check failed for %s: %v", info.TgUsername, err)
+			} else if !set {
+				continue // already seen, skip
+			}
+		}
+
+		// Precise parsing through the LLM.
+		var merchantInfo *extractor.MerchantInfo
+		if p.llmClient != nil {
+			merchantInfo, _ = p.llmClient.ParseMerchant(ctx, msg.Text)
+		}
+
+		// Fall back to the regex result.
+		if merchantInfo == nil || merchantInfo.TgUsername == "" {
+			merchantInfo = &extractor.MerchantInfo{
+				TgUsername: info.TgUsername,
+				Website:    info.Website,
+				Email:      info.Email,
+				Phone:      info.Phone,
+			}
+		}
+
+		if merchantInfo.TgUsername == "" && merchantInfo.Website == "" {
+			continue
+		}
+
+		raw := &model.MerchantRaw{
+			MerchantName:    extractor.CleanMerchantName(merchantInfo.MerchantName),
+			TgUsername:      strings.TrimPrefix(merchantInfo.TgUsername, "@"),
+			Website:         merchantInfo.Website,
+			Email:           merchantInfo.Email,
+			Phone:           merchantInfo.Phone,
+			Industry:        merchantInfo.Industry,
+			SourceType:      "tg_scrape",
+			SourceID:        ch.Username,
+			OriginalMessage: msg.Text,
+			Status:          "raw",
+		}
+		if err := p.db.Create(raw).Error; err != nil {
+			log.Printf("[scrape] save raw merchant from @%s: %v", ch.Username, err)
+		}
+
+		// Per-message delay that can be interrupted by cancellation.
+		select {
+		case <-ctx.Done():
+			return
+		case <-time.After(time.Duration(float64(time.Second) * delayMsg)):
+		}
+	}
+}

+ 200 - 0
internal/pipeline/phase5_crawl.go

@@ -0,0 +1,200 @@
+package pipeline
+
+import (
+	"context"
+	"log"
+	"strings"
+
+	"gorm.io/gorm"
+
+	"spider/internal/crawler"
+	"spider/internal/extractor"
+	"spider/internal/llm"
+	"spider/internal/model"
+)
+
+// CrawlPhase is pipeline phase 5: it crawls pending navigation sites and
+// extracts raw merchant records from them.
+//
+// staticCrawler is tried first and dynCrawler is the fallback. tmeValidator
+// pre-checks Telegram usernames when the "tme_validator.enabled" setting is on.
+// llmClient is optional and only consulted for URLs the rule filter is
+// uncertain about. reporter is optional and only called when non-nil.
+type CrawlPhase struct {
+	db            *gorm.DB
+	staticCrawler *crawler.StaticCrawler
+	dynCrawler    *crawler.DynamicCrawler
+	tmeValidator  *crawler.TMeValidator
+	llmClient     *llm.Client
+	settings      Settings
+	reporter      ProgressReporter
+}
+
+// NewCrawlPhase builds a CrawlPhase. It creates its own static crawler,
+// dynamic crawler and t.me validator; the database handle, LLM client and
+// settings source are supplied by the caller. The progress reporter is left nil.
+func NewCrawlPhase(db *gorm.DB, llmClient *llm.Client, settings Settings) *CrawlPhase {
+	phase := new(CrawlPhase)
+	phase.db = db
+	phase.staticCrawler = crawler.NewStaticCrawler()
+	phase.dynCrawler = crawler.NewDynamicCrawler()
+	phase.tmeValidator = crawler.NewTMeValidator()
+	phase.llmClient = llmClient
+	phase.settings = settings
+	return phase
+}
+
+// Name returns the identifier of this phase, "crawl".
+func (p *CrawlPhase) Name() string {
+	return "crawl"
+}
+
+// Run crawls every pending navigation site. For each site it applies the rule
+// filter (and, for uncertain URLs, the LLM), fetches the page with the static
+// crawler and falls back to the dynamic crawler, drops non-Chinese pages,
+// stores each Telegram link as a raw merchant, and follows the remaining links
+// as possible merchant websites.
+//
+// The site's merchant_count is set to the number of raw merchants stored from
+// that site's own Telegram links; the grand total across all sites is only
+// used for the final log line. Run always returns nil.
+func (p *CrawlPhase) Run(ctx context.Context, task *model.Task, opts *Options) error {
+	log.Printf("[crawl] starting, task_id=%d", task.ID)
+
+	tmeEnabled := true
+	if p.settings != nil {
+		tmeEnabled = p.settings.GetBool(ctx, "tme_validator.enabled", true)
+	}
+
+	var navSites []model.NavSite
+	q := p.db.Where("status = ?", "pending")
+	if opts != nil && opts.TestRun != nil && opts.TestRun.ItemLimit > 0 {
+		q = q.Limit(opts.TestRun.ItemLimit)
+	}
+	q.Find(&navSites)
+
+	total := len(navSites)
+	totalMerchants := 0 // across all sites, for logging only
+
+	for i := range navSites {
+		site := &navSites[i]
+		if isContextDone(ctx) {
+			break
+		}
+
+		if p.reporter != nil {
+			p.reporter("crawl", i+1, total, "爬取: "+site.URL)
+		}
+
+		// Rule-based pre-filter.
+		filterResult := crawler.RuleFilter(site.URL)
+		if filterResult == crawler.FilterDiscard {
+			p.db.Model(site).Updates(map[string]interface{}{
+				"status":        "filtered",
+				"filter_reason": "blacklist",
+			})
+			continue
+		}
+
+		// Uncertain URLs are judged by the LLM when one is configured.
+		if filterResult == crawler.FilterUncertain && p.llmClient != nil {
+			isNav, confidence, err := p.llmClient.IsNavSite(ctx, site.URL)
+			if err != nil || !isNav || confidence < 0.6 {
+				p.db.Model(site).Updates(map[string]interface{}{
+					"status":        "filtered",
+					"filter_reason": "llm_reject",
+				})
+				continue
+			}
+		}
+
+		// Crawl: static first, dynamic as the fallback.
+		result := p.staticCrawler.Crawl(ctx, site.URL)
+		if result.Error != nil || result.HTML == "" {
+			log.Printf("[crawl] static failed for %s, trying dynamic", site.URL)
+			result = p.dynCrawler.Crawl(ctx, site.URL)
+		}
+
+		if result.Error != nil {
+			p.db.Model(site).Update("status", "failed")
+			continue
+		}
+
+		// Drop pages whose first 5000 bytes contain no Chinese.
+		snippet := result.HTML
+		if len(snippet) > 5000 {
+			snippet = snippet[:5000]
+		}
+		if !extractor.ContainsChinese(snippet, 0) {
+			p.db.Model(site).Updates(map[string]interface{}{
+				"status":        "filtered",
+				"filter_reason": "non_chinese",
+			})
+			continue
+		}
+
+		// Telegram links found on the page.
+		siteMerchants := 0 // merchants stored from this site only
+		for _, tgLink := range result.TgLinks {
+			username := crawler.ExtractTGUsername(tgLink)
+			if username == "" {
+				continue
+			}
+
+			// Pre-check for dead t.me accounts.
+			if tmeEnabled {
+				if !p.tmeValidator.IsAlive(ctx, username) {
+					log.Printf("[crawl] dead account: %s", username)
+					continue
+				}
+			}
+
+			raw := &model.MerchantRaw{
+				TgUsername: username,
+				SourceType: "web_crawl",
+				SourceID:   site.URL,
+				Status:     "raw",
+			}
+			if err := p.db.Create(raw).Error; err != nil {
+				log.Printf("[crawl] save raw merchant %s: %v", username, err)
+				continue
+			}
+			siteMerchants++
+		}
+		totalMerchants += siteMerchants
+
+		// Ordinary links (possible merchant websites).
+		for _, link := range result.Links {
+			if isContextDone(ctx) {
+				break
+			}
+			// Skip Telegram links (handled above) and discarded links.
+			if strings.Contains(link, "t.me") || strings.Contains(link, "telegram.me") {
+				continue
+			}
+			if crawler.RuleFilter(link) == crawler.FilterDiscard {
+				continue
+			}
+
+			// Crawl the merchant site's pages for contact details.
+			p.crawlMerchantSite(ctx, link, site.URL)
+		}
+
+		p.db.Model(site).Updates(map[string]interface{}{
+			"status":         "scraped",
+			"merchant_count": siteMerchants,
+		})
+	}
+
+	log.Printf("[crawl] done: %d merchants found", totalMerchants)
+	return nil
+}
+
+// crawlMerchantSite crawls a merchant website and extracts contact details.
+//
+// It tries siteURL itself and then the /contact, /about and /关于我们 sub-pages,
+// using the static crawler only. It stops at the first page on which the
+// extractor reports a contact; a raw merchant row is stored for that page when
+// it yields a Telegram username, an email or a phone number. sourceURL is
+// recorded as the row's SourceID and siteURL as its Website.
+//
+// Trailing slashes are removed from siteURL before the sub-page paths are
+// appended, so "https://a.com/" yields "https://a.com/contact" rather than
+// "https://a.com//contact".
+func (p *CrawlPhase) crawlMerchantSite(ctx context.Context, siteURL, sourceURL string) {
+	base := strings.TrimRight(siteURL, "/")
+	subPages := []string{siteURL, base + "/contact", base + "/about", base + "/关于我们"}
+
+	for _, page := range subPages {
+		if isContextDone(ctx) {
+			break
+		}
+
+		result := p.staticCrawler.Crawl(ctx, page)
+		if result.Error != nil || result.HTML == "" {
+			continue
+		}
+
+		info := extractor.Extract(result.HTML)
+		if !info.HasContact {
+			continue
+		}
+
+		raw := &model.MerchantRaw{
+			TgUsername: info.TgUsername,
+			Website:    siteURL,
+			Email:      info.Email,
+			Phone:      info.Phone,
+			SourceType: "web_crawl",
+			SourceID:   sourceURL,
+			Status:     "raw",
+		}
+		if raw.TgUsername != "" || raw.Email != "" || raw.Phone != "" {
+			if err := p.db.Create(raw).Error; err != nil {
+				log.Printf("[crawl] save raw merchant for %s: %v", siteURL, err)
+			}
+		}
+		break // stop once a page with contact details was found
+	}
+}

+ 322 - 0
internal/pipeline/phase6_clean.go

@@ -0,0 +1,322 @@
+package pipeline
+
+import (
+	"context"
+	"fmt"
+	"log"
+	"math"
+	"regexp"
+	"strings"
+	"time"
+
+	"gorm.io/datatypes"
+	"gorm.io/gorm"
+
+	"spider/internal/extractor"
+	"spider/internal/model"
+	"spider/internal/telegram"
+)
+
+// CleanPhase is pipeline phase 6: data cleaning. It filters raw merchants
+// against a blacklist, de-duplicates them and verifies their Telegram
+// usernames, writing the outcome to the clean merchants table.
+//
+// tgManager may be nil, in which case usernames cannot be verified.
+// reporter is optional (see SetReporter) and only called when non-nil.
+type CleanPhase struct {
+	db        *gorm.DB
+	tgManager *telegram.AccountManager
+	settings  Settings
+	reporter  ProgressReporter
+}
+
+// NewCleanPhase builds a CleanPhase from its database handle, Telegram account
+// manager and settings source. The progress reporter is left nil.
+func NewCleanPhase(db *gorm.DB, tgManager *telegram.AccountManager, settings Settings) *CleanPhase {
+	phase := new(CleanPhase)
+	phase.db = db
+	phase.tgManager = tgManager
+	phase.settings = settings
+	return phase
+}
+
+// Name returns the identifier of this phase, "clean".
+func (p *CleanPhase) Name() string {
+	return "clean"
+}
+
+// SetReporter installs the progress callback used by Run; passing nil
+// disables progress reporting.
+func (p *CleanPhase) SetReporter(r ProgressReporter) {
+	p.reporter = r
+}
+
+// Run cleans all raw merchants with status "raw" in three stages:
+//
+//  1. blacklist filtering: rejected rows are stored with the status returned
+//     by filterBlacklist;
+//  2. de-duplication: all but the richest row of each group are stored as
+//     "duplicate";
+//  3. Telegram verification: each remaining row with a username is checked
+//     and stored as "valid", "bot", "group" or "invalid". Rows without a
+//     username are stored as "valid".
+//
+// A row whose username could not be verified (verification error, or no
+// verification result at all, e.g. because no Telegram account manager is
+// configured) is NOT stored as "invalid"; it is logged and left for a later
+// run. The verification delay is applied after every attempt that actually
+// reached Telegram, including failed ones.
+//
+// Run returns an error only when the raw merchants cannot be loaded.
+func (p *CleanPhase) Run(ctx context.Context, task *model.Task, opts *Options) error {
+	// Load every merchant with status=raw.
+	var raws []model.MerchantRaw
+	q := p.db.Where("status = ?", "raw")
+	if opts != nil && opts.TestRun != nil && opts.TestRun.ItemLimit > 0 {
+		q = q.Limit(opts.TestRun.ItemLimit)
+	}
+	if err := q.Find(&raws).Error; err != nil {
+		return fmt.Errorf("[clean] load raw merchants: %w", err)
+	}
+
+	total := len(raws)
+	log.Printf("[clean] processing %d raw merchants", total)
+
+	// Stage 1: blacklist filtering.
+	var pass1 []model.MerchantRaw
+	for _, raw := range raws {
+		status := p.filterBlacklist(raw)
+		if status != "" {
+			p.saveCleaned(raw, status, nil)
+		} else {
+			pass1 = append(pass1, raw)
+		}
+	}
+
+	if p.reporter != nil {
+		p.reporter("clean", 1, 3, "第一关完成,剩余 "+itoa(len(pass1))+" 条")
+	}
+
+	// Stage 2: de-duplication.
+	pass2 := p.deduplicate(pass1)
+
+	if p.reporter != nil {
+		p.reporter("clean", 2, 3, "第二关完成,去重后 "+itoa(len(pass2))+" 条")
+	}
+
+	// Stage 3: Telegram verification (with its own rate limit).
+	delayVerify := 3.0
+	if p.settings != nil {
+		delayVerify = p.settings.GetFloat(ctx, "tg_scraper.delay_per_verify", 3.0)
+	}
+	if p.tgManager == nil {
+		log.Printf("[clean] tgManager is nil, TG usernames cannot be verified and are left unclassified")
+	}
+
+	for i, raw := range pass2 {
+		if isContextDone(ctx) {
+			break
+		}
+
+		if p.reporter != nil {
+			p.reporter("clean", i+1, len(pass2), "验证: @"+raw.TgUsername)
+		}
+
+		if raw.TgUsername == "" {
+			// No TG username but other contact details: mark as valid.
+			p.saveCleaned(raw, "valid", nil)
+			continue
+		}
+
+		userInfo, err := p.verifyTG(ctx, raw.TgUsername)
+		switch {
+		case err != nil:
+			log.Printf("[clean] verify error for %s: %v", raw.TgUsername, err)
+		case userInfo == nil:
+			// No result and no error: the username was not verified, so it
+			// must not be classified as invalid.
+			if p.tgManager == nil {
+				continue // nothing was sent to Telegram, no need to wait
+			}
+			log.Printf("[clean] no verification result for %s, leaving unclassified", raw.TgUsername)
+		default:
+			status := "invalid"
+			if userInfo.IsChannel {
+				status = "group"
+			} else if userInfo.IsBot {
+				status = "bot"
+			} else if userInfo.Exists {
+				status = "valid"
+			}
+			p.saveCleaned(raw, status, userInfo)
+		}
+
+		// Rate limit between verification attempts.
+		select {
+		case <-ctx.Done():
+			return nil
+		case <-time.After(time.Duration(float64(time.Second) * delayVerify)):
+		}
+	}
+
+	log.Printf("[clean] done")
+	return nil
+}
+
+// filterBlacklist is cleaning stage 1. It returns the status a raw merchant
+// should be stored with, or "" when the merchant passes the filter.
+//
+//   - "bot": the username (case-insensitively) is a known system bot, or is
+//     longer than 3 characters and ends in "bot";
+//   - "invalid": the username looks like an invite-link hash (16-24 URL-safe
+//     characters with entropy above 3.5), or the original message is present
+//     but contains no Chinese.
+func (p *CleanPhase) filterBlacklist(raw model.MerchantRaw) string {
+	lowered := strings.ToLower(raw.TgUsername)
+
+	// System bot blacklist.
+	systemBots := [...]string{
+		"telegram", "telegramhints", "gif", "pic", "bing", "vid",
+		"bold", "vote", "like", "sticker", "music",
+		"channel_bot", "BotFather", "SpamBot",
+	}
+	for i := range systemBots {
+		if strings.ToLower(systemBots[i]) == lowered {
+			return "bot"
+		}
+	}
+
+	// Any "...bot" suffix.
+	if len(lowered) > 3 && strings.HasSuffix(lowered, "bot") {
+		return "bot"
+	}
+
+	// Invite-link hash: 16-24 URL-safe base64 characters with a high mix of
+	// upper case, lower case and digits, measured by entropy.
+	if n := len(raw.TgUsername); n >= 16 && n <= 24 {
+		looksLikeHash := regexp.MustCompile(`^[A-Za-z0-9_-]{16,24}$`).MatchString(raw.TgUsername)
+		if looksLikeHash && entropy(raw.TgUsername) > 3.5 {
+			return "invalid"
+		}
+	}
+
+	// A non-empty original message must contain Chinese.
+	if raw.OriginalMessage == "" {
+		return ""
+	}
+	if extractor.ContainsChinese(raw.OriginalMessage, 0) {
+		return ""
+	}
+	return "invalid"
+}
+
+// entropy returns the Shannon entropy of s in bits per character, computed
+// over its runes. The empty string has entropy 0.
+//
+// Frequencies are counted per rune, so the total must also be the number of
+// runes: using the byte length would understate every probability for
+// non-ASCII input.
+func entropy(s string) float64 {
+	freq := map[rune]int{}
+	runes := 0
+	for _, r := range s {
+		freq[r]++
+		runes++
+	}
+	if runes == 0 {
+		return 0
+	}
+
+	n := float64(runes)
+	h := 0.0
+	for _, count := range freq {
+		p := float64(count) / n
+		h -= p * math.Log2(p)
+	}
+	return h
+}
+
+// deduplicate is cleaning stage 2. Records are grouped by Telegram username,
+// falling back to website, then email, then the record ID (which makes the
+// record unique and therefore always kept). From each group the record with
+// the highest richness score is kept — the earliest one on ties — and every
+// other record is stored with status "duplicate". The kept records are
+// returned in no particular order.
+func (p *CleanPhase) deduplicate(raws []model.MerchantRaw) []model.MerchantRaw {
+	// Group the records by their de-duplication key.
+	buckets := make(map[string][]model.MerchantRaw)
+	for _, rec := range raws {
+		var key string
+		switch {
+		case rec.TgUsername != "":
+			key = rec.TgUsername
+		case rec.Website != "":
+			key = rec.Website
+		case rec.Email != "":
+			key = rec.Email
+		default:
+			key = itoa(int(rec.ID)) // cannot be de-duplicated, always kept
+		}
+		buckets[key] = append(buckets[key], rec)
+	}
+
+	var kept []model.MerchantRaw
+	for _, bucket := range buckets {
+		// Start with the first record as the winner and challenge it with
+		// the rest; whichever loses a comparison is stored as a duplicate.
+		winner, top := bucket[0], richness(bucket[0])
+		for _, candidate := range bucket[1:] {
+			score := richness(candidate)
+			if score <= top {
+				p.saveCleaned(candidate, "duplicate", nil)
+				continue
+			}
+			p.saveCleaned(winner, "duplicate", nil)
+			winner, top = candidate, score
+		}
+		kept = append(kept, winner)
+	}
+
+	return kept
+}
+
+// richness scores how much information a raw merchant carries: one point for
+// each non-empty field among Telegram username, website, email, phone and
+// merchant name (0-5).
+func richness(r model.MerchantRaw) int {
+	total := 0
+	for _, field := range [...]string{r.TgUsername, r.Website, r.Email, r.Phone, r.MerchantName} {
+		if field != "" {
+			total++
+		}
+	}
+	return total
+}
+
+// verifyTG verifies a username through the Telegram API using an account from
+// the account manager.
+//
+// It returns (nil, nil) only when no account manager is configured, i.e. when
+// verification is unavailable. Every failed attempt returns a non-nil error.
+// That includes a FloodWait: the wait is handed to the account manager, and
+// the caller gets either the manager's error or the FloodWait error itself, so
+// that "rate limited" can never be mistaken for a verification result.
+func (p *CleanPhase) verifyTG(ctx context.Context, username string) (*telegram.UserInfo, error) {
+	if p.tgManager == nil {
+		return nil, nil
+	}
+
+	acc, err := p.tgManager.Acquire(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	if err := acc.Client.Connect(ctx); err != nil {
+		p.tgManager.Release(acc, 0)
+		return nil, err
+	}
+
+	userInfo, err := acc.Client.VerifyUser(ctx, username)
+	if err != nil {
+		if fw, ok := err.(*telegram.FloodWaitError); ok {
+			// The account is handed over to HandleFloodWait and is not
+			// released here.
+			if handleErr := p.tgManager.HandleFloodWait(acc, fw.Seconds); handleErr != nil {
+				return nil, handleErr
+			}
+			return nil, err
+		}
+		p.tgManager.Release(acc, 0)
+		return nil, err
+	}
+
+	p.tgManager.Release(acc, 0)
+	return userInfo, nil
+}
+
+// saveCleaned writes a raw merchant to the clean merchants table with the
+// given status. When userInfo is non-nil and reports an existing account, the
+// Telegram profile fields and the activity level (active: seen within 3 days,
+// moderate: within 30 days, inactive: longer ago) are filled in.
+//
+// Records without a Telegram username are always inserted. Records with one
+// are matched on tg_username:
+//   - no existing row: a new row is created;
+//   - existing row and status "duplicate": the row is left untouched, so a
+//     duplicate can never overwrite a real result;
+//   - existing row and any other status: the row is updated with the non-zero
+//     fields of this record. Without this, a keeper saved after one of its
+//     duplicates would find the "duplicate" row and never replace it.
+//
+// source_count and source_links of an existing row are never overwritten.
+// Database errors are logged.
+func (p *CleanPhase) saveCleaned(raw model.MerchantRaw, status string, userInfo *telegram.UserInfo) {
+	clean := model.MerchantClean{
+		RawID:        &raw.ID,
+		MerchantName: raw.MerchantName,
+		TgUsername:   raw.TgUsername,
+		Website:      raw.Website,
+		Email:        raw.Email,
+		Phone:        raw.Phone,
+		Industry:     raw.Industry,
+		Status:       status,
+		SourceCount:  1,
+		SourceLinks:  datatypes.JSON([]byte(`[]`)),
+	}
+
+	if userInfo != nil && userInfo.Exists {
+		clean.TgFirstName = userInfo.FirstName
+		clean.TgLastName = userInfo.LastName
+		clean.IsPremium = userInfo.IsPremium
+		clean.LastOnline = userInfo.LastOnline
+		// Activity level
+		if userInfo.LastOnline != nil {
+			days := time.Since(*userInfo.LastOnline).Hours() / 24
+			if days < 3 {
+				clean.ActiveLevel = "active"
+			} else if days < 30 {
+				clean.ActiveLevel = "moderate"
+			} else {
+				clean.ActiveLevel = "inactive"
+			}
+		}
+	}
+
+	if clean.TgUsername == "" {
+		if err := p.db.Create(&clean).Error; err != nil {
+			log.Printf("[clean] save merchant (raw_id=%d): %v", raw.ID, err)
+		}
+		return
+	}
+
+	tx := p.db.Where(model.MerchantClean{TgUsername: clean.TgUsername})
+	if status != "duplicate" {
+		// Assign applies these values whether or not a row already exists.
+		// Only non-zero fields of the struct are used, so clearing the two
+		// source fields keeps an existing row's source bookkeeping intact.
+		refresh := clean
+		refresh.SourceCount = 0
+		refresh.SourceLinks = nil
+		tx = tx.Assign(refresh)
+	}
+	if err := tx.FirstOrCreate(&clean).Error; err != nil {
+		log.Printf("[clean] save merchant @%s: %v", raw.TgUsername, err)
+	}
+}
+
+// itoa returns the decimal string representation of n.
+func itoa(n int) string {
+	return fmt.Sprint(n)
+}

+ 126 - 0
internal/pipeline/phase7_score.go

@@ -0,0 +1,126 @@
+package pipeline
+
+import (
+	"context"
+	"log"
+	"math"
+
+	"gorm.io/gorm"
+
+	"spider/internal/model"
+)
+
+// ScorePhase implements pipeline phase 7: quality scoring of cleaned merchants.
+type ScorePhase struct {
+	db       *gorm.DB         // database handle used to read and update merchants
+	reporter ProgressReporter // optional progress callback; nil disables reporting
+}
+
+// NewScorePhase builds a ScorePhase bound to db, with no reporter attached.
+func NewScorePhase(db *gorm.DB) *ScorePhase {
+	phase := new(ScorePhase)
+	phase.db = db
+	return phase
+}
+
+// Name returns the phase identifier, "score".
+func (p *ScorePhase) Name() string {
+	return "score"
+}
+
+// SetReporter installs the progress callback invoked while scoring.
+func (p *ScorePhase) SetReporter(r ProgressReporter) {
+	p.reporter = r
+}
+
+// Run scores every merchant whose status is "valid" and stores the result in
+// its quality_score column.
+//
+// A failure to load the merchants is returned to the caller. A failure to
+// update an individual merchant is logged and counted, and scoring continues
+// with the next one. Cancellation of ctx stops the loop early and still
+// returns nil. task and opts are not used by this phase.
+func (p *ScorePhase) Run(ctx context.Context, task *model.Task, opts *Options) error {
+	var merchants []model.MerchantClean
+	if err := p.db.Where("status = ?", "valid").Find(&merchants).Error; err != nil {
+		// Previously ignored: a failed query looked like "0 merchants, done".
+		log.Printf("[score] load valid merchants failed: %v", err)
+		return err
+	}
+
+	total := len(merchants)
+	log.Printf("[score] scoring %d valid merchants", total)
+
+	failed := 0
+	for i, m := range merchants {
+		if isContextDone(ctx) {
+			log.Printf("[score] cancelled after %d/%d merchants", i, total)
+			break
+		}
+
+		if p.reporter != nil {
+			p.reporter("score", i+1, total, "评分: @"+m.TgUsername)
+		}
+
+		score := calcScore(m)
+		if err := p.db.Model(&m).Update("quality_score", score).Error; err != nil {
+			failed++
+			log.Printf("[score] update quality_score for @%s failed: %v", m.TgUsername, err)
+		}
+	}
+
+	log.Printf("[score] done (%d update failures)", failed)
+	return nil
+}
+
+// calcScore combines six weighted dimensions into a 0-100 quality score,
+// rounded to two decimal places.
+//
+// Weights: member count 0.25, premium 0.15, activity 0.25, multi-source 0.20,
+// has website 0.10, has email 0.05.
+func calcScore(m model.MerchantClean) float64 {
+	// Boolean dimensions contribute either the full 100 points or nothing.
+	allOrNothing := func(present bool) float64 {
+		if present {
+			return 100.0
+		}
+		return 0.0
+	}
+
+	members := memberCountScore(m.MemberCount)
+	premium := allOrNothing(m.IsPremium)
+	activity := activityLevelScore(m.ActiveLevel)
+	sources := multiSourceScore(m.SourceCount)
+	website := allOrNothing(m.Website != "")
+	email := allOrNothing(m.Email != "")
+
+	weighted := members*0.25 + premium*0.15 + activity*0.25 +
+		sources*0.20 + website*0.10 + email*0.05
+
+	return math.Round(weighted*100) / 100
+}
+
+// memberCountScore maps a member count onto a tiered score:
+// >=100000 -> 100, >=10000 -> 80, >=1000 -> 50, >=100 -> 30, otherwise 10.
+func memberCountScore(count int) float64 {
+	if count >= 100000 {
+		return 100
+	}
+	if count >= 10000 {
+		return 80
+	}
+	if count >= 1000 {
+		return 50
+	}
+	if count >= 100 {
+		return 30
+	}
+	return 10
+}
+
+// activityLevelScore converts an activity label into a score:
+// "active" -> 100, "moderate" -> 50, "inactive" -> 20, anything else -> 0.
+func activityLevelScore(level string) float64 {
+	// A missing key yields the zero value, which is the score for unknown labels.
+	byLevel := map[string]float64{
+		"active":   100,
+		"moderate": 50,
+		"inactive": 20,
+	}
+	return byLevel[level]
+}
+
+// multiSourceScore rewards merchants seen in several sources:
+// 4 or more -> 100, exactly 3 -> 70, exactly 2 -> 40, otherwise 10.
+func multiSourceScore(count int) float64 {
+	if count >= 4 {
+		return 100
+	}
+	if count == 3 {
+		return 70
+	}
+	if count == 2 {
+		return 40
+	}
+	return 10
+}

+ 112 - 0
internal/pipeline/pipeline.go

@@ -0,0 +1,112 @@
+package pipeline
+
+import (
+	"context"
+	"fmt"
+	"log"
+	"spider/internal/model"
+
+	"github.com/redis/go-redis/v9"
+	"gorm.io/gorm"
+)
+
+// fullPhaseOrder defines the sequential execution order for a full pipeline run.
+// Runner.Run iterates this slice in order when task.TaskType is "full"; each
+// entry is looked up by name among the registered phases.
+var fullPhaseOrder = []string{
+	"discover",
+	"search",
+	"github",
+	"scrape",
+	"crawl",
+	"clean",
+	"score",
+}
+
+// Runner is the pipeline scheduler: it holds the registered phases and
+// dispatches tasks to them.
+type Runner struct {
+	db       *gorm.DB
+	redis    *redis.Client
+	phases   map[string]Phase // registered phases, keyed by phase name
+	reporter ProgressReporter // optional progress callback
+}
+
+// NewRunner creates a Runner with an empty phase registry.
+func NewRunner(db *gorm.DB, rdb *redis.Client) *Runner {
+	runner := &Runner{db: db, redis: rdb}
+	runner.phases = make(map[string]Phase)
+	return runner
+}
+
+// RegisterPhase adds p to the registry under p.Name(), replacing any phase
+// previously registered with the same name.
+func (r *Runner) RegisterPhase(p Phase) {
+	name := p.Name()
+	r.phases[name] = p
+}
+
+// SetProgressReporter sets the function used to report progress.
+func (r *Runner) SetProgressReporter(fn ProgressReporter) {
+	r.reporter = fn
+}
+
+// report forwards a progress update to the configured reporter.
+// It is a no-op when no reporter has been set; nothing is logged in that case.
+func (r *Runner) report(phase string, current, total int, message string) {
+	if r.reporter != nil {
+		r.reporter(phase, current, total, message)
+	}
+}
+
+// Run executes the pipeline for task.
+//
+// task.TaskType selects the mode:
+// "full" | "discover" | "search" | "github" | "scrape" | "crawl" | "clean" | "score".
+//
+// "full" runs every phase in fullPhaseOrder that is not listed in
+// opts.SkipPhases. A failing phase is logged and reported as failed, and the
+// run continues with the next phase; only cancellation aborts a full run with
+// an error. Any other task type runs that single phase and returns its error.
+//
+// A nil opts is treated as empty options.
+func (r *Runner) Run(ctx context.Context, task *model.Task, opts *Options) error {
+	if opts == nil {
+		// Guard against a nil dereference on opts.SkipPhases below.
+		opts = &Options{}
+	}
+
+	if task.TaskType == "full" {
+		for _, phaseName := range fullPhaseOrder {
+			if isContextDone(ctx) {
+				return fmt.Errorf("pipeline cancelled before phase %s", phaseName)
+			}
+			if ShouldSkip(phaseName, opts.SkipPhases) {
+				log.Printf("[pipeline] skipping phase=%s (in SkipPhases)", phaseName)
+				continue
+			}
+			r.report(phaseName, 0, 0, "开始 "+phaseName)
+			if err := r.runSingle(ctx, task, phaseName, opts); err != nil {
+				log.Printf("[pipeline] phase=%s error: %v (continuing)", phaseName, err)
+				// Report the failure instead of a misleading "done", matching
+				// the single-phase path below.
+				r.report(phaseName, 0, 0, phaseName+" 失败: "+err.Error())
+				continue
+			}
+			r.report(phaseName, 100, 100, phaseName+" 完成")
+		}
+		return nil
+	}
+
+	// Single-phase task
+	phaseName := task.TaskType
+	if isContextDone(ctx) {
+		return fmt.Errorf("pipeline cancelled before phase %s", phaseName)
+	}
+	r.report(phaseName, 0, 0, "开始 "+phaseName)
+	if err := r.runSingle(ctx, task, phaseName, opts); err != nil {
+		r.report(phaseName, 0, 0, phaseName+" 失败: "+err.Error())
+		return err
+	}
+	r.report(phaseName, 100, 100, phaseName+" 完成")
+	return nil
+}
+
+// runSingle runs the phase registered under phaseName, or returns an error
+// when no such phase has been registered.
+func (r *Runner) runSingle(ctx context.Context, task *model.Task, phaseName string, opts *Options) error {
+	if phase, registered := r.phases[phaseName]; registered {
+		return phase.Run(ctx, task, opts)
+	}
+	return fmt.Errorf("phase %q not registered", phaseName)
+}
+
+// isContextDone reports, without blocking, whether ctx has been cancelled or
+// has expired. Phases use it to poll for a stop signal.
+func isContextDone(ctx context.Context) bool {
+	// Err is non-nil exactly when the Done channel has been closed.
+	return ctx.Err() != nil
+}

+ 0 - 0
internal/search/.gitkeep


+ 141 - 0
internal/search/serper.go

@@ -0,0 +1,141 @@
+package search
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"net/url"
+	"strings"
+	"time"
+)
+
+const serperEndpoint = "https://google.serper.dev/search"
+
+// SerperClient is a client for the Serper search API.
+type SerperClient struct {
+	apiKey  string       // sent in the X-API-KEY header
+	perPage int          // results requested per page
+	maxPage int          // highest page number fetched by Search
+	http    *http.Client // HTTP client used for all requests
+}
+
+// NewSerperClient creates a Serper client whose HTTP requests time out after
+// 15 seconds.
+func NewSerperClient(apiKey string, perPage, maxPage int) *SerperClient {
+	httpClient := &http.Client{Timeout: 15 * time.Second}
+	client := &SerperClient{http: httpClient}
+	client.apiKey = apiKey
+	client.perPage = perPage
+	client.maxPage = maxPage
+	return client
+}
+
+// SearchResult is a single organic search hit returned by Serper.
+type SearchResult struct {
+	Title   string
+	URL     string
+	Snippet string
+}
+
+// Search runs query against Serper and returns the results of all fetched
+// pages, in page order.
+//
+// Paging stops at maxPage, at the first page that is empty or shorter than
+// perPage, or at the first page that fails. A failure on the very first page
+// is returned as an error, so "no results" can be told apart from "request
+// failed". A failure on a later page is treated as best-effort: the results
+// collected so far are returned with a nil error.
+func (c *SerperClient) Search(ctx context.Context, query string) ([]SearchResult, error) {
+	var results []SearchResult
+	for page := 1; page <= c.maxPage; page++ {
+		pageResults, err := c.searchPage(ctx, query, page)
+		if err != nil {
+			if len(results) == 0 {
+				return nil, fmt.Errorf("serper search page %d: %w", page, err)
+			}
+			break
+		}
+		results = append(results, pageResults...)
+		// The explicit empty check also ends paging when perPage <= 0, where
+		// the "short page" comparison alone could never be true.
+		if len(pageResults) == 0 || len(pageResults) < c.perPage {
+			break
+		}
+	}
+	return results, nil
+}
+
+// searchPage fetches one page of organic results for query.
+//
+// It POSTs a JSON body to serperEndpoint with region "cn" and language
+// "zh-cn", and returns an error for transport failures, any non-200 status,
+// or an undecodable response body.
+func (c *SerperClient) searchPage(ctx context.Context, query string, page int) ([]SearchResult, error) {
+	payload, _ := json.Marshal(map[string]interface{}{
+		"q":    query,
+		"num":  c.perPage,
+		"page": page,
+		"gl":   "cn",
+		"hl":   "zh-cn",
+	})
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, serperEndpoint, bytes.NewReader(payload))
+	if err != nil {
+		return nil, err
+	}
+	req.Header.Set("X-API-KEY", c.apiKey)
+	req.Header.Set("Content-Type", "application/json")
+
+	resp, err := c.http.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("serper API error: %d", resp.StatusCode)
+	}
+
+	// Only the "organic" section of the response is decoded.
+	var decoded struct {
+		Organic []struct {
+			Title   string `json:"title"`
+			Link    string `json:"link"`
+			Snippet string `json:"snippet"`
+		} `json:"organic"`
+	}
+	if decodeErr := json.NewDecoder(resp.Body).Decode(&decoded); decodeErr != nil {
+		return nil, decodeErr
+	}
+
+	var hits []SearchResult
+	for i := range decoded.Organic {
+		item := decoded.Organic[i]
+		hits = append(hits, SearchResult{Title: item.Title, URL: item.Link, Snippet: item.Snippet})
+	}
+	return hits, nil
+}
+
+// ClassifyURL decides what kind of page rawURL points to.
+// It returns one of "tg_channel", "nav_site" or "discard".
+//
+// Checks run in this order:
+//  1. host is t.me / telegram.me (or a subdomain)  -> "tg_channel"
+//  2. host is a blacklisted social/large site      -> "discard"
+//  3. path ends with a blacklisted file extension  -> "discard"
+//  4. URL contains a navigation-site keyword       -> "nav_site"
+//  5. anything else                                -> "discard"
+//
+// Domain checks compare the parsed host rather than searching the whole URL,
+// so "https://internet.me/x" is no longer mistaken for a t.me link. The
+// extension check looks at the path only, so "file.apk?dl=1" is caught.
+// Matching is case-insensitive.
+func ClassifyURL(rawURL string) string {
+	lowered := strings.ToLower(strings.TrimSpace(rawURL))
+	host, path := classifyHostPath(lowered)
+
+	// Telegram links
+	for _, tgDomain := range []string{"t.me", "telegram.me"} {
+		if hostInDomain(host, tgDomain) {
+			return "tg_channel"
+		}
+	}
+
+	// Social media / large-site blacklist
+	blacklistDomains := []string{
+		"twitter.com", "facebook.com", "instagram.com", "youtube.com",
+		"google.com", "baidu.com", "weibo.com", "zhihu.com",
+		"github.com", "stackoverflow.com", "wikipedia.org",
+		"amazon.com", "taobao.com", "jd.com", "tmall.com",
+	}
+	for _, d := range blacklistDomains {
+		if hostInDomain(host, d) {
+			return "discard"
+		}
+	}
+
+	// Blacklisted file extensions
+	blacklistExt := []string{".apk", ".zip", ".pdf", ".exe", ".dmg", ".ipa"}
+	for _, ext := range blacklistExt {
+		if strings.HasSuffix(path, ext) {
+			return "discard"
+		}
+	}
+
+	// Positive signal: navigation / directory sites
+	navSignals := []string{"nav", "directory", "catalog", "list", "daohang", "dh"}
+	for _, sig := range navSignals {
+		if strings.Contains(lowered, sig) {
+			return "nav_site"
+		}
+	}
+
+	return "discard"
+}
+
+// classifyHostPath splits raw into hostname (without port) and path.
+// Scheme-less input such as "t.me/foo" is retried as a network-path
+// reference. If raw cannot be parsed, host is empty and path is raw itself.
+func classifyHostPath(raw string) (host, path string) {
+	parsed, err := url.Parse(raw)
+	if err == nil && parsed.Host == "" {
+		parsed, err = url.Parse("//" + raw)
+	}
+	if err != nil {
+		return "", raw
+	}
+	return parsed.Hostname(), parsed.Path
+}
+
+// hostInDomain reports whether host equals domain or is a subdomain of it.
+func hostInDomain(host, domain string) bool {
+	return host == domain || strings.HasSuffix(host, "."+domain)
+}

+ 0 - 0
internal/service/.gitkeep


+ 146 - 0
internal/service/settings_service.go

@@ -0,0 +1,146 @@
+package service
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"strconv"
+	"time"
+
+	"github.com/redis/go-redis/v9"
+	"gorm.io/gorm"
+
+	"spider/internal/model"
+)
+
+const (
+	// settingsCacheKey is the Redis hash that caches all settings.
+	settingsCacheKey = "spider:cache:settings"
+	// settingsCacheTTL is the expiry applied to the cache hash.
+	settingsCacheTTL = 5 * time.Minute
+)
+
+// SettingsService provides hot-reloadable access to managed_settings.
+type SettingsService struct {
+	db    *gorm.DB
+	redis *redis.Client
+}
+
+// NewSettingsService creates a new SettingsService.
+func NewSettingsService(db *gorm.DB, rdb *redis.Client) *SettingsService {
+	svc := new(SettingsService)
+	svc.db, svc.redis = db, rdb
+	return svc
+}
+
+// Load reads every setting from the database and replaces the Redis cache
+// with that snapshot.
+//
+// The old hash is deleted in the same transaction that writes the new fields,
+// so settings removed from the database do not linger in the cache. When the
+// table is empty the cache key is simply deleted.
+//
+// Returns an error when the database query or the Redis write fails.
+func (s *SettingsService) Load(ctx context.Context) error {
+	var settings []model.ManagedSetting
+	if err := s.db.WithContext(ctx).Find(&settings).Error; err != nil {
+		return fmt.Errorf("load settings from db: %w", err)
+	}
+
+	if len(settings) == 0 {
+		// Nothing to cache; ensure any stale cache is cleared.
+		return s.redis.Del(ctx, settingsCacheKey).Err()
+	}
+
+	fields := make([]interface{}, 0, len(settings)*2)
+	for _, setting := range settings {
+		fields = append(fields, setting.KeyName, setting.Value)
+	}
+
+	// MULTI/EXEC so readers never observe the gap between Del and HSet.
+	pipe := s.redis.TxPipeline()
+	pipe.Del(ctx, settingsCacheKey)
+	pipe.HSet(ctx, settingsCacheKey, fields...)
+	pipe.Expire(ctx, settingsCacheKey, settingsCacheTTL)
+	if _, err := pipe.Exec(ctx); err != nil {
+		return fmt.Errorf("cache settings to redis: %w", err)
+	}
+	return nil
+}
+
+// Get returns the value of setting key.
+//
+// The Redis cache is consulted first. On any cache error (miss or Redis
+// failure) the value is read from the database and written back to the cache
+// on a best-effort basis. An error is returned only when the database lookup
+// fails.
+func (s *SettingsService) Get(ctx context.Context, key string) (string, error) {
+	if cached, cacheErr := s.redis.HGet(ctx, settingsCacheKey, key).Result(); cacheErr == nil {
+		return cached, nil
+	}
+
+	// Fall back to the database.
+	var row model.ManagedSetting
+	lookup := s.db.WithContext(ctx).Where("key_name = ?", key).First(&row)
+	if lookup.Error != nil {
+		return "", fmt.Errorf("setting %q not found: %w", key, lookup.Error)
+	}
+
+	// Re-populate the cache entry; failures here are deliberately ignored.
+	refill := s.redis.Pipeline()
+	refill.HSet(ctx, settingsCacheKey, key, row.Value)
+	refill.Expire(ctx, settingsCacheKey, settingsCacheTTL)
+	_, _ = refill.Exec(ctx)
+
+	return row.Value, nil
+}
+
+// GetInt returns setting key parsed as an int, or defaultVal when the setting
+// cannot be read or is not a valid integer.
+func (s *SettingsService) GetInt(ctx context.Context, key string, defaultVal int) int {
+	if raw, err := s.Get(ctx, key); err == nil {
+		if parsed, convErr := strconv.Atoi(raw); convErr == nil {
+			return parsed
+		}
+	}
+	return defaultVal
+}
+
+// GetFloat returns setting key parsed as a float64, or defaultVal when the
+// setting cannot be read or is not a valid number.
+func (s *SettingsService) GetFloat(ctx context.Context, key string, defaultVal float64) float64 {
+	if raw, err := s.Get(ctx, key); err == nil {
+		if parsed, convErr := strconv.ParseFloat(raw, 64); convErr == nil {
+			return parsed
+		}
+	}
+	return defaultVal
+}
+
+// GetBool returns setting key parsed as a bool, or defaultVal when the
+// setting cannot be read or is not accepted by strconv.ParseBool.
+func (s *SettingsService) GetBool(ctx context.Context, key string, defaultVal bool) bool {
+	if raw, err := s.Get(ctx, key); err == nil {
+		if parsed, convErr := strconv.ParseBool(raw); convErr == nil {
+			return parsed
+		}
+	}
+	return defaultVal
+}
+
+// GetJSON reads setting key and unmarshals its JSON value into target.
+// It returns the lookup error or the JSON decoding error, if any.
+func (s *SettingsService) GetJSON(ctx context.Context, key string, target interface{}) error {
+	raw, lookupErr := s.Get(ctx, key)
+	if lookupErr != nil {
+		return lookupErr
+	}
+	payload := []byte(raw)
+	return json.Unmarshal(payload, target)
+}
+
+// Set updates an existing setting in the database and then drops the cache.
+// It returns an error when the update fails or when no row has that key.
+func (s *SettingsService) Set(ctx context.Context, key, value string) error {
+	update := s.db.WithContext(ctx).
+		Model(&model.ManagedSetting{}).
+		Where("key_name = ?", key).
+		Update("value", value)
+
+	switch {
+	case update.Error != nil:
+		return fmt.Errorf("update setting %q in db: %w", key, update.Error)
+	case update.RowsAffected == 0:
+		return fmt.Errorf("setting %q not found", key)
+	}
+
+	// The next read will go to the database and re-fill the cache.
+	return s.Invalidate(ctx)
+}
+
+// Invalidate deletes the cache hash so that later reads go to the database.
+func (s *SettingsService) Invalidate(ctx context.Context) error {
+	deleted := s.redis.Del(ctx, settingsCacheKey)
+	return deleted.Err()
+}

+ 181 - 0
internal/service/task_service.go

@@ -0,0 +1,181 @@
+package service
+
+import (
+	"encoding/json"
+	"fmt"
+	"time"
+
+	"github.com/hibiken/asynq"
+	"github.com/redis/go-redis/v9"
+	"golang.org/x/net/context"
+	"gorm.io/gorm"
+
+	"spider/internal/model"
+	"spider/internal/worker"
+)
+
+// StartTaskRequest is the payload for starting a new task.
+// The whole request is also serialized into the task record's Params column.
+type StartTaskRequest struct {
+	TaskType   string          `json:"task_type" binding:"required"` // must be a key known to asynqTypeForTaskType
+	Target     string          `json:"target"`                       // copied into the worker payload
+	TestRun    *worker.TestRun `json:"test_run"`                     // copied into the worker payload; may be nil
+	SkipPhases []string        `json:"skip_phases"`                  // copied into the worker payload
+}
+
+// TaskService manages task lifecycle.
+type TaskService struct {
+	db     *gorm.DB      // task records
+	redis  *redis.Client // stop signals and live progress
+	client *asynq.Client // queue client used to enqueue tasks
+}
+
+// NewTaskService creates a TaskService. The asynq.Client is constructed from
+// the connection options of rdb, so the queue talks to the same Redis server
+// as the rest of the application.
+//
+// Network, ACL username and TLS settings are carried over in addition to
+// address, password and DB index; without them the queue client could not
+// reach a Redis instance that requires a username or TLS.
+func NewTaskService(db *gorm.DB, rdb *redis.Client) *TaskService {
+	opts := rdb.Options()
+	client := asynq.NewClient(asynq.RedisClientOpt{
+		Network:   opts.Network,
+		Addr:      opts.Addr,
+		Username:  opts.Username,
+		Password:  opts.Password,
+		DB:        opts.DB,
+		TLSConfig: opts.TLSConfig,
+	})
+	return &TaskService{
+		db:     db,
+		redis:  rdb,
+		client: client,
+	}
+}
+
+// asynqTypeForTaskType translates a model task_type into the matching asynq
+// task type constant. Unknown task types yield an error.
+func asynqTypeForTaskType(taskType string) (string, error) {
+	switch taskType {
+	case "full":
+		return worker.TypeFullPipeline, nil
+	case "discover":
+		return worker.TypeDiscover, nil
+	case "search":
+		return worker.TypeSearch, nil
+	case "github":
+		return worker.TypeGithub, nil
+	case "scrape":
+		return worker.TypeScrape, nil
+	case "crawl":
+		return worker.TypeCrawl, nil
+	case "clean":
+		return worker.TypeClean, nil
+	case "score":
+		return worker.TypeScore, nil
+	}
+	return "", fmt.Errorf("unknown task type: %s", taskType)
+}
+
+// StartTask validates the request, creates a Task record with status
+// "pending", and enqueues it via asynq.
+//
+// It fails when the task type is unknown, when a task of the same type
+// currently has status "running", or when persisting or enqueueing fails.
+// If enqueueing fails the record is marked "failed"; should that update fail
+// as well, both problems are reported in the returned error.
+//
+// NOTE(review): only "running" tasks block a new start; tasks still
+// "pending" do not. Confirm this is intended.
+func (s *TaskService) StartTask(req StartTaskRequest) (*model.Task, error) {
+	// Validate first so an invalid type never costs a database round trip.
+	asynqType, err := asynqTypeForTaskType(req.TaskType)
+	if err != nil {
+		return nil, err
+	}
+
+	// Check if a task of the same type is already running.
+	var count int64
+	if err := s.db.Model(&model.Task{}).
+		Where("task_type = ? AND status = ?", req.TaskType, "running").
+		Count(&count).Error; err != nil {
+		return nil, fmt.Errorf("check running tasks: %w", err)
+	}
+	if count > 0 {
+		return nil, fmt.Errorf("a %s task is already running", req.TaskType)
+	}
+
+	// Encode params.
+	paramsJSON, err := json.Marshal(req)
+	if err != nil {
+		return nil, fmt.Errorf("marshal params: %w", err)
+	}
+
+	// Create Task record in DB.
+	task := &model.Task{
+		TaskType: req.TaskType,
+		Status:   "pending",
+		Params:   paramsJSON,
+	}
+	if err := s.db.Create(task).Error; err != nil {
+		return nil, fmt.Errorf("create task record: %w", err)
+	}
+
+	// Build asynq payload.
+	payload := worker.TaskPayload{
+		TaskID:     task.ID,
+		Target:     req.Target,
+		TestRun:    req.TestRun,
+		SkipPhases: req.SkipPhases,
+	}
+	payloadBytes, err := json.Marshal(payload)
+	if err != nil {
+		return nil, fmt.Errorf("marshal payload: %w", err)
+	}
+
+	// Enqueue.
+	asynqTask := asynq.NewTask(asynqType, payloadBytes, asynq.Queue(worker.QueueDefault))
+	if _, err := s.client.Enqueue(asynqTask); err != nil {
+		// Mark the record failed so it does not stay "pending" forever.
+		markErr := s.db.Model(task).
+			Updates(map[string]interface{}{"status": "failed", "error_msg": err.Error()}).Error
+		if markErr != nil {
+			return nil, fmt.Errorf("enqueue task: %w (marking task %d failed also failed: %v)", err, task.ID, markErr)
+		}
+		return nil, fmt.Errorf("enqueue task: %w", err)
+	}
+
+	return task, nil
+}
+
+// StopTask requests that a task stop.
+//
+// It verifies the task exists, then sets a Redis stop signal (valid for one
+// hour) for the worker to pick up. Only when force is true is the database
+// record also switched to "stopped" with finished_at set to now.
+func (s *TaskService) StopTask(taskID uint, force bool) error {
+	var existing model.Task
+	if lookupErr := s.db.First(&existing, taskID).Error; lookupErr != nil {
+		return fmt.Errorf("task not found: %w", lookupErr)
+	}
+
+	signalKey := fmt.Sprintf("spider:task:stop:%d", taskID)
+	if setErr := s.redis.Set(context.Background(), signalKey, "1", time.Hour).Err(); setErr != nil {
+		return fmt.Errorf("set stop signal: %w", setErr)
+	}
+
+	if !force {
+		return nil
+	}
+
+	// Forced stop: record the final state right away.
+	stoppedAt := time.Now()
+	changes := map[string]interface{}{
+		"status":      "stopped",
+		"finished_at": &stoppedAt,
+	}
+	if updErr := s.db.Model(&model.Task{}).Where("id = ?", taskID).Updates(changes).Error; updErr != nil {
+		return fmt.Errorf("update task stopped: %w", updErr)
+	}
+	return nil
+}
+
+// GetProgress returns the task's progress as a map: the progress stored on
+// the task record, overlaid with the live values from the task's Redis
+// progress hash. Redis errors are ignored and leave the stored values as-is.
+func (s *TaskService) GetProgress(task *model.Task) map[string]interface{} {
+	merged := make(map[string]interface{})
+
+	// Base layer: progress persisted on the record, if any.
+	if len(task.Progress) > 0 {
+		_ = json.Unmarshal(task.Progress, &merged)
+	}
+
+	// Top layer: live values from Redis win over persisted ones.
+	liveKey := fmt.Sprintf("spider:task:progress:%d", task.ID)
+	live, err := s.redis.HGetAll(context.Background(), liveKey).Result()
+	if err != nil {
+		return merged
+	}
+	for field, value := range live {
+		merged[field] = value
+	}
+	return merged
+}
+
+// IsStopRequested reports whether the Redis stop signal for taskID is set.
+// Any Redis error, including a missing key, counts as "not requested".
+func (s *TaskService) IsStopRequested(taskID uint) bool {
+	signalKey := fmt.Sprintf("spider:task:stop:%d", taskID)
+	flag, err := s.redis.Get(context.Background(), signalKey).Result()
+	return err == nil && flag == "1"
+}

+ 0 - 0
internal/telegram/.gitkeep


+ 176 - 0
internal/telegram/account_manager.go

@@ -0,0 +1,176 @@
+package telegram
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"sync"
+	"time"
+
+	"github.com/redis/go-redis/v9"
+)
+
+// ErrAllCooling is returned when no account can be handed out. Acquire
+// returns it (wrapped) both when the free accounts are all cooling down and
+// when every account is in use.
+var ErrAllCooling = errors.New("all TG accounts are cooling down")
+
+// ErrNoAccounts is returned when no accounts are configured.
+var ErrNoAccounts = errors.New("no TG accounts configured")
+
+// ManagedAccount is an account together with its runtime state.
+type ManagedAccount struct {
+	Account   Account
+	Client    *Client
+	CoolUntil time.Time // end of the current cooldown; zero value means not cooling
+	InUse     bool      // true between Acquire and Release
+}
+
+// AccountManager manages multiple TG accounts.
+type AccountManager struct {
+	accounts []*ManagedAccount
+	mu       sync.Mutex    // guards accounts and the state of each entry
+	redis    *redis.Client // optional; persists cooldowns when non-nil
+}
+
+// NewAccountManager creates an AccountManager for accounts. rdb may be nil,
+// in which case cooldowns are not persisted.
+func NewAccountManager(accounts []Account, rdb *redis.Client) *AccountManager {
+	manager := new(AccountManager)
+	manager.redis = rdb
+	manager.Init(accounts)
+	return manager
+}
+
+// Init replaces the managed set with one entry per account, creating a client
+// for each without connecting it, and then restores cooldowns from Redis.
+func (m *AccountManager) Init(accounts []Account) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	managed := make([]*ManagedAccount, 0, len(accounts))
+	for i := range accounts {
+		managed = append(managed, &ManagedAccount{
+			Account: accounts[i],
+			Client:  New(accounts[i]),
+		})
+	}
+	m.accounts = managed
+	m.loadCooldowns()
+}
+
+// Acquire hands out the first account that is neither in use nor cooling
+// down, and marks it as in use.
+//
+// Errors:
+//   - ErrNoAccounts when nothing is configured;
+//   - ErrAllCooling wrapped with the shortest remaining cooldown when a free
+//     account exists but all free accounts are cooling;
+//   - ErrAllCooling (wrapped) when every account is in use.
+//
+// ctx is currently unused.
+func (m *AccountManager) Acquire(ctx context.Context) (*ManagedAccount, error) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	if len(m.accounts) == 0 {
+		return nil, ErrNoAccounts
+	}
+
+	now := time.Now()
+	var earliest *ManagedAccount // free account whose cooldown ends first
+
+	for _, candidate := range m.accounts {
+		switch {
+		case candidate.InUse:
+			// Held by someone else; not a candidate at all.
+		case now.Before(candidate.CoolUntil):
+			if earliest == nil || candidate.CoolUntil.Before(earliest.CoolUntil) {
+				earliest = candidate
+			}
+		default:
+			candidate.InUse = true
+			return candidate, nil
+		}
+	}
+
+	if earliest == nil {
+		// Every account is in use.
+		return nil, fmt.Errorf("%w", ErrAllCooling)
+	}
+
+	remaining := time.Until(earliest.CoolUntil)
+	return nil, fmt.Errorf("%w: shortest cooldown %s", ErrAllCooling, remaining.Round(time.Second))
+}
+
+// Release returns acc to the pool.
+//
+// When floodWait is positive the account is also put on cooldown for that
+// long, and the cooldown is persisted to Redis under
+// spider:tg:floodwait:{phone} (value: cooldown end as a Unix timestamp,
+// TTL: the cooldown duration).
+func (m *AccountManager) Release(acc *ManagedAccount, floodWait time.Duration) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	acc.InUse = false
+	if floodWait <= 0 {
+		return
+	}
+
+	deadline := time.Now().Add(floodWait)
+	acc.CoolUntil = deadline
+	m.saveCooldown(acc.Account.Phone, deadline)
+}
+
+// HandleFloodWait applies the FloodWait policy for acc and releases it.
+//
+//   - waitSeconds <= 300: acc is released with a cooldown of waitSeconds and
+//     nil is returned. The caller decides whether to wait and retry or to
+//     Acquire another account.
+//   - waitSeconds > 300: acc is released and cooled for the full wait, every
+//     other account is cooled for at least 300s, and ErrAllCooling is
+//     returned.
+//
+// Only acc is released. Accounts currently held by other callers keep their
+// InUse flag, so they cannot be handed out twice.
+func (m *AccountManager) HandleFloodWait(acc *ManagedAccount, waitSeconds int) error {
+	wait := time.Duration(waitSeconds) * time.Second
+
+	if waitSeconds <= 300 {
+		// Mark the cooldown only; waiting is the caller's responsibility.
+		m.Release(acc, wait)
+		return nil
+	}
+
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	now := time.Now()
+
+	// The offending account must sit out the whole wait, not just 300s.
+	if acc != nil {
+		acc.InUse = false
+		if until := now.Add(wait); acc.CoolUntil.Before(until) {
+			acc.CoolUntil = until
+			m.saveCooldown(acc.Account.Phone, until)
+		}
+	}
+
+	// Every account gets at least 300s of cooling.
+	minCool := now.Add(300 * time.Second)
+	for _, a := range m.accounts {
+		if a.CoolUntil.Before(minCool) {
+			a.CoolUntil = minCool
+			m.saveCooldown(a.Account.Phone, a.CoolUntil)
+		}
+	}
+	return ErrAllCooling
+}
+
+// loadCooldowns restores cooldown deadlines from Redis for every managed
+// account. Must be called with m.mu held. Missing or unreadable keys and
+// deadlines already in the past are skipped. No-op without a Redis client.
+func (m *AccountManager) loadCooldowns() {
+	if m.redis == nil {
+		return
+	}
+	bg := context.Background()
+	for _, managed := range m.accounts {
+		stamp, err := m.redis.Get(bg, "spider:tg:floodwait:"+managed.Account.Phone).Int64()
+		if err != nil {
+			continue
+		}
+		if until := time.Unix(stamp, 0); until.After(time.Now()) {
+			managed.CoolUntil = until
+		}
+	}
+}
+
+// saveCooldown persists the cooldown deadline for phone to Redis, expiring
+// when the cooldown ends. Must be called with m.mu held. Deadlines that have
+// already passed are not written, and write errors are ignored. No-op without
+// a Redis client.
+func (m *AccountManager) saveCooldown(phone string, coolUntil time.Time) {
+	if m.redis == nil {
+		return
+	}
+	remaining := time.Until(coolUntil)
+	if remaining <= 0 {
+		return
+	}
+	_ = m.redis.Set(context.Background(), "spider:tg:floodwait:"+phone, coolUntil.Unix(), remaining).Err()
+}

+ 418 - 0
internal/telegram/client.go

@@ -0,0 +1,418 @@
+package telegram
+
+import (
+	"context"
+	"regexp"
+	"sort"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/gotd/td/session"
+	"github.com/gotd/td/telegram"
+	"github.com/gotd/td/tg"
+	"github.com/gotd/td/tgerr"
+)
+
+// tmeRegexp matches http(s)://t.me/... links; a match ends at whitespace or at one of " ' < > ) ].
+var tmeRegexp = regexp.MustCompile(`https?://t\.me/[^\s"'<>)\]]+`)
+
+// Client is a Telegram client bound to one account.
+type Client struct {
+	account     Account
+	sessionPath string
+
+	mu     sync.Mutex // guards the fields below
+	tgc    *telegram.Client
+	api    *tg.Client
+	cancel context.CancelFunc
+	ready  chan struct{} // closed when connected
+	runErr error
+}
+
+// New creates a client for account. It only initializes state and does not
+// connect.
+func New(account Account) *Client {
+	client := &Client{account: account}
+	client.sessionPath = account.SessionFile
+	client.ready = make(chan struct{})
+	return client
+}
+
+// Connect starts a Telegram session restored from the account's session file
+// and blocks until the session is ready, fails, or ctx is done.
+//
+// No interactive login is performed; the session file must have been
+// generated beforehand.
+//
+// The session runs in a background goroutine until Disconnect is called, ctx
+// is cancelled, or Connect is called again. A repeated Connect shuts down the
+// previous session instead of leaving its goroutine running.
+//
+// An error is returned whenever the session did not become ready. That
+// includes the run loop exiting early: its error is returned, or
+// context.Canceled when it exited without one.
+func (c *Client) Connect(ctx context.Context) error {
+	storage := &session.FileStorage{Path: c.sessionPath}
+
+	opts := telegram.Options{
+		SessionStorage: storage,
+		NoUpdates:      true,
+	}
+
+	client := telegram.NewClient(c.account.AppID, c.account.AppHash, opts)
+
+	runCtx, cancel := context.WithCancel(ctx)
+
+	c.mu.Lock()
+	prevCancel := c.cancel
+	c.tgc = client
+	c.api = nil // never hand out the previous session's API
+	c.cancel = cancel
+	c.ready = make(chan struct{})
+	c.runErr = nil
+	readyCh := c.ready
+	c.mu.Unlock()
+
+	// Stop the session this call replaces; otherwise it would run until the
+	// parent context ends.
+	if prevCancel != nil {
+		prevCancel()
+	}
+
+	errCh := make(chan error, 1)
+
+	go func() {
+		err := client.Run(runCtx, func(ctx context.Context) error {
+			c.mu.Lock()
+			// Skip publishing if a newer Connect has already replaced us.
+			if c.tgc == client {
+				c.api = client.API()
+			}
+			close(readyCh)
+			c.mu.Unlock()
+
+			// Block until context is cancelled (Disconnect called)
+			<-ctx.Done()
+			return ctx.Err()
+		})
+		c.mu.Lock()
+		if c.tgc == client {
+			c.runErr = err
+		}
+		c.mu.Unlock()
+		errCh <- err
+	}()
+
+	// Wait for ready or error
+	select {
+	case <-readyCh:
+		return nil
+	case err := <-errCh:
+		// The run loop ended before the session became ready.
+		cancel()
+		if err == nil {
+			err = context.Canceled
+		}
+		return err
+	case <-ctx.Done():
+		cancel()
+		return ctx.Err()
+	}
+}
+
+// Disconnect cancels the running session, if there is one. It is safe to
+// call on a client that was never connected.
+func (c *Client) Disconnect() {
+	c.mu.Lock()
+	stop := c.cancel
+	c.mu.Unlock()
+
+	if stop == nil {
+		return
+	}
+	stop()
+}
+
+// waitReady returns the API client, waiting for the session to become ready
+// if it is not yet available. It returns ctx.Err() when ctx ends first.
+func (c *Client) waitReady(ctx context.Context) (*tg.Client, error) {
+	c.mu.Lock()
+	ready, current := c.ready, c.api
+	c.mu.Unlock()
+
+	if current != nil {
+		return current, nil
+	}
+
+	select {
+	case <-ctx.Done():
+		return nil, ctx.Err()
+	case <-ready:
+	}
+
+	// Ready was signalled; read the API that was published under the lock.
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	return c.api, nil
+}
+
+// GetChannelInfo resolves username (a leading "@" is stripped) and returns
+// information about the first channel or chat found in the resolution result.
+//
+// When the username resolves but the result contains no channel or chat, a
+// ChannelInfo holding only the username is returned with a nil error.
+// Resolution errors are returned, with FloodWait converted to *FloodWaitError.
+func (c *Client) GetChannelInfo(ctx context.Context, username string) (*ChannelInfo, error) {
+	api, err := c.waitReady(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	username = strings.TrimPrefix(username, "@")
+
+	resolved, err := api.ContactsResolveUsername(ctx, &tg.ContactsResolveUsernameRequest{
+		Username: username,
+	})
+	if err != nil {
+		return nil, wrapFloodWait(err)
+	}
+
+	info := &ChannelInfo{Username: username}
+
+	// Look for channel/chat in the resolved chats
+	for _, ch := range resolved.Chats {
+		switch v := ch.(type) {
+		case *tg.Channel:
+			title := v.Title
+			info.Title = title
+			info.IsChannel = v.GetBroadcast()
+			info.IsGroup = v.GetMegagroup()
+			if count, ok := v.GetParticipantsCount(); ok {
+				info.MemberCount = count
+			}
+			// Get full channel info for About
+			accessHash, hasHash := v.GetAccessHash()
+			if hasHash {
+				full, ferr := api.ChannelsGetFullChannel(ctx, &tg.InputChannel{
+					ChannelID:  v.GetID(),
+					AccessHash: accessHash,
+				})
+				// Best-effort: any error here (FloodWait included) is ignored
+				// and About is simply left empty.
+				if ferr == nil {
+					if cf, ok := full.FullChat.(*tg.ChannelFull); ok {
+						info.About = cf.GetAbout()
+						// Use the full-channel count only if none was found above.
+						if count, ok := cf.GetParticipantsCount(); ok && info.MemberCount == 0 {
+							info.MemberCount = count
+						}
+					}
+				}
+			}
+			return info, nil
+		case *tg.Chat:
+			info.Title = v.Title
+			info.IsGroup = true
+			info.MemberCount = v.ParticipantsCount
+			return info, nil
+		}
+	}
+
+	return info, nil
+}
+
+// GetMessages fetches message history for username (a leading "@" is
+// stripped).
+//
+// offsetID is the message ID to continue from (for resuming) and limit caps
+// how many messages are requested. The result is sorted by ascending ID.
+// FloodWait failures are returned as *FloodWaitError.
+func (c *Client) GetMessages(ctx context.Context, username string, offsetID, limit int) ([]Message, error) {
+	api, err := c.waitReady(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	peer, err := c.resolveInputPeer(ctx, api, strings.TrimPrefix(username, "@"))
+	if err != nil {
+		return nil, err
+	}
+
+	request := &tg.MessagesGetHistoryRequest{
+		Peer:     peer,
+		OffsetID: offsetID,
+		Limit:    limit,
+	}
+	history, err := api.MessagesGetHistory(ctx, request)
+	if err != nil {
+		return nil, wrapFloodWait(err)
+	}
+
+	return extractMessages(history), nil
+}
+
+// GetPinnedMessages returns up to 100 pinned messages for username (a
+// leading "@" is stripped), sorted by ascending ID. FloodWait failures are
+// returned as *FloodWaitError.
+func (c *Client) GetPinnedMessages(ctx context.Context, username string) ([]Message, error) {
+	api, err := c.waitReady(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	peer, err := c.resolveInputPeer(ctx, api, strings.TrimPrefix(username, "@"))
+	if err != nil {
+		return nil, err
+	}
+
+	request := &tg.MessagesSearchRequest{
+		Peer:   peer,
+		Filter: &tg.InputMessagesFilterPinned{},
+		Limit:  100,
+	}
+	pinned, err := api.MessagesSearch(ctx, request)
+	if err != nil {
+		return nil, wrapFloodWait(err)
+	}
+
+	return extractMessages(pinned), nil
+}
+
+// VerifyUser checks whether username (a leading "@" is stripped) exists and
+// returns what is known about it.
+//
+// Results:
+//   - username not occupied or invalid: UserInfo with Exists == false;
+//   - a user: profile fields plus LastOnline. LastOnline is the recorded
+//     last-seen time for offline users and the current time for users that
+//     are online right now; it stays nil for other status kinds;
+//   - a channel, supergroup or basic group: IsChannel / IsGroup set;
+//   - resolved but nothing usable in the result: Exists == false.
+//
+// Other API errors are returned, with FloodWait converted to *FloodWaitError.
+func (c *Client) VerifyUser(ctx context.Context, username string) (*UserInfo, error) {
+	api, err := c.waitReady(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	username = strings.TrimPrefix(username, "@")
+
+	resolved, err := api.ContactsResolveUsername(ctx, &tg.ContactsResolveUsernameRequest{
+		Username: username,
+	})
+	if err != nil {
+		if tgerr.Is(err, "USERNAME_NOT_OCCUPIED", "USERNAME_INVALID") {
+			return &UserInfo{Username: username, Exists: false}, nil
+		}
+		return nil, wrapFloodWait(err)
+	}
+
+	// Check if the peer is a user
+	if peer, ok := resolved.Peer.(*tg.PeerUser); ok {
+		for _, u := range resolved.Users {
+			user, ok := u.(*tg.User)
+			// Pick the user the peer refers to, not just the first entry.
+			if !ok || user.GetID() != peer.UserID {
+				continue
+			}
+			info := &UserInfo{
+				ID:        user.GetID(),
+				Username:  username,
+				IsBot:     user.GetBot(),
+				IsPremium: user.GetPremium(),
+				Exists:    true,
+			}
+			if fn, ok := user.GetFirstName(); ok {
+				info.FirstName = fn
+			}
+			if ln, ok := user.GetLastName(); ok {
+				info.LastName = ln
+			}
+			if status, ok := user.GetStatus(); ok {
+				switch s := status.(type) {
+				case *tg.UserStatusOffline:
+					t := time.Unix(int64(s.GetWasOnline()), 0)
+					info.LastOnline = &t
+				case *tg.UserStatusOnline:
+					// Online right now: record "now" as the last-seen time.
+					now := time.Now()
+					info.LastOnline = &now
+				}
+			}
+			return info, nil
+		}
+	}
+
+	// Check if it's a channel or group
+	for _, ch := range resolved.Chats {
+		switch v := ch.(type) {
+		case *tg.Channel:
+			return &UserInfo{
+				ID:        v.GetID(),
+				Username:  username,
+				IsChannel: v.GetBroadcast(),
+				IsGroup:   v.GetMegagroup(),
+				Exists:    true,
+			}, nil
+		case *tg.Chat:
+			return &UserInfo{
+				ID:       v.ID,
+				Username: username, // keep consistent with the channel case
+				IsGroup:  true,
+				Exists:   true,
+			}, nil
+		}
+	}
+
+	return &UserInfo{Username: username, Exists: false}, nil
+}
+
+// resolveInputPeer resolves a username to an InputPeer.
+//
+// For channel and user peers the access hash is taken from the matching
+// entry in the resolution result; if no entry matches, an InputPeer without
+// an access hash is returned. An unrecognized peer kind yields
+// InputPeerEmpty with a nil error. Resolution errors are returned, with
+// FloodWait converted to *FloodWaitError.
+func (c *Client) resolveInputPeer(ctx context.Context, api *tg.Client, username string) (tg.InputPeerClass, error) {
+	resolved, err := api.ContactsResolveUsername(ctx, &tg.ContactsResolveUsernameRequest{
+		Username: username,
+	})
+	if err != nil {
+		return nil, wrapFloodWait(err)
+	}
+
+	switch p := resolved.Peer.(type) {
+	case *tg.PeerChannel:
+		// Find the channel entry with the same ID to obtain its access hash.
+		for _, ch := range resolved.Chats {
+			if channel, ok := ch.(*tg.Channel); ok && channel.GetID() == p.ChannelID {
+				accessHash, _ := channel.GetAccessHash()
+				return &tg.InputPeerChannel{
+					ChannelID:  p.ChannelID,
+					AccessHash: accessHash,
+				}, nil
+			}
+		}
+		return &tg.InputPeerChannel{ChannelID: p.ChannelID}, nil
+	case *tg.PeerUser:
+		// Same lookup for users.
+		for _, u := range resolved.Users {
+			if user, ok := u.(*tg.User); ok && user.GetID() == p.UserID {
+				accessHash, _ := user.GetAccessHash()
+				return &tg.InputPeerUser{
+					UserID:     p.UserID,
+					AccessHash: accessHash,
+				}, nil
+			}
+		}
+		return &tg.InputPeerUser{UserID: p.UserID}, nil
+	case *tg.PeerChat:
+		return &tg.InputPeerChat{ChatID: p.ChatID}, nil
+	}
+
+	return &tg.InputPeerEmpty{}, nil
+}
+
+// extractMessages converts an API message result into Messages sorted by
+// ascending ID.
+//
+// Regular messages carry their text and the t.me links found in it. Service
+// messages carry only their ID with IsService set. A "not modified" result,
+// or one without messages, yields nil.
+//
+// Message.ForwardFromChannel is not populated: a forward header exposes only
+// the source channel's ID, and no ID-to-username mapping is available here.
+func extractMessages(result tg.MessagesMessagesClass) []Message {
+	var rawMsgs []tg.MessageClass
+	switch v := result.(type) {
+	case *tg.MessagesMessages:
+		rawMsgs = v.Messages
+	case *tg.MessagesMessagesSlice:
+		rawMsgs = v.Messages
+	case *tg.MessagesChannelMessages:
+		rawMsgs = v.Messages
+	case *tg.MessagesMessagesNotModified:
+		return nil
+	}
+
+	var msgs []Message
+	for _, raw := range rawMsgs {
+		switch m := raw.(type) {
+		case *tg.Message:
+			text := m.GetMessage()
+			msgs = append(msgs, Message{
+				ID:        m.GetID(),
+				Text:      text,
+				Links:     tmeRegexp.FindAllString(text, -1), // t.me links in the text
+				IsService: false,
+			})
+		case *tg.MessageService:
+			msgs = append(msgs, Message{
+				ID:        m.GetID(),
+				IsService: true,
+			})
+		}
+	}
+
+	// Sort by ID ascending
+	sort.Slice(msgs, func(i, j int) bool {
+		return msgs[i].ID < msgs[j].ID
+	})
+
+	return msgs
+}
+
+// isFloodWait reports whether err is a FloodWait error and, if so, how many
+// whole seconds to wait.
+func isFloodWait(err error) (bool, int) {
+	wait, flooded := tgerr.AsFloodWait(err)
+	if !flooded {
+		return false, 0
+	}
+	return true, int(wait.Seconds())
+}
+
+// wrapFloodWait converts a FloodWait error into *FloodWaitError and returns
+// every other error unchanged.
+func wrapFloodWait(err error) error {
+	flooded, seconds := isFloodWait(err)
+	if !flooded {
+		return err
+	}
+	return &FloodWaitError{Seconds: seconds}
+}

+ 56 - 0
internal/telegram/types.go

@@ -0,0 +1,56 @@
+package telegram
+
+import (
+	"fmt"
+	"time"
+)
+
+// Account holds the information for one Telegram account.
+type Account struct {
+	Phone       string
+	SessionFile string
+	AppID       int
+	AppHash     string
+}
+
+// ChannelInfo holds basic information about a channel.
+type ChannelInfo struct {
+	Username    string
+	Title       string
+	MemberCount int
+	About       string
+	IsChannel   bool
+	IsGroup     bool
+}
+
+// Message is a Telegram message.
+type Message struct {
+	ID                 int
+	Text               string
+	ForwardFromChannel string // username of the channel the message was forwarded from
+	Links              []string // t.me links found in the message
+	IsService          bool // service (system) message
+}
+
+// UserInfo holds Telegram user information (used when validating merchants).
+type UserInfo struct {
+	ID        int64
+	Username  string
+	FirstName string
+	LastName  string
+	IsBot     bool
+	IsPremium bool
+	LastOnline *time.Time
+	IsChannel bool
+	IsGroup   bool
+	Exists    bool // false means the user does not exist or the account was deleted
+}
+
+// FloodWaitError is a FloodWait error that carries the wait duration.
+type FloodWaitError struct {
+	Seconds int
+}
+
+// Error implements the error interface; the text includes the wait in seconds.
+func (fw *FloodWaitError) Error() string {
+	const layout = "FloodWait: %d seconds"
+	return fmt.Sprintf(layout, fw.Seconds)
+}

+ 0 - 0
internal/worker/.gitkeep


+ 325 - 0
internal/worker/worker.go

@@ -0,0 +1,325 @@
+package worker
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"log"
+	"time"
+
+	"github.com/hibiken/asynq"
+	"github.com/redis/go-redis/v9"
+	"gorm.io/gorm"
+
+	"spider/internal/llm"
+	"spider/internal/model"
+	"spider/internal/pipeline"
+	"spider/internal/search"
+	"spider/internal/telegram"
+)
+
+// Queue name and asynq task type identifiers. Each Type* value is registered
+// on the worker's mux in New and mapped back to its short task_type name by
+// taskTypeFromAsynqType.
+const (
+	QueueDefault = "default"
+
+	TypeFullPipeline = "task:full"
+	TypeDiscover     = "task:discover"
+	TypeSearch       = "task:search"
+	TypeGithub       = "task:github"
+	TypeScrape       = "task:scrape"
+	TypeCrawl        = "task:crawl"
+	TypeClean        = "task:clean"
+	TypeScore        = "task:score"
+)
+
+// lockKeyForType maps a task type to the Redis key used as its lock.
+// The "full" type uses a dedicated "global" key; every other type gets a key
+// built from its own name.
+//
+// NOTE(review): processTask only takes the key returned here, so the "global"
+// key does not by itself block single-phase tasks — confirm that is intended.
+func lockKeyForType(taskType string) string {
+	switch taskType {
+	case "full":
+		return "spider:task:lock:global"
+	default:
+		return fmt.Sprintf("spider:task:lock:%s", taskType)
+	}
+}
+
+// progressKey returns the Redis hash key for task progress.
+func progressKey(taskID uint) string {
+	return fmt.Sprintf("spider:task:progress:%d", taskID)
+}
+
+// stopKey returns the Redis key used to signal stop for a task.
+func stopKey(taskID uint) string {
+	return fmt.Sprintf("spider:task:stop:%d", taskID)
+}
+
+// TaskPayload is the asynq task payload, JSON-decoded by processTask.
+// TaskID selects the model.Task row; Target, TestRun and SkipPhases are
+// forwarded to the pipeline options.
+type TaskPayload struct {
+	TaskID     uint     `json:"task_id"`
+	Target     string   `json:"target,omitempty"`
+	TestRun    *TestRun `json:"test_run,omitempty"`
+	SkipPhases []string `json:"skip_phases,omitempty"`
+}
+
+// TestRun limits items processed during a test run. Both values are copied
+// unchanged into pipeline.TestRun by processTask.
+type TestRun struct {
+	ItemLimit    int `json:"item_limit"`
+	MessageLimit int `json:"message_limit"`
+}
+
+// Worker wraps the asynq server together with the dependencies its task
+// handler needs.
+type Worker struct {
+	server       *asynq.Server // asynq server started by Start
+	mux          *asynq.ServeMux // routes every task type to processTask
+	db           *gorm.DB
+	redis        *redis.Client // locks, progress, logs and stop signals
+	tgManager    *telegram.AccountManager
+	llmClient    *llm.Client
+	settings     pipeline.Settings
+	serperClient *search.SerperClient
+	githubToken  string
+	pipeline     *pipeline.Runner // single runner shared by all handler invocations
+}
+
+// New builds a Worker: an asynq server bound to the given Redis instance, a
+// pipeline runner with all phases registered, and a mux that routes every
+// task type to processTask. The server is not started; call Start for that.
+func New(redisAddr, redisPassword string, redisDB int, db *gorm.DB, rdb *redis.Client, tgManager *telegram.AccountManager, llmClient *llm.Client, settings pipeline.Settings, serperClient *search.SerperClient, githubToken string) *Worker {
+	redisOpt := asynq.RedisClientOpt{
+		Addr:     redisAddr,
+		Password: redisPassword,
+		DB:       redisDB,
+	}
+	serverCfg := asynq.Config{
+		Concurrency: 4,
+		Queues:      map[string]int{QueueDefault: 10},
+	}
+	srv := asynq.NewServer(redisOpt, serverCfg)
+
+	// Registration order: discover, search, github, scrape, crawl, clean, score.
+	runner := pipeline.NewRunner(db, rdb)
+	runner.RegisterPhase(pipeline.NewDiscoverPhase(db, tgManager, settings))
+	runner.RegisterPhase(pipeline.NewSearchPhase(db, serperClient, settings))
+	runner.RegisterPhase(pipeline.NewGithubPhase(db, githubToken, settings))
+	runner.RegisterPhase(pipeline.NewScrapePhase(db, tgManager, llmClient, settings, rdb))
+	runner.RegisterPhase(pipeline.NewCrawlPhase(db, llmClient, settings))
+	runner.RegisterPhase(pipeline.NewCleanPhase(db, tgManager, settings))
+	runner.RegisterPhase(pipeline.NewScorePhase(db))
+
+	w := &Worker{
+		server:       srv,
+		mux:          asynq.NewServeMux(),
+		db:           db,
+		redis:        rdb,
+		tgManager:    tgManager,
+		llmClient:    llmClient,
+		settings:     settings,
+		serperClient: serperClient,
+		githubToken:  githubToken,
+		pipeline:     runner,
+	}
+
+	// Every task type shares the same generic handler.
+	taskTypes := []string{
+		TypeFullPipeline,
+		TypeDiscover,
+		TypeSearch,
+		TypeGithub,
+		TypeScrape,
+		TypeCrawl,
+		TypeClean,
+		TypeScore,
+	}
+	for _, taskType := range taskTypes {
+		w.mux.HandleFunc(taskType, w.processTask)
+	}
+
+	return w
+}
+
+// acquireLock attempts to take the lock with a Redis SETNX carrying a 24-hour
+// expiry. It returns true only when the key was newly set; a Redis error is
+// logged and reported as "not acquired".
+func (w *Worker) acquireLock(ctx context.Context, lockKey string) bool {
+	acquired, err := w.redis.SetNX(ctx, lockKey, "1", 24*time.Hour).Result()
+	if err == nil {
+		return acquired
+	}
+	log.Printf("[worker] acquireLock error key=%s: %v", lockKey, err)
+	return false
+}
+
+// releaseLock removes the Redis lock key; a failure is logged, not returned.
+func (w *Worker) releaseLock(ctx context.Context, lockKey string) {
+	err := w.redis.Del(ctx, lockKey).Err()
+	if err == nil {
+		return
+	}
+	log.Printf("[worker] releaseLock error key=%s: %v", lockKey, err)
+}
+
+// writeLog pushes a "[HH:MM:SS] msg" line onto the task's Redis log list,
+// trims the list to its newest 500 entries and refreshes a 24-hour TTL.
+// The results of the three Redis commands are not checked.
+func (w *Worker) writeLog(ctx context.Context, taskID uint, msg string) {
+	listKey := fmt.Sprintf("spider:task:logs:%d", taskID)
+	entry := fmt.Sprintf("[%s] %s", time.Now().Format("15:04:05"), msg)
+
+	w.redis.RPush(ctx, listKey, entry)
+	w.redis.LTrim(ctx, listKey, -500, -1)
+	w.redis.Expire(ctx, listKey, 24*time.Hour)
+}
+
+// writeProgress stores the task's current phase, counters, message and an
+// RFC 3339 UTC timestamp in its Redis progress hash, then refreshes a 24-hour
+// TTL on that hash. If the hash write fails the error is logged and the TTL
+// is left untouched.
+func (w *Worker) writeProgress(ctx context.Context, taskID uint, phase string, current, total int, message string) {
+	hashKey := progressKey(taskID)
+	fields := []interface{}{
+		"phase", phase,
+		"current", current,
+		"total", total,
+		"message", message,
+		"updated_at", time.Now().UTC().Format(time.RFC3339),
+	}
+
+	if err := w.redis.HSet(ctx, hashKey, fields...).Err(); err != nil {
+		log.Printf("[worker] writeProgress error task=%d: %v", taskID, err)
+		return
+	}
+	w.redis.Expire(ctx, hashKey, 24*time.Hour)
+}
+
+// isStopRequested reports whether the task's stop key currently holds "1".
+// Any Redis error (including a missing key) counts as "no stop requested".
+func (w *Worker) isStopRequested(ctx context.Context, taskID uint) bool {
+	flag, err := w.redis.Get(ctx, stopKey(taskID)).Result()
+	return err == nil && flag == "1"
+}
+
+// taskTypeFromAsynqType translates an asynq type identifier into the short
+// task_type value used by the model. An identifier that is not one of the
+// Type* constants is returned as-is.
+func taskTypeFromAsynqType(asynqType string) string {
+	shortNames := map[string]string{
+		TypeFullPipeline: "full",
+		TypeDiscover:     "discover",
+		TypeSearch:       "search",
+		TypeGithub:       "github",
+		TypeScrape:       "scrape",
+		TypeCrawl:        "crawl",
+		TypeClean:        "clean",
+		TypeScore:        "score",
+	}
+	if short, known := shortNames[asynqType]; known {
+		return short
+	}
+	return asynqType
+}
+
+// processTask is the asynq handler shared by every registered task type.
+//
+// Flow: decode the payload, take the per-type Redis lock, mark the task row
+// "running", load it, run the pipeline, then record the terminal state
+// ("failed", "stopped" or "completed") in the database and in Redis.
+//
+// It returns a non-nil error when the payload is malformed, the lock is
+// already held, a required DB read/write fails, or the pipeline returns an
+// error. A stopped or completed task returns nil.
+//
+// Cleanup (lock release and terminal status writes) runs on a fresh context
+// with a short timeout rather than on ctx: ctx may already be cancelled when
+// the handler unwinds (e.g. handler timeout or server shutdown), and reusing
+// it would leave the 24-hour lock in place and the row stuck in "running".
+//
+// NOTE(review): the progress reporter is installed on the shared
+// pipeline.Runner while the server runs with Concurrency 4, so two tasks of
+// different types running at once would overwrite each other's reporter —
+// confirm whether Runner supports a per-run reporter.
+func (w *Worker) processTask(ctx context.Context, t *asynq.Task) error {
+	// Upper bound for each piece of post-run bookkeeping.
+	const cleanupTimeout = 10 * time.Second
+
+	var payload TaskPayload
+	if err := json.Unmarshal(t.Payload(), &payload); err != nil {
+		return fmt.Errorf("unmarshal payload: %w", err)
+	}
+
+	taskID := payload.TaskID
+	taskType := taskTypeFromAsynqType(t.Type())
+	lockKey := lockKeyForType(taskType)
+
+	log.Printf("[worker] processing task id=%d type=%s", taskID, taskType)
+
+	// Acquire distributed lock.
+	if !w.acquireLock(ctx, lockKey) {
+		return fmt.Errorf("another %s task is already running, skipping", taskType)
+	}
+	defer func() {
+		// Detached context: the lock must be released even if ctx is cancelled.
+		releaseCtx, cancel := context.WithTimeout(context.Background(), cleanupTimeout)
+		defer cancel()
+		w.releaseLock(releaseCtx, lockKey)
+	}()
+
+	// markFailed records the failure in the DB and in Redis on a detached context.
+	markFailed := func(cause error) {
+		failCtx, cancel := context.WithTimeout(context.Background(), cleanupTimeout)
+		defer cancel()
+
+		errorTime := time.Now()
+		if err := w.db.WithContext(failCtx).Model(&model.Task{}).Where("id = ?", taskID).Updates(map[string]interface{}{
+			"status":      "failed",
+			"finished_at": &errorTime,
+			"error_msg":   cause.Error(),
+		}).Error; err != nil {
+			log.Printf("[worker] update task failed error task=%d: %v", taskID, err)
+		}
+		w.writeProgress(failCtx, taskID, taskType, 0, 0, "任务失败: "+cause.Error())
+		w.writeLog(failCtx, taskID, "任务失败: "+cause.Error())
+	}
+
+	// 1. Update task status → running.
+	now := time.Now()
+	if err := w.db.WithContext(ctx).Model(&model.Task{}).Where("id = ?", taskID).Updates(map[string]interface{}{
+		"status":     "running",
+		"started_at": &now,
+	}).Error; err != nil {
+		return fmt.Errorf("update task running: %w", err)
+	}
+
+	// 2. Write initial progress to Redis and log task start.
+	w.writeProgress(ctx, taskID, taskType, 0, 0, "任务启动中...")
+	w.writeLog(ctx, taskID, fmt.Sprintf("任务开始: %s (id=%d)", taskType, taskID))
+
+	// 3. Fetch the full task record for the pipeline.
+	var task model.Task
+	if err := w.db.WithContext(ctx).First(&task, taskID).Error; err != nil {
+		fetchErr := fmt.Errorf("fetch task record: %w", err)
+		// The row was just set to "running"; do not leave it in that state.
+		markFailed(fetchErr)
+		return fetchErr
+	}
+
+	// 4. Build pipeline options from payload.
+	opts := &pipeline.Options{
+		Target:     payload.Target,
+		SkipPhases: payload.SkipPhases,
+	}
+	if payload.TestRun != nil {
+		opts.TestRun = &pipeline.TestRun{
+			ItemLimit:    payload.TestRun.ItemLimit,
+			MessageLimit: payload.TestRun.MessageLimit,
+		}
+	}
+
+	// Wire progress reporter so pipeline phases report through writeProgress and writeLog.
+	w.pipeline.SetProgressReporter(func(phase string, current, total int, message string) {
+		// A stop request seen here is only logged; it does not interrupt the run.
+		if w.isStopRequested(ctx, taskID) {
+			log.Printf("[worker] task %d stop requested during phase=%s", taskID, phase)
+		}
+		w.writeProgress(ctx, taskID, phase, current, total, message)
+		if message != "" {
+			w.writeLog(ctx, taskID, fmt.Sprintf("[%s] %d/%d %s", phase, current, total, message))
+		}
+	})
+
+	// 5. Run the pipeline; any error it returns fails the task.
+	if pipelineErr := w.pipeline.Run(ctx, &task, opts); pipelineErr != nil {
+		log.Printf("[worker] pipeline error task=%d: %v", taskID, pipelineErr)
+		markFailed(pipelineErr)
+		return pipelineErr
+	}
+
+	// Terminal bookkeeping below uses a detached context for the same reason
+	// as the lock release.
+	doneCtx, cancelDone := context.WithTimeout(context.Background(), cleanupTimeout)
+	defer cancelDone()
+
+	// Check for stop request after pipeline finishes.
+	if w.isStopRequested(doneCtx, taskID) {
+		log.Printf("[worker] task %d stop requested", taskID)
+		stopTime := time.Now()
+		if err := w.db.WithContext(doneCtx).Model(&model.Task{}).Where("id = ?", taskID).Updates(map[string]interface{}{
+			"status":      "stopped",
+			"finished_at": &stopTime,
+		}).Error; err != nil {
+			log.Printf("[worker] update task stopped error task=%d: %v", taskID, err)
+		}
+		w.writeProgress(doneCtx, taskID, taskType, 0, 0, "任务已停止")
+		w.writeLog(doneCtx, taskID, "任务已停止")
+		return nil
+	}
+
+	// 6. Mark task as completed.
+	finishedAt := time.Now()
+	resultJSON, _ := json.Marshal(map[string]interface{}{"message": "task completed successfully"})
+	if err := w.db.WithContext(doneCtx).Model(&model.Task{}).Where("id = ?", taskID).Updates(map[string]interface{}{
+		"status":      "completed",
+		"finished_at": &finishedAt,
+		"result":      resultJSON,
+	}).Error; err != nil {
+		return fmt.Errorf("update task completed: %w", err)
+	}
+
+	w.writeProgress(doneCtx, taskID, taskType, 100, 100, "任务完成")
+	w.writeLog(doneCtx, taskID, "任务完成")
+	log.Printf("[worker] task %d completed", taskID)
+	return nil
+}
+
+// Start runs the asynq server (blocking) with the worker's mux as handler and
+// returns whatever error the server's Run call returns.
+func (w *Worker) Start() error {
+	return w.server.Run(w.mux)
+}
+
+// Stop gracefully shuts down the asynq server by calling its Shutdown method.
+func (w *Worker) Stop() {
+	w.server.Shutdown()
+}

BIN
server.exe


+ 0 - 0
sessions/.gitkeep


+ 0 - 0
web/.gitkeep


+ 12 - 0
web/index.html

@@ -0,0 +1,12 @@
+<!DOCTYPE html>
+<html lang="zh-CN">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>商户查找系统</title>
+  </head>
+  <body>
+    <div id="root"></div>
+    <script type="module" src="/src/main.tsx"></script>
+  </body>
+</html>

+ 27 - 0
web/package.json

@@ -0,0 +1,27 @@
+{
+  "name": "spider-web",
+  "private": true,
+  "version": "0.0.1",
+  "type": "module",
+  "scripts": {
+    "dev": "vite",
+    "build": "tsc && vite build",
+    "preview": "vite preview"
+  },
+  "dependencies": {
+    "react": "^18.2.0",
+    "react-dom": "^18.2.0",
+    "react-router-dom": "^6.21.0",
+    "antd": "^5.12.0",
+    "@ant-design/icons": "^5.2.6",
+    "axios": "^1.6.2",
+    "zustand": "^4.4.7"
+  },
+  "devDependencies": {
+    "@types/react": "^18.2.43",
+    "@types/react-dom": "^18.2.17",
+    "@vitejs/plugin-react": "^4.2.1",
+    "typescript": "^5.2.2",
+    "vite": "^5.0.8"
+  }
+}

+ 33 - 0
web/src/App.tsx

@@ -0,0 +1,33 @@
+import { BrowserRouter, Routes, Route } from 'react-router-dom'
+import Layout from './components/Layout'
+import Dashboard from './pages/Dashboard'
+import Tasks from './pages/Tasks'
+import MerchantsRaw from './pages/MerchantsRaw'
+import MerchantsClean from './pages/MerchantsClean'
+import Channels from './pages/Channels'
+import NavSites from './pages/NavSites'
+import Seeds from './pages/Seeds'
+import Keywords from './pages/Keywords'
+import Settings from './pages/Settings'
+import Logs from './pages/Logs'
+
+/**
+ * Root component: mounts the browser router and renders every page inside the
+ * shared Layout shell. Each route path maps to exactly one page component;
+ * there is no catch-all (`*`) route.
+ */
+export default function App() {
+  return (
+    <BrowserRouter>
+      <Layout>
+        <Routes>
+          <Route path="/" element={<Dashboard />} />
+          <Route path="/tasks" element={<Tasks />} />
+          <Route path="/merchants/raw" element={<MerchantsRaw />} />
+          <Route path="/merchants/clean" element={<MerchantsClean />} />
+          <Route path="/channels" element={<Channels />} />
+          <Route path="/nav-sites" element={<NavSites />} />
+          <Route path="/seeds" element={<Seeds />} />
+          <Route path="/keywords" element={<Keywords />} />
+          <Route path="/settings" element={<Settings />} />
+          <Route path="/logs" element={<Logs />} />
+        </Routes>
+      </Layout>
+    </BrowserRouter>
+  )
+}

+ 177 - 0
web/src/api/index.ts

@@ -0,0 +1,177 @@
+import axios from 'axios'
+
+// Shared axios instance; every request path below is relative to /api/v1.
+const api = axios.create({ baseURL: '/api/v1' })
+
+// Response interceptor: unwrap the axios response and hand callers its `data`
+// field (the response body). Errors are passed through as rejected promises.
+api.interceptors.response.use(
+  (res) => res.data,
+  (error) => {
+    return Promise.reject(error)
+  }
+)
+
+// Types
+/** Request body sent by startTask to POST /tasks/start. */
+export interface StartTaskRequest {
+  task_type: 'full' | 'discover' | 'search' | 'github' | 'scrape' | 'crawl' | 'clean' | 'score'
+  target?: string
+  // Optional limits for a test run.
+  test_run?: { item_limit: number; message_limit: number }
+  skip_phases?: string[]
+}
+
+/** Envelope type every endpoint wrapper in this module resolves to. */
+export interface ApiResponse<T> {
+  code: number
+  message: string
+  data: T
+}
+
+/** Payload shape of the list endpoints: one page of items plus paging info. */
+export interface PagedResponse<T> {
+  items: T[]
+  total: number
+  page: number
+  page_size: number
+}
+
+/**
+ * A task as returned by the /tasks endpoints and the dashboard.
+ *
+ * NOTE(review): the worker persists columns named `finished_at` and
+ * `error_msg`; confirm the API really serialises them as `completed_at` and
+ * `error`.
+ */
+export interface Task {
+  id: number
+  task_type: string
+  status: string
+  params: Record<string, unknown>
+  current_phase: string
+  progress: number
+  created_at: string
+  completed_at: string | null
+  error: string | null
+}
+
+/** Item type of the /merchants/raw list. */
+export interface MerchantRaw {
+  id: number
+  tg_username: string
+  merchant_name: string
+  source_type: string
+  status: string
+  industry: string
+  created_at: string
+}
+
+/** Item type of the /merchants/clean list. */
+export interface MerchantClean {
+  id: number
+  tg_username: string
+  merchant_name: string
+  status: string
+  industry: string
+  quality_score: number
+  is_premium: boolean
+  activity_score: number
+  created_at: string
+}
+
+/** Item type of the /channels list. */
+export interface Channel {
+  id: number
+  username: string
+  title: string
+  member_count: number
+  source: string
+  status: string
+  relevance_score: number
+  created_at: string
+}
+
+/** Item type of the /nav-sites list. */
+export interface NavSite {
+  id: number
+  url: string
+  domain: string
+  status: string
+  filter_reason: string | null
+  merchant_count: number
+  created_at: string
+}
+
+/** Item type of the /seeds endpoints; note that `status` is a boolean here. */
+export interface Seed {
+  id: number
+  channel_name: string
+  status: boolean
+  note: string
+  created_at: string
+}
+
+/** Item type of the /keywords endpoints; note that `status` is a boolean here. */
+export interface Keyword {
+  id: number
+  keyword: string
+  category: string
+  status: boolean
+  created_at: string
+}
+
+/** One key/value entry returned by /config/settings. */
+export interface Setting {
+  key: string
+  value: string
+  description: string
+  updated_at: string
+}
+
+/** Payload of GET /dashboard: entity counts plus the running and recent tasks. */
+export interface DashboardData {
+  channel_count: number
+  merchant_raw_count: number
+  merchant_clean_count: number
+  merchant_valid_count: number
+  nav_site_count: number
+  seed_count: number
+  running_task: Task | null
+  recent_tasks: Task[]
+}
+
+/** Payload of GET /channels/stats: channel counts keyed by source and by status. */
+export interface ChannelStats {
+  source_distribution: Record<string, number>
+  status_distribution: Record<string, number>
+}
+
+// Endpoint wrappers. The second type argument of each call is the resolved
+// type: because of the response interceptor, callers receive the ApiResponse
+// envelope directly and read the payload from its `data` field.
+
+// Dashboard
+export const getDashboard = () => api.get<unknown, ApiResponse<DashboardData>>('/dashboard')
+
+// Tasks
+export const getTasks = (params?: Record<string, unknown>) =>
+  api.get<unknown, ApiResponse<PagedResponse<Task>>>('/tasks', { params })
+export const getTask = (id: number) => api.get<unknown, ApiResponse<Task>>(`/tasks/${id}`)
+export const startTask = (data: StartTaskRequest) =>
+  api.post<unknown, ApiResponse<Task>>('/tasks/start', data)
+// `force` is sent in the request body and defaults to false.
+export const stopTask = (id: number, force = false) =>
+  api.post<unknown, ApiResponse<null>>(`/tasks/${id}/stop`, { force })
+
+// Merchants
+export const getMerchantsStats = () => api.get<unknown, ApiResponse<Record<string, number>>>('/merchants/stats')
+export const getMerchantsRaw = (params?: Record<string, unknown>) =>
+  api.get<unknown, ApiResponse<PagedResponse<MerchantRaw>>>('/merchants/raw', { params })
+export const getMerchantsClean = (params?: Record<string, unknown>) =>
+  api.get<unknown, ApiResponse<PagedResponse<MerchantClean>>>('/merchants/clean', { params })
+export const getMerchant = (id: number) => api.get<unknown, ApiResponse<MerchantRaw | MerchantClean>>(`/merchants/${id}`)
+
+// Channels
+export const getChannels = (params?: Record<string, unknown>) =>
+  api.get<unknown, ApiResponse<PagedResponse<Channel>>>('/channels', { params })
+export const getChannelsStats = () => api.get<unknown, ApiResponse<ChannelStats>>('/channels/stats')
+
+// Nav Sites
+export const getNavSites = (params?: Record<string, unknown>) =>
+  api.get<unknown, ApiResponse<PagedResponse<NavSite>>>('/nav-sites', { params })
+
+// Seeds
+export const getSeeds = (params?: Record<string, unknown>) =>
+  api.get<unknown, ApiResponse<PagedResponse<Seed>>>('/seeds', { params })
+export const createSeed = (data: Partial<Seed>) => api.post<unknown, ApiResponse<Seed>>('/seeds', data)
+export const updateSeed = (id: number, data: Partial<Seed>) =>
+  api.put<unknown, ApiResponse<Seed>>(`/seeds/${id}`, data)
+export const deleteSeed = (id: number) => api.delete<unknown, ApiResponse<null>>(`/seeds/${id}`)
+
+// Keywords
+export const getKeywords = (params?: Record<string, unknown>) =>
+  api.get<unknown, ApiResponse<PagedResponse<Keyword>>>('/keywords', { params })
+// Creates several keywords at once, all under the same category.
+export const createKeywords = (data: { keywords: string[]; category: string }) =>
+  api.post<unknown, ApiResponse<Keyword[]>>('/keywords', data)
+export const updateKeyword = (id: number, data: Partial<Keyword>) =>
+  api.put<unknown, ApiResponse<Keyword>>(`/keywords/${id}`, data)
+export const deleteKeyword = (id: number) => api.delete<unknown, ApiResponse<null>>(`/keywords/${id}`)
+
+// Settings
+export const getSettings = () => api.get<unknown, ApiResponse<Setting[]>>('/config/settings')
+/**
+ * Updates one setting. The key is percent-encoded before being placed in the
+ * URL path so that characters such as `/`, `?`, `#` or spaces cannot alter
+ * the request path; keys made of letters, digits, `-`, `_` and `.` are sent
+ * unchanged.
+ */
+export const updateSetting = (key: string, value: string) =>
+  api.put<unknown, ApiResponse<Setting>>(`/config/settings/${encodeURIComponent(key)}`, { value })

+ 126 - 0
web/src/components/Layout.tsx

@@ -0,0 +1,126 @@
+import { useState, useEffect } from 'react'
+import { Layout, Menu, theme } from 'antd'
+import {
+  DashboardOutlined,
+  PlayCircleOutlined,
+  DatabaseOutlined,
+  CheckCircleOutlined,
+  TeamOutlined,
+  GlobalOutlined,
+  NodeIndexOutlined,
+  TagsOutlined,
+  SettingOutlined,
+  FileTextOutlined,
+} from '@ant-design/icons'
+import { useNavigate, useLocation } from 'react-router-dom'
+
+const { Sider, Header, Content } = Layout
+
+/** Props for AppLayout: the page content rendered inside the shell. */
+interface LayoutProps {
+  children: React.ReactNode
+}
+
+// Sidebar entries. Each `key` is a route path: AppLayout passes it to
+// navigate() on click and compares it with location.pathname to highlight
+// the current entry.
+const menuItems = [
+  { key: '/', icon: <DashboardOutlined />, label: '仪表盘' },
+  { key: '/tasks', icon: <PlayCircleOutlined />, label: '任务管理' },
+  { key: '/merchants/raw', icon: <DatabaseOutlined />, label: '原始商户' },
+  { key: '/merchants/clean', icon: <CheckCircleOutlined />, label: '清洗商户' },
+  { key: '/channels', icon: <TeamOutlined />, label: '频道列表' },
+  { key: '/nav-sites', icon: <GlobalOutlined />, label: '导航网页' },
+  { key: '/seeds', icon: <NodeIndexOutlined />, label: '种子管理' },
+  { key: '/keywords', icon: <TagsOutlined />, label: '关键词管理' },
+  { key: '/settings', icon: <SettingOutlined />, label: '系统配置' },
+  { key: '/logs', icon: <FileTextOutlined />, label: '实时日志' },
+]
+
+/**
+ * Application shell: a fixed 220px sidebar with the navigation menu, a sticky
+ * header showing a live clock, and the page content passed as `children`.
+ */
+export default function AppLayout({ children }: LayoutProps) {
+  const navigate = useNavigate()
+  const location = useLocation()
+  const [currentTime, setCurrentTime] = useState(new Date())
+  const { token } = theme.useToken()
+
+  // Tick once per second so the header clock stays current; the interval is
+  // cleared when the component unmounts.
+  useEffect(() => {
+    const timer = setInterval(() => setCurrentTime(new Date()), 1000)
+    return () => clearInterval(timer)
+  }, [])
+
+  // Formats the clock with the zh-CN locale, two-digit fields down to seconds.
+  const formatTime = (date: Date) => {
+    return date.toLocaleString('zh-CN', {
+      year: 'numeric',
+      month: '2-digit',
+      day: '2-digit',
+      hour: '2-digit',
+      minute: '2-digit',
+      second: '2-digit',
+    })
+  }
+
+  // The content column's left margin (220) matches the fixed sidebar width.
+  return (
+    <Layout style={{ minHeight: '100vh' }}>
+      <Sider
+        width={220}
+        style={{
+          background: token.colorBgContainer,
+          borderRight: `1px solid ${token.colorBorderSecondary}`,
+          position: 'fixed',
+          height: '100vh',
+          left: 0,
+          top: 0,
+          bottom: 0,
+          zIndex: 100,
+        }}
+      >
+        <div
+          style={{
+            height: 64,
+            display: 'flex',
+            alignItems: 'center',
+            justifyContent: 'center',
+            borderBottom: `1px solid ${token.colorBorderSecondary}`,
+            padding: '0 16px',
+          }}
+        >
+          <span
+            style={{
+              fontSize: 16,
+              fontWeight: 700,
+              color: token.colorPrimary,
+              whiteSpace: 'nowrap',
+            }}
+          >
+            商户查找系统
+          </span>
+        </div>
+        <Menu
+          mode="inline"
+          selectedKeys={[location.pathname]}
+          style={{ border: 'none', marginTop: 8 }}
+          items={menuItems}
+          onClick={({ key }) => navigate(key)}
+        />
+      </Sider>
+      <Layout style={{ marginLeft: 220 }}>
+        <Header
+          style={{
+            background: token.colorBgContainer,
+            borderBottom: `1px solid ${token.colorBorderSecondary}`,
+            padding: '0 24px',
+            display: 'flex',
+            alignItems: 'center',
+            justifyContent: 'flex-end',
+            position: 'sticky',
+            top: 0,
+            zIndex: 99,
+          }}
+        >
+          <span style={{ color: token.colorTextSecondary, fontSize: 14 }}>
+            {formatTime(currentTime)}
+          </span>
+        </Header>
+        <Content style={{ padding: 24, minHeight: 'calc(100vh - 64px)' }}>
+          {children}
+        </Content>
+      </Layout>
+    </Layout>
+  )
+}

+ 183 - 0
web/src/components/TaskControl.tsx

@@ -0,0 +1,183 @@
+import { useState } from 'react'
+import {
+  Button,
+  Modal,
+  Switch,
+  InputNumber,
+  Form,
+  Progress,
+  Space,
+  Typography,
+  message,
+  Row,
+  Col,
+  Card,
+} from 'antd'
+import { StopOutlined } from '@ant-design/icons'
+import { startTask, stopTask, type StartTaskRequest } from '../api'
+import { useAppStore } from '../store'
+
+const { Text } = Typography
+
+/** One launch button: the task type it starts, its label, and whether it is styled as primary. */
+interface TaskButton {
+  type: StartTaskRequest['task_type']
+  label: string
+  isPrimary?: boolean
+}
+
+// One button per task type accepted by StartTaskRequest; only the full
+// pipeline is rendered as a primary button.
+const taskButtons: TaskButton[] = [
+  { type: 'full', label: '完整流水线', isPrimary: true },
+  { type: 'discover', label: '频道发现' },
+  { type: 'search', label: '搜索引擎' },
+  { type: 'github', label: 'GitHub采集' },
+  { type: 'scrape', label: 'TG采集' },
+  { type: 'crawl', label: '网页爬取' },
+  { type: 'clean', label: '数据清洗' },
+  { type: 'score', label: '评分' },
+]
+
+/** Props for TaskControl. `onTaskStarted` is invoked after a task was started successfully. */
+interface TaskControlProps {
+  onTaskStarted?: () => void
+}
+
+/**
+ * Card with one launch button per task type, a stop button and a progress
+ * bar for the task currently held in the app store.
+ *
+ * Clicking a launch button opens a confirmation modal where an optional test
+ * mode (item / message limits) can be enabled. All launch buttons are
+ * disabled while the store holds a running task.
+ */
+export default function TaskControl({ onTaskStarted }: TaskControlProps) {
+  const { runningTask, setRunningTask } = useAppStore()
+  const [modalOpen, setModalOpen] = useState(false)
+  const [selectedTask, setSelectedTask] = useState<TaskButton | null>(null)
+  const [testMode, setTestMode] = useState(false)
+  const [itemLimit, setItemLimit] = useState(10)
+  const [messageLimit, setMessageLimit] = useState(100)
+  const [loading, setLoading] = useState(false)
+  const [stopLoading, setStopLoading] = useState(false)
+
+  // Opens the confirmation modal for the clicked task; test mode is reset to
+  // off each time, while the limit values keep their previous state.
+  const handleTaskClick = (task: TaskButton) => {
+    setSelectedTask(task)
+    setTestMode(false)
+    setModalOpen(true)
+  }
+
+  // Starts the selected task. test_run is only sent when test mode is on.
+  // On success the returned task becomes the running task in the store.
+  const handleConfirm = async () => {
+    if (!selectedTask) return
+    setLoading(true)
+    try {
+      const req: StartTaskRequest = { task_type: selectedTask.type }
+      if (testMode) {
+        req.test_run = { item_limit: itemLimit, message_limit: messageLimit }
+      }
+      const res = await startTask(req)
+      setRunningTask(res.data)
+      message.success(`任务「${selectedTask.label}」已启动`)
+      setModalOpen(false)
+      onTaskStarted?.()
+    } catch (err) {
+      message.error('启动任务失败')
+    } finally {
+      setLoading(false)
+    }
+  }
+
+  // Sends the stop request for the running task. The task is cleared from
+  // the store as soon as the request succeeds.
+  const handleStop = async (force = false) => {
+    if (!runningTask) return
+    setStopLoading(true)
+    try {
+      await stopTask(runningTask.id, force)
+      setRunningTask(null)
+      message.success('任务已停止')
+    } catch {
+      message.error('停止任务失败')
+    } finally {
+      setStopLoading(false)
+    }
+  }
+
+  const isRunning = !!runningTask
+
+  return (
+    <Card title="任务控制" style={{ marginBottom: 24 }}>
+      <Space direction="vertical" style={{ width: '100%' }} size="middle">
+        <Row gutter={[8, 8]}>
+          {taskButtons.map((btn) => (
+            <Col key={btn.type}>
+              <Button
+                type={btn.isPrimary ? 'primary' : 'default'}
+                disabled={isRunning}
+                onClick={() => handleTaskClick(btn)}
+              >
+                {btn.label}
+              </Button>
+            </Col>
+          ))}
+          {isRunning && (
+            <Col>
+              <Button
+                danger
+                icon={<StopOutlined />}
+                loading={stopLoading}
+                onClick={() => handleStop(false)}
+              >
+                停止任务
+              </Button>
+            </Col>
+          )}
+        </Row>
+
+        {isRunning && runningTask ? (
+          <div>
+            <Space style={{ marginBottom: 8 }}>
+              <Text strong>当前任务:</Text>
+              <Text>{runningTask.task_type}</Text>
+              <Text type="secondary">阶段:{runningTask.current_phase || '初始化中'}</Text>
+            </Space>
+            <Progress
+              percent={runningTask.progress ?? 0}
+              status="active"
+            />
+          </div>
+        ) : (
+          <Text type="secondary">暂无运行中的任务</Text>
+        )}
+      </Space>
+
+      <Modal
+        title={`启动任务:${selectedTask?.label}`}
+        open={modalOpen}
+        onOk={handleConfirm}
+        onCancel={() => setModalOpen(false)}
+        confirmLoading={loading}
+        okText="确认启动"
+        cancelText="取消"
+      >
+        <Form layout="vertical" style={{ marginTop: 16 }}>
+          <Form.Item label="测试模式">
+            <Switch
+              checked={testMode}
+              onChange={setTestMode}
+              checkedChildren="开"
+              unCheckedChildren="关"
+            />
+          </Form.Item>
+          {testMode && (
+            <>
+              <Form.Item label="Item 限制">
+                <InputNumber
+                  min={1}
+                  value={itemLimit}
+                  onChange={(v) => setItemLimit(v ?? 10)}
+                  style={{ width: '100%' }}
+                />
+              </Form.Item>
+              <Form.Item label="Message 限制">
+                <InputNumber
+                  min={1}
+                  value={messageLimit}
+                  onChange={(v) => setMessageLimit(v ?? 100)}
+                  style={{ width: '100%' }}
+                />
+              </Form.Item>
+            </>
+          )}
+        </Form>
+      </Modal>
+    </Card>
+  )
+}

+ 13 - 0
web/src/main.tsx

@@ -0,0 +1,13 @@
+import React from 'react'
+import ReactDOM from 'react-dom/client'
+import { ConfigProvider } from 'antd'
+import zhCN from 'antd/locale/zh_CN'
+import App from './App'
+
+ReactDOM.createRoot(document.getElementById('root')!).render(
+  <React.StrictMode>
+    <ConfigProvider locale={zhCN}>
+      <App />
+    </ConfigProvider>
+  </React.StrictMode>
+)

+ 192 - 0
web/src/pages/Channels.tsx

@@ -0,0 +1,192 @@
+import { useEffect, useState, useCallback } from 'react'
+import { Table, Tag, Select, message, Row, Col, Card, Statistic, Badge } from 'antd'
+import { getChannels, getChannelsStats, type Channel, type ChannelStats } from '../api'
+
+const { Option } = Select
+
+/**
+ * Formats a timestamp string for display using the zh-CN locale.
+ * Returns '-' (the placeholder the other table columns use) when the value is
+ * empty or cannot be parsed, instead of rendering "Invalid Date".
+ */
+function formatDateTime(dateStr: string) {
+  if (!dateStr) return '-'
+  const parsed = new Date(dateStr)
+  return Number.isNaN(parsed.getTime()) ? '-' : parsed.toLocaleString('zh-CN')
+}
+
+// Choices for the status filter; the empty value means "no filter" (the
+// Channels component omits the `status` query parameter when it is empty).
+const statusOptions = [
+  { label: '全部', value: '' },
+  { label: 'active', value: 'active' },
+  { label: 'inactive', value: 'inactive' },
+  { label: 'banned', value: 'banned' },
+]
+
+// Choices for the source filter; the empty value means "no filter" (the
+// Channels component omits the `source` query parameter when it is empty).
+const sourceOptions = [
+  { label: '全部', value: '' },
+  { label: 'seed', value: 'seed' },
+  { label: 'discover', value: 'discover' },
+  { label: 'search', value: 'search' },
+]
+
+// Tag colour per channel source; sources not listed fall back to 'default'
+// in the table's source column.
+const sourceColors: Record<string, string> = {
+  seed: 'blue',
+  discover: 'green',
+  search: 'purple',
+}
+
+// Badge status per channel status; statuses not listed fall back to 'default'
+// in the table's status column.
+const statusBadgeMap: Record<string, 'success' | 'error' | 'warning' | 'default'> = {
+  active: 'success',
+  inactive: 'default',
+  banned: 'error',
+}
+
+/**
+ * Channel list page: distribution statistics on top, status/source filters,
+ * and a paginated table (20 rows per page) loaded from the server.
+ */
+export default function Channels() {
+  const [data, setData] = useState<Channel[]>([])
+  const [total, setTotal] = useState(0)
+  const [page, setPage] = useState(1)
+  const [loading, setLoading] = useState(false)
+  const [stats, setStats] = useState<ChannelStats | null>(null)
+  const [status, setStatus] = useState('')
+  const [source, setSource] = useState('')
+
+  // Loads one page of channels. Empty filters are left out of the query.
+  const fetchData = useCallback(async (currentPage = 1) => {
+    setLoading(true)
+    try {
+      const params: Record<string, unknown> = { page: currentPage, page_size: 20 }
+      if (status) params.status = status
+      if (source) params.source = source
+      const res = await getChannels(params)
+      setData(res.data.items)
+      setTotal(res.data.total)
+    } catch {
+      message.error('获取频道列表失败')
+    } finally {
+      setLoading(false)
+    }
+  }, [status, source])
+
+  // Loads the distribution statistics; a failure simply leaves the stats row hidden.
+  const fetchStats = useCallback(async () => {
+    try {
+      const res = await getChannelsStats()
+      setStats(res.data)
+    } catch {
+      // ignore stats error
+    }
+  }, [])
+
+  useEffect(() => {
+    fetchStats()
+  }, [fetchStats])
+
+  // Whenever a filter changes, go back to the first page and reload.
+  useEffect(() => {
+    setPage(1)
+    fetchData(1)
+  }, [status, source, fetchData])
+
+  // Table columns; empty text and numeric values are rendered as '-'.
+  const columns = [
+    { title: 'ID', dataIndex: 'id', key: 'id', width: 80 },
+    {
+      title: '用户名',
+      dataIndex: 'username',
+      key: 'username',
+      render: (v: string) => v || '-',
+    },
+    {
+      title: '标题',
+      dataIndex: 'title',
+      key: 'title',
+      render: (v: string) => v || '-',
+    },
+    {
+      title: '成员数',
+      dataIndex: 'member_count',
+      key: 'member_count',
+      render: (v: number) => v?.toLocaleString() ?? '-',
+    },
+    {
+      title: '来源',
+      dataIndex: 'source',
+      key: 'source',
+      render: (v: string) => <Tag color={sourceColors[v] ?? 'default'}>{v}</Tag>,
+    },
+    {
+      title: '状态',
+      dataIndex: 'status',
+      key: 'status',
+      render: (v: string) => <Badge status={statusBadgeMap[v] ?? 'default'} text={v} />,
+    },
+    {
+      title: '相关度评分',
+      dataIndex: 'relevance_score',
+      key: 'relevance_score',
+      render: (v: number) => v?.toFixed(2) ?? '-',
+    },
+    {
+      title: '创建时间',
+      dataIndex: 'created_at',
+      key: 'created_at',
+      render: (t: string) => formatDateTime(t),
+    },
+  ]
+
+  return (
+    <div>
+      {stats && (
+        <Row gutter={[16, 16]} style={{ marginBottom: 16 }}>
+          {Object.entries(stats.source_distribution ?? {}).map(([key, val]) => (
+            <Col key={key}>
+              <Card size="small">
+                <Statistic title={`来源: ${key}`} value={val as number} />
+              </Card>
+            </Col>
+          ))}
+          {Object.entries(stats.status_distribution ?? {}).map(([key, val]) => (
+            <Col key={key}>
+              <Card size="small">
+                <Statistic title={`状态: ${key}`} value={val as number} />
+              </Card>
+            </Col>
+          ))}
+        </Row>
+      )}
+
+      <Row gutter={[16, 16]} style={{ marginBottom: 16 }}>
+        <Col>
+          <Select
+            style={{ width: 160 }}
+            value={status}
+            onChange={setStatus}
+            placeholder="状态筛选"
+          >
+            {statusOptions.map((o) => (
+              <Option key={o.value} value={o.value}>{o.label}</Option>
+            ))}
+          </Select>
+        </Col>
+        <Col>
+          <Select
+            style={{ width: 160 }}
+            value={source}
+            onChange={setSource}
+            placeholder="来源筛选"
+          >
+            {sourceOptions.map((o) => (
+              <Option key={o.value} value={o.value}>{o.label}</Option>
+            ))}
+          </Select>
+        </Col>
+      </Row>
+
+      <Table
+        dataSource={data}
+        columns={columns}
+        rowKey="id"
+        loading={loading}
+        pagination={{
+          current: page,
+          pageSize: 20,
+          total,
+          onChange: (p) => {
+            setPage(p)
+            fetchData(p)
+          },
+          showTotal: (t) => `共 ${t} 条`,
+        }}
+      />
+    </div>
+  )
+}

+ 171 - 0
web/src/pages/Dashboard.tsx

@@ -0,0 +1,171 @@
+import { useEffect, useState, useCallback } from 'react'
+import { Card, Col, Row, Statistic, Table, Progress, Typography, Tag, message, Space } from 'antd'
+import {
+  TeamOutlined,
+  DatabaseOutlined,
+  CheckCircleOutlined,
+  SafetyCertificateOutlined,
+  GlobalOutlined,
+  NodeIndexOutlined,
+} from '@ant-design/icons'
+import { getDashboard, type DashboardData, type Task } from '../api'
+import { useAppStore } from '../store'
+
+const { Text } = Typography
+
+// Maps a task status string to the antd Tag color used in the recent-task table.
+// Statuses missing from this map fall back to 'default' at the call site.
+const taskStatusColor: Record<string, string> = {
+  running: 'processing',
+  completed: 'success',
+  failed: 'error',
+  stopped: 'warning',
+  pending: 'default',
+}
+
+// Maps a task type string to the antd Tag color used in the recent-task table.
+// Types missing from this map fall back to 'default' at the call site.
+const taskTypeColor: Record<string, string> = {
+  full: 'purple',
+  discover: 'blue',
+  search: 'cyan',
+  github: 'geekblue',
+  scrape: 'orange',
+  crawl: 'green',
+  clean: 'lime',
+  score: 'gold',
+}
+
+/**
+ * Formats the elapsed time between two timestamps as a short Chinese duration string.
+ *
+ * @param start - Timestamp string parseable by `Date`, marking the beginning.
+ * @param end - Timestamp string marking the end, or null when there is no end yet.
+ * @returns `N秒`, `N分N秒` or `N时N分`; '-' when `end` is missing, when either
+ *          timestamp cannot be parsed, or when `end` precedes `start`.
+ */
+function formatDuration(start: string, end: string | null) {
+  if (!end) return '-'
+  const diff = Math.floor((new Date(end).getTime() - new Date(start).getTime()) / 1000)
+  // An unparseable timestamp yields NaN and clock skew can yield a negative value;
+  // neither is a meaningful duration, so show the same placeholder as "no end yet".
+  if (!Number.isFinite(diff) || diff < 0) return '-'
+  if (diff < 60) return `${diff}秒`
+  if (diff < 3600) return `${Math.floor(diff / 60)}分${diff % 60}秒`
+  return `${Math.floor(diff / 3600)}时${Math.floor((diff % 3600) / 60)}分`
+}
+
+/**
+ * Formats a timestamp as `MM-DD HH:mm:ss` in the browser's local time zone.
+ *
+ * @param dateStr - Timestamp string parseable by `Date`.
+ * @returns The zero-padded local date/time without the year, or '-' when the
+ *          input cannot be parsed (instead of rendering `NaN-NaN NaN:NaN:NaN`).
+ */
+function formatDateTime(dateStr: string) {
+  const d = new Date(dateStr)
+  // getTime() is NaN for an invalid Date; bail out before building the string.
+  if (Number.isNaN(d.getTime())) return '-'
+  const pad = (n: number) => String(n).padStart(2, '0')
+  const mm = pad(d.getMonth() + 1) // getMonth() is zero-based
+  const dd = pad(d.getDate())
+  const hh = pad(d.getHours())
+  const min = pad(d.getMinutes())
+  const ss = pad(d.getSeconds())
+  return `${mm}-${dd} ${hh}:${min}:${ss}`
+}
+
+/**
+ * Dashboard page.
+ *
+ * Loads the dashboard payload via `getDashboard`, refreshes it every 5 seconds,
+ * and renders three sections: counter cards, the currently running task with
+ * its progress, and a table of recent tasks.
+ */
+export default function Dashboard() {
+  const [data, setData] = useState<DashboardData | null>(null)
+  const [loading, setLoading] = useState(true)
+  const { setRunningTask } = useAppStore()
+
+  // Fetches the dashboard payload and mirrors the running task into the app store.
+  // `loading` is cleared after every attempt, successful or not.
+  const fetchData = useCallback(async () => {
+    try {
+      const res = await getDashboard()
+      setData(res.data)
+      setRunningTask(res.data.running_task)
+    } catch {
+      message.error('获取仪表盘数据失败')
+    } finally {
+      setLoading(false)
+    }
+  }, [setRunningTask])
+
+  // Fetch once immediately, then poll every 5 seconds; the timer is cleared on cleanup.
+  useEffect(() => {
+    fetchData()
+    const timer = setInterval(fetchData, 5000)
+    return () => clearInterval(timer)
+  }, [fetchData])
+
+  // Column definitions for the "recent tasks" table.
+  const recentTaskColumns = [
+    {
+      title: '任务类型',
+      dataIndex: 'task_type',
+      key: 'task_type',
+      render: (type: string) => <Tag color={taskTypeColor[type] ?? 'default'}>{type}</Tag>,
+    },
+    {
+      title: '状态',
+      dataIndex: 'status',
+      key: 'status',
+      render: (status: string) => (
+        <Tag color={taskStatusColor[status] ?? 'default'}>{status}</Tag>
+      ),
+    },
+    {
+      title: '创建时间',
+      dataIndex: 'created_at',
+      key: 'created_at',
+      render: (t: string) => formatDateTime(t),
+    },
+    {
+      title: '耗时',
+      key: 'duration',
+      // Duration is measured from created_at to completed_at.
+      render: (_: unknown, record: Task) =>
+        formatDuration(record.created_at, record.completed_at),
+    },
+  ]
+
+  // Counter cards; every value falls back to 0 while `data` is still null.
+  const stats = [
+    { title: '频道总数', value: data?.channel_count ?? 0, icon: <TeamOutlined />, color: '#1890ff' },
+    { title: '原始商户', value: data?.merchant_raw_count ?? 0, icon: <DatabaseOutlined />, color: '#52c41a' },
+    { title: '清洗商户', value: data?.merchant_clean_count ?? 0, icon: <CheckCircleOutlined />, color: '#13c2c2' },
+    { title: '有效商户', value: data?.merchant_valid_count ?? 0, icon: <SafetyCertificateOutlined />, color: '#722ed1' },
+    { title: '导航网页', value: data?.nav_site_count ?? 0, icon: <GlobalOutlined />, color: '#fa8c16' },
+    { title: '种子数量', value: data?.seed_count ?? 0, icon: <NodeIndexOutlined />, color: '#eb2f96' },
+  ]
+
+  return (
+    <div>
+      <Row gutter={[16, 16]}>
+        {stats.map((stat) => (
+          <Col key={stat.title} xs={24} sm={12} md={8} lg={4}>
+            <Card loading={loading}>
+              <Statistic
+                title={
+                  <Space>
+                    <span style={{ color: stat.color }}>{stat.icon}</span>
+                    {stat.title}
+                  </Space>
+                }
+                value={stat.value}
+                valueStyle={{ color: stat.color }}
+              />
+            </Card>
+          </Col>
+        ))}
+      </Row>
+
+      <Row gutter={[16, 16]} style={{ marginTop: 16 }}>
+        <Col span={24}>
+          <Card title="当前任务" loading={loading}>
+            {data?.running_task ? (
+              <div>
+                <Space style={{ marginBottom: 12 }}>
+                  <Tag color="processing">{data.running_task.task_type}</Tag>
+                  <Text>阶段:{data.running_task.current_phase || '初始化中'}</Text>
+                </Space>
+                <Progress
+                  percent={data.running_task.progress ?? 0}
+                  status="active"
+                />
+              </div>
+            ) : (
+              <Text type="secondary">暂无运行中的任务</Text>
+            )}
+          </Card>
+        </Col>
+      </Row>
+
+      <Row gutter={[16, 16]} style={{ marginTop: 16 }}>
+        <Col span={24}>
+          <Card title="最近任务" loading={loading}>
+            <Table
+              dataSource={data?.recent_tasks ?? []}
+              columns={recentTaskColumns}
+              rowKey="id"
+              pagination={false}
+              size="small"
+            />
+          </Card>
+        </Col>
+      </Row>
+    </div>
+  )
+}

+ 228 - 0
web/src/pages/Keywords.tsx

@@ -0,0 +1,228 @@
+import { useEffect, useState, useCallback } from 'react'
+import {
+  Table,
+  Button,
+  Modal,
+  Form,
+  Input,
+  Select,
+  Switch,
+  Space,
+  message,
+  Popconfirm,
+  Tag,
+} from 'antd'
+import { PlusOutlined, DeleteOutlined } from '@ant-design/icons'
+import { getKeywords, createKeywords, updateKeyword, deleteKeyword, type Keyword } from '../api'
+
+const { Option } = Select
+const { TextArea } = Input
+
+/** Renders a timestamp string in the zh-CN locale's date/time format. */
+function formatDateTime(dateStr: string) {
+  const parsed = new Date(dateStr)
+  return parsed.toLocaleString('zh-CN')
+}
+
+// Maps a keyword category to the antd Tag color used in the category column.
+// Categories missing from this map fall back to 'default' at the call site.
+const categoryColors: Record<string, string> = {
+  product: 'blue',
+  service: 'green',
+  location: 'orange',
+  brand: 'purple',
+  other: 'default',
+}
+
+// Choices offered by the category dropdown in the batch-add dialog.
+const categoryOptions = [
+  { label: 'product', value: 'product' },
+  { label: 'service', value: 'service' },
+  { label: 'location', value: 'location' },
+  { label: 'brand', value: 'brand' },
+  { label: 'other', value: 'other' },
+]
+
+// Values collected by the batch-add form.
+interface BatchFormValues {
+  // Raw textarea content; split on newlines into individual keywords on save.
+  keywords_text: string
+  // Category applied to every keyword in the batch.
+  category: string
+}
+
+/**
+ * Keyword management page.
+ *
+ * Shows a paginated keyword table (20 rows per page) with an inline status
+ * switch and a delete action, plus a modal for adding many keywords at once
+ * (one per line) under a single category.
+ */
+export default function Keywords() {
+  const [data, setData] = useState<Keyword[]>([])
+  const [total, setTotal] = useState(0)
+  const [page, setPage] = useState(1)
+  const [loading, setLoading] = useState(false)
+  const [modalOpen, setModalOpen] = useState(false)
+  const [saving, setSaving] = useState(false)
+  const [form] = Form.useForm<BatchFormValues>()
+
+  // Loads one page of keywords and the total row count.
+  const fetchData = useCallback(async (currentPage = 1) => {
+    setLoading(true)
+    try {
+      const res = await getKeywords({ page: currentPage, page_size: 20 })
+      setData(res.data.items)
+      setTotal(res.data.total)
+    } catch {
+      message.error('获取关键词列表失败')
+    } finally {
+      setLoading(false)
+    }
+  }, [])
+
+  // Reload whenever the current page changes (pagination only updates `page`).
+  useEffect(() => {
+    fetchData(page)
+  }, [page, fetchData])
+
+  // Opens the batch-add dialog with a cleared form.
+  const handleBatchAdd = () => {
+    form.resetFields()
+    setModalOpen(true)
+  }
+
+  // Validates the form, splits the textarea into trimmed non-empty lines,
+  // submits them as one batch, then closes the dialog and reloads the page.
+  const handleSave = async () => {
+    try {
+      const values = await form.validateFields()
+      const keywords = values.keywords_text
+        .split('\n')
+        .map((k: string) => k.trim())
+        .filter((k: string) => k.length > 0)
+      if (keywords.length === 0) {
+        message.warning('请输入至少一个关键词')
+        return
+      }
+      setSaving(true)
+      await createKeywords({ keywords, category: values.category })
+      message.success(`成功添加 ${keywords.length} 个关键词`)
+      setModalOpen(false)
+      fetchData(page)
+    } catch (err) {
+      // A rejection carrying `errorFields` is a form validation failure; the form
+      // already shows the field errors, so no extra toast is raised.
+      if (err && typeof err === 'object' && 'errorFields' in err) return
+      message.error('添加失败')
+    } finally {
+      setSaving(false)
+    }
+  }
+
+  // Deletes one keyword and reloads the current page.
+  const handleDelete = async (id: number) => {
+    try {
+      await deleteKeyword(id)
+      message.success('删除成功')
+      fetchData(page)
+    } catch {
+      message.error('删除失败')
+    }
+  }
+
+  // Persists the new enabled/disabled state and reloads the current page.
+  const handleStatusToggle = async (record: Keyword, checked: boolean) => {
+    try {
+      await updateKeyword(record.id, { status: checked })
+      message.success('状态已更新')
+      fetchData(page)
+    } catch {
+      message.error('状态更新失败')
+    }
+  }
+
+  // Column definitions for the keyword table.
+  const columns = [
+    { title: 'ID', dataIndex: 'id', key: 'id', width: 80 },
+    {
+      title: '关键词',
+      dataIndex: 'keyword',
+      key: 'keyword',
+    },
+    {
+      title: '分类',
+      dataIndex: 'category',
+      key: 'category',
+      render: (v: string) => <Tag color={categoryColors[v] ?? 'default'}>{v}</Tag>,
+    },
+    {
+      title: '状态',
+      dataIndex: 'status',
+      key: 'status',
+      render: (v: boolean, record: Keyword) => (
+        <Switch
+          checked={v}
+          onChange={(checked) => handleStatusToggle(record, checked)}
+          checkedChildren="启用"
+          unCheckedChildren="禁用"
+        />
+      ),
+    },
+    {
+      title: '创建时间',
+      dataIndex: 'created_at',
+      key: 'created_at',
+      render: (t: string) => formatDateTime(t),
+    },
+    {
+      title: '操作',
+      key: 'action',
+      render: (_: unknown, record: Keyword) => (
+        <Space>
+          <Popconfirm
+            title="确认删除该关键词?"
+            onConfirm={() => handleDelete(record.id)}
+            okText="确认"
+            cancelText="取消"
+          >
+            <Button size="small" danger icon={<DeleteOutlined />}>删除</Button>
+          </Popconfirm>
+        </Space>
+      ),
+    },
+  ]
+
+  return (
+    <div>
+      <div style={{ marginBottom: 16 }}>
+        <Button type="primary" icon={<PlusOutlined />} onClick={handleBatchAdd}>
+          批量添加关键词
+        </Button>
+      </div>
+
+      <Table
+        dataSource={data}
+        columns={columns}
+        rowKey="id"
+        loading={loading}
+        pagination={{
+          current: page,
+          pageSize: 20,
+          total,
+          onChange: (p) => setPage(p),
+          showTotal: (t) => `共 ${t} 条`,
+        }}
+      />
+
+      <Modal
+        title="批量添加关键词"
+        open={modalOpen}
+        onOk={handleSave}
+        onCancel={() => setModalOpen(false)}
+        confirmLoading={saving}
+        okText="添加"
+        cancelText="取消"
+      >
+        <Form form={form} layout="vertical" style={{ marginTop: 16 }}>
+          <Form.Item
+            name="keywords_text"
+            label="关键词列表(每行一个)"
+            rules={[{ required: true, message: '请输入关键词' }]}
+          >
+            <TextArea
+              rows={8}
+              placeholder="每行输入一个关键词,例如:&#10;外卖&#10;超市&#10;便利店"
+            />
+          </Form.Item>
+          <Form.Item
+            name="category"
+            label="分类"
+            rules={[{ required: true, message: '请选择分类' }]}
+          >
+            <Select placeholder="选择分类">
+              {categoryOptions.map((o) => (
+                <Option key={o.value} value={o.value}>{o.label}</Option>
+              ))}
+            </Select>
+          </Form.Item>
+        </Form>
+      </Modal>
+    </div>
+  )
+}

+ 194 - 0
web/src/pages/Logs.tsx

@@ -0,0 +1,194 @@
+import { useEffect, useRef, useState, useCallback } from 'react'
+import { Card, Typography, Badge, Space, Button, Select } from 'antd'
+import { ClearOutlined, PauseOutlined, PlayCircleOutlined } from '@ant-design/icons'
+import { useAppStore } from '../store'
+
+const { Text } = Typography
+const { Option } = Select
+
+// One rendered line in the log viewer.
+interface LogLine {
+  // Unique value used as the React key for the line.
+  id: number
+  // Message text as received (or a locally generated system notice).
+  text: string
+  // Local HH:mm:ss time at which the line was appended on the client.
+  timestamp: string
+}
+
+// Module-level counter that hands out LogLine ids; incremented on every append.
+let logIdCounter = 0
+
+// Picks the text color for one log line from the markers it contains.
+// Order matters: error markers win over warnings, which win over system notices.
+function logLineColor(text: string): string {
+  if (text.includes('[ERROR]') || text.includes('[系统] WebSocket')) return '#ff6b6b'
+  if (text.includes('[WARN]')) return '#ffd93d'
+  if (text.includes('[系统]')) return '#74b9ff'
+  return '#00ff41'
+}
+
+/**
+ * Live log viewer.
+ *
+ * Opens a WebSocket to `/api/v1/tasks/{id}/logs` for the running task taken
+ * from the app store (or for a manually selected task id when nothing is
+ * running) and renders the received lines in a terminal-style panel. The
+ * buffer keeps at most the latest 2000 lines; the view can be paused
+ * (incoming lines are dropped while paused) and cleared.
+ */
+export default function Logs() {
+  const { runningTask } = useAppStore()
+  const [logs, setLogs] = useState<LogLine[]>([])
+  const [connected, setConnected] = useState(false)
+  const [paused, setPaused] = useState(false)
+  const [connectedTaskId, setConnectedTaskId] = useState<number | null>(null)
+  const wsRef = useRef<WebSocket | null>(null)
+  const logContainerRef = useRef<HTMLDivElement>(null)
+  const pausedRef = useRef(false)
+
+  // Mirror `paused` into a ref so the stable `appendLog` callback (and therefore
+  // the socket handlers that captured it) always sees the latest value.
+  pausedRef.current = paused
+
+  // Appends one line stamped with the local time; lines arriving while paused are dropped.
+  const appendLog = useCallback((text: string) => {
+    if (pausedRef.current) return
+    const now = new Date()
+    const ts = `${String(now.getHours()).padStart(2, '0')}:${String(now.getMinutes()).padStart(2, '0')}:${String(now.getSeconds()).padStart(2, '0')}`
+    setLogs((prev) => {
+      const newLog: LogLine = { id: ++logIdCounter, text, timestamp: ts }
+      const next = [...prev, newLog]
+      // Cap the buffer at the most recent 2000 lines.
+      return next.length > 2000 ? next.slice(next.length - 2000) : next
+    })
+  }, [])
+
+  // Replaces the current connection (if any) with one to the given task's log stream.
+  const connect = useCallback((taskId: number) => {
+    if (wsRef.current) {
+      wsRef.current.close()
+    }
+
+    setConnectedTaskId(taskId)
+    setLogs([])
+
+    const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:'
+    const host = window.location.host
+    const wsUrl = `${protocol}//${host}/api/v1/tasks/${taskId}/logs`
+
+    const ws = new WebSocket(wsUrl)
+    wsRef.current = ws
+
+    // close() completes asynchronously, so a replaced socket can still deliver
+    // events after its successor is installed. Every handler therefore ignores
+    // events from a socket that is no longer the current one; otherwise the old
+    // socket's close event would flip the badge to "disconnected" and write a
+    // stale notice into the new task's log.
+    const isCurrent = () => wsRef.current === ws
+
+    ws.onopen = () => {
+      if (!isCurrent()) return
+      setConnected(true)
+      appendLog(`[系统] 已连接到任务 #${taskId} 的日志流`)
+    }
+
+    ws.onmessage = (event: MessageEvent) => {
+      if (!isCurrent()) return
+      const text = typeof event.data === 'string' ? event.data : String(event.data)
+      appendLog(text)
+    }
+
+    ws.onerror = () => {
+      if (!isCurrent()) return
+      appendLog('[系统] WebSocket 连接错误')
+    }
+
+    ws.onclose = () => {
+      if (!isCurrent()) return
+      setConnected(false)
+      appendLog(`[系统] 与任务 #${taskId} 的连接已断开`)
+    }
+  }, [appendLog])
+
+  const runningTaskId = runningTask?.id
+
+  // Follow the running task automatically; close the socket when the task id
+  // changes or the page unmounts.
+  useEffect(() => {
+    if (runningTaskId !== undefined) {
+      connect(runningTaskId)
+    }
+    return () => {
+      wsRef.current?.close()
+    }
+  }, [runningTaskId, connect])
+
+  // Keep the panel scrolled to the newest line unless the view is paused.
+  useEffect(() => {
+    if (!paused && logContainerRef.current) {
+      logContainerRef.current.scrollTop = logContainerRef.current.scrollHeight
+    }
+  }, [logs, paused])
+
+  const handleClear = () => {
+    setLogs([])
+  }
+
+  const handleTogglePause = () => {
+    setPaused((p) => !p)
+  }
+
+  const handleManualConnect = (taskId: number) => {
+    connect(taskId)
+  }
+
+  return (
+    <div>
+      <Card
+        title={
+          <Space>
+            <Badge
+              status={connected ? 'success' : 'error'}
+              text={connected ? '已连接' : '未连接'}
+            />
+            {connectedTaskId !== null && (
+              <Text type="secondary">任务 #{connectedTaskId}</Text>
+            )}
+            {!runningTask && (
+              <Space>
+                <Text type="secondary">手动连接任务 ID:</Text>
+                <Select
+                  style={{ width: 120 }}
+                  placeholder="输入任务ID"
+                  showSearch
+                  onSelect={(v: number) => handleManualConnect(v)}
+                >
+                  {[1, 2, 3, 4, 5].map((id) => (
+                    <Option key={id} value={id}>#{id}</Option>
+                  ))}
+                </Select>
+              </Space>
+            )}
+          </Space>
+        }
+        extra={
+          <Space>
+            <Button
+              size="small"
+              icon={paused ? <PlayCircleOutlined /> : <PauseOutlined />}
+              onClick={handleTogglePause}
+            >
+              {paused ? '继续' : '暂停'}
+            </Button>
+            <Button size="small" icon={<ClearOutlined />} onClick={handleClear}>
+              清空
+            </Button>
+          </Space>
+        }
+        styles={{ body: { padding: 0 } }}
+      >
+        <div
+          ref={logContainerRef}
+          style={{
+            background: '#1a1a1a',
+            color: '#00ff41',
+            fontFamily: 'Monaco, Menlo, "Courier New", monospace',
+            fontSize: 13,
+            lineHeight: '1.6',
+            height: 'calc(100vh - 220px)',
+            overflowY: 'auto',
+            padding: '12px 16px',
+            borderRadius: '0 0 8px 8px',
+          }}
+        >
+          {logs.length === 0 ? (
+            <span style={{ color: '#555' }}>
+              {runningTask
+                ? '正在连接日志流...'
+                : '当前没有运行中的任务。启动任务后日志将自动显示。'}
+            </span>
+          ) : (
+            logs.map((log) => (
+              <div key={log.id} style={{ display: 'flex', gap: 12 }}>
+                <span style={{ color: '#555', flexShrink: 0 }}>{log.timestamp}</span>
+                <span
+                  style={{
+                    color: logLineColor(log.text),
+                    wordBreak: 'break-all',
+                  }}
+                >
+                  {log.text}
+                </span>
+              </div>
+            ))
+          )}
+        </div>
+      </Card>
+    </div>
+  )
+}

+ 182 - 0
web/src/pages/MerchantsClean.tsx

@@ -0,0 +1,182 @@
+import { useEffect, useState, useCallback } from 'react'
+import { Table, Tag, Select, Input, InputNumber, Space, message, Row, Col, Progress, Badge } from 'antd'
+import { getMerchantsClean, type MerchantClean } from '../api'
+
+const { Option } = Select
+
+/** Renders a timestamp string in the zh-CN locale's date/time format. */
+function formatDateTime(dateStr: string) {
+  const parsed = new Date(dateStr)
+  return parsed.toLocaleString('zh-CN')
+}
+
+// Choices for the status filter dropdown. The empty value ('全部') means
+// "no filter": the component only sends `status` when it is non-empty.
+const statusOptions = [
+  { label: '全部', value: '' },
+  { label: 'valid', value: 'valid' },
+  { label: 'invalid', value: 'invalid' },
+  { label: 'bot', value: 'bot' },
+  { label: 'duplicate', value: 'duplicate' },
+  { label: 'group', value: 'group' },
+]
+
+// Maps a merchant status to the antd Badge status used in the status column.
+// Statuses missing from this map fall back to 'default' at the call site.
+const statusBadgeMap: Record<string, 'success' | 'error' | 'warning' | 'default' | 'processing'> = {
+  valid: 'success',
+  invalid: 'error',
+  bot: 'warning',
+  duplicate: 'default',
+  group: 'processing',
+}
+
+/**
+ * Cleaned-merchant list page.
+ *
+ * Shows a paginated table (20 rows per page, ordered by quality_score
+ * descending) that can be filtered by status, industry and minimum quality score.
+ */
+export default function MerchantsClean() {
+  const [data, setData] = useState<MerchantClean[]>([])
+  const [total, setTotal] = useState(0)
+  const [page, setPage] = useState(1)
+  const [loading, setLoading] = useState(false)
+  const [status, setStatus] = useState('')
+  const [industry, setIndustry] = useState('')
+  const [minScore, setMinScore] = useState<number | null>(null)
+
+  // Loads one page using the current filters; empty filters are left out of the request.
+  const fetchData = useCallback(async (currentPage = 1) => {
+    setLoading(true)
+    try {
+      const params: Record<string, unknown> = {
+        page: currentPage,
+        page_size: 20,
+        order_by: 'quality_score',
+        order_dir: 'desc',
+      }
+      if (status) params.status = status
+      if (industry) params.industry = industry
+      if (minScore !== null) params.min_quality_score = minScore
+      const res = await getMerchantsClean(params)
+      setData(res.data.items)
+      setTotal(res.data.total)
+    } catch {
+      message.error('获取清洗商户数据失败')
+    } finally {
+      setLoading(false)
+    }
+  }, [status, industry, minScore])
+
+  // Any filter change resets to page 1 and reloads.
+  // NOTE(review): the industry input updates state on every change event, so this
+  // issues one request per keystroke — consider debouncing.
+  useEffect(() => {
+    setPage(1)
+    fetchData(1)
+  }, [status, industry, minScore, fetchData])
+
+  // Column definitions for the merchant table.
+  const columns = [
+    { title: 'ID', dataIndex: 'id', key: 'id', width: 80 },
+    {
+      title: 'TG用户名',
+      dataIndex: 'tg_username',
+      key: 'tg_username',
+      render: (v: string) => v || '-',
+    },
+    {
+      title: '商户名',
+      dataIndex: 'merchant_name',
+      key: 'merchant_name',
+      render: (v: string) => v || '-',
+    },
+    {
+      title: '状态',
+      dataIndex: 'status',
+      key: 'status',
+      render: (v: string) => (
+        <Badge status={statusBadgeMap[v] ?? 'default'} text={v} />
+      ),
+    },
+    {
+      title: '行业',
+      dataIndex: 'industry',
+      key: 'industry',
+      render: (v: string) => v || '-',
+    },
+    {
+      title: '质量分',
+      dataIndex: 'quality_score',
+      key: 'quality_score',
+      width: 160,
+      // Score shown rounded to one decimal; bar is green from 70, amber from 40, red below.
+      render: (v: number) => (
+        <Progress
+          percent={Math.round((v ?? 0) * 10) / 10}
+          size="small"
+          strokeColor={v >= 70 ? '#52c41a' : v >= 40 ? '#faad14' : '#ff4d4f'}
+        />
+      ),
+    },
+    {
+      title: 'Premium',
+      dataIndex: 'is_premium',
+      key: 'is_premium',
+      render: (v: boolean) => v ? <Tag color="gold">Premium</Tag> : <Tag>普通</Tag>,
+    },
+    {
+      title: '活跃度',
+      dataIndex: 'activity_score',
+      key: 'activity_score',
+      render: (v: number) => v?.toFixed(2) ?? '-',
+    },
+    {
+      title: '创建时间',
+      dataIndex: 'created_at',
+      key: 'created_at',
+      render: (t: string) => formatDateTime(t),
+    },
+  ]
+
+  return (
+    <div>
+      <Row gutter={[16, 16]} style={{ marginBottom: 16 }}>
+        <Col>
+          <Select
+            style={{ width: 160 }}
+            value={status}
+            onChange={setStatus}
+            placeholder="状态筛选"
+          >
+            {statusOptions.map((o) => (
+              <Option key={o.value} value={o.value}>{o.label}</Option>
+            ))}
+          </Select>
+        </Col>
+        <Col>
+          <Input
+            placeholder="行业"
+            value={industry}
+            onChange={(e) => setIndustry(e.target.value)}
+            style={{ width: 160 }}
+            allowClear
+          />
+        </Col>
+        <Col>
+          <Space>
+            <span>最低质量分:</span>
+            <InputNumber
+              min={0}
+              max={100}
+              value={minScore}
+              onChange={(v) => setMinScore(v)}
+              placeholder="0-100"
+              style={{ width: 100 }}
+            />
+          </Space>
+        </Col>
+      </Row>
+
+      <Table
+        dataSource={data}
+        columns={columns}
+        rowKey="id"
+        loading={loading}
+        pagination={{
+          current: page,
+          pageSize: 20,
+          total,
+          onChange: (p) => {
+            setPage(p)
+            fetchData(p)
+          },
+          showTotal: (t) => `共 ${t} 条`,
+        }}
+      />
+    </div>
+  )
+}

+ 162 - 0
web/src/pages/MerchantsRaw.tsx

@@ -0,0 +1,162 @@
+import { useEffect, useState, useCallback } from 'react'
+import { Table, Tag, Select, Input, Space, message, Row, Col } from 'antd'
+import { SearchOutlined } from '@ant-design/icons'
+import { getMerchantsRaw, type MerchantRaw } from '../api'
+
+const { Option } = Select
+
+/** Renders a timestamp string in the zh-CN locale's date/time format. */
+function formatDateTime(dateStr: string) {
+  const parsed = new Date(dateStr)
+  return parsed.toLocaleString('zh-CN')
+}
+
+// Choices for the status filter dropdown. The empty value ('全部') means
+// "no filter": the component only sends `status` when it is non-empty.
+const statusOptions = [
+  { label: '全部', value: '' },
+  { label: 'raw', value: 'raw' },
+  { label: 'glm_parsed', value: 'glm_parsed' },
+]
+
+// Choices for the source-type filter dropdown; the empty value likewise means "no filter".
+const sourceTypeOptions = [
+  { label: '全部', value: '' },
+  { label: 'telegram', value: 'telegram' },
+  { label: 'web', value: 'web' },
+  { label: 'github', value: 'github' },
+]
+
+/**
+ * Raw-merchant list page.
+ *
+ * Shows a paginated table (20 rows per page, newest first by created_at) that
+ * can be filtered by status, source type and Telegram username.
+ */
+export default function MerchantsRaw() {
+  const [data, setData] = useState<MerchantRaw[]>([])
+  const [total, setTotal] = useState(0)
+  const [page, setPage] = useState(1)
+  const [loading, setLoading] = useState(false)
+  const [status, setStatus] = useState('')
+  const [sourceType, setSourceType] = useState('')
+  const [searchText, setSearchText] = useState('')
+
+  // Loads one page using the current filters; empty filters are left out of the request.
+  const fetchData = useCallback(async (currentPage = 1) => {
+    setLoading(true)
+    try {
+      const params: Record<string, unknown> = {
+        page: currentPage,
+        page_size: 20,
+        order_by: 'created_at',
+        order_dir: 'desc',
+      }
+      if (status) params.status = status
+      if (sourceType) params.source_type = sourceType
+      if (searchText) params.tg_username = searchText
+      const res = await getMerchantsRaw(params)
+      setData(res.data.items)
+      setTotal(res.data.total)
+    } catch {
+      message.error('获取原始商户数据失败')
+    } finally {
+      setLoading(false)
+    }
+  }, [status, sourceType, searchText])
+
+  // Any filter change resets to page 1 and reloads.
+  // NOTE(review): the search input updates state on every change event, so this
+  // issues one request per keystroke — consider debouncing.
+  useEffect(() => {
+    setPage(1)
+    fetchData(1)
+  }, [status, sourceType, searchText, fetchData])
+
+  // Column definitions for the merchant table.
+  const columns = [
+    { title: 'ID', dataIndex: 'id', key: 'id', width: 80 },
+    {
+      title: 'TG用户名',
+      dataIndex: 'tg_username',
+      key: 'tg_username',
+      render: (v: string) => v || '-',
+    },
+    {
+      title: '商户名',
+      dataIndex: 'merchant_name',
+      key: 'merchant_name',
+      render: (v: string) => v || '-',
+    },
+    {
+      title: '来源类型',
+      dataIndex: 'source_type',
+      key: 'source_type',
+      render: (v: string) => <Tag>{v}</Tag>,
+    },
+    {
+      title: '状态',
+      dataIndex: 'status',
+      key: 'status',
+      // 'glm_parsed' is shown in green; every other status in orange.
+      render: (v: string) => (
+        <Tag color={v === 'glm_parsed' ? 'green' : 'orange'}>{v}</Tag>
+      ),
+    },
+    {
+      title: '行业',
+      dataIndex: 'industry',
+      key: 'industry',
+      render: (v: string) => v || '-',
+    },
+    {
+      title: '创建时间',
+      dataIndex: 'created_at',
+      key: 'created_at',
+      render: (t: string) => formatDateTime(t),
+    },
+  ]
+
+  return (
+    <div>
+      <Row gutter={[16, 16]} style={{ marginBottom: 16 }}>
+        <Col>
+          <Select
+            style={{ width: 160 }}
+            value={status}
+            onChange={setStatus}
+            placeholder="状态筛选"
+          >
+            {statusOptions.map((o) => (
+              <Option key={o.value} value={o.value}>{o.label}</Option>
+            ))}
+          </Select>
+        </Col>
+        <Col>
+          <Select
+            style={{ width: 160 }}
+            value={sourceType}
+            onChange={setSourceType}
+            placeholder="来源类型"
+          >
+            {sourceTypeOptions.map((o) => (
+              <Option key={o.value} value={o.value}>{o.label}</Option>
+            ))}
+          </Select>
+        </Col>
+        <Col>
+          <Space.Compact>
+            <Input
+              prefix={<SearchOutlined />}
+              placeholder="搜索 TG 用户名"
+              value={searchText}
+              onChange={(e) => setSearchText(e.target.value)}
+              style={{ width: 220 }}
+              allowClear
+            />
+          </Space.Compact>
+        </Col>
+      </Row>
+
+      <Table
+        dataSource={data}
+        columns={columns}
+        rowKey="id"
+        loading={loading}
+        pagination={{
+          current: page,
+          pageSize: 20,
+          total,
+          onChange: (p) => {
+            setPage(p)
+            fetchData(p)
+          },
+          showTotal: (t) => `共 ${t} 条`,
+        }}
+      />
+    </div>
+  )
+}

+ 138 - 0
web/src/pages/NavSites.tsx

@@ -0,0 +1,138 @@
+import { useEffect, useState, useCallback } from 'react'
+import { Table, Tag, Select, message, Row, Col } from 'antd'
+import { getNavSites, type NavSite } from '../api'
+
+const { Option } = Select
+
+/** Renders a timestamp string in the zh-CN locale's date/time format. */
+function formatDateTime(dateStr: string) {
+  const parsed = new Date(dateStr)
+  return parsed.toLocaleString('zh-CN')
+}
+
+// Choices for the status filter dropdown. The empty value ('全部') means
+// "no filter": the component only sends `status` when it is non-empty.
+const statusOptions = [
+  { label: '全部', value: '' },
+  { label: 'pending', value: 'pending' },
+  { label: 'crawled', value: 'crawled' },
+  { label: 'filtered', value: 'filtered' },
+  { label: 'error', value: 'error' },
+]
+
+// Maps a nav-site status to the antd Tag color used in the status column.
+// Statuses missing from this map fall back to 'default' at the call site.
+const statusColors: Record<string, string> = {
+  pending: 'orange',
+  crawled: 'green',
+  filtered: 'red',
+  error: 'volcano',
+}
+
+/**
+ * Navigation-site list page.
+ *
+ * Shows a paginated table (20 rows per page) of nav sites that can be
+ * filtered by status. Each row shows the site URL, domain, status, filter
+ * reason, merchant count and creation time.
+ */
+export default function NavSites() {
+  const [data, setData] = useState<NavSite[]>([])
+  const [total, setTotal] = useState(0)
+  const [page, setPage] = useState(1)
+  const [loading, setLoading] = useState(false)
+  const [status, setStatus] = useState('')
+
+  // Loads one page using the current status filter; an empty filter is left out of the request.
+  const fetchData = useCallback(async (currentPage = 1) => {
+    setLoading(true)
+    try {
+      const params: Record<string, unknown> = { page: currentPage, page_size: 20 }
+      if (status) params.status = status
+      const res = await getNavSites(params)
+      setData(res.data.items)
+      setTotal(res.data.total)
+    } catch {
+      message.error('获取导航网页数据失败')
+    } finally {
+      setLoading(false)
+    }
+  }, [status])
+
+  // A filter change resets to page 1 and reloads.
+  useEffect(() => {
+    setPage(1)
+    fetchData(1)
+  }, [status, fetchData])
+
+  // Column definitions for the nav-site table.
+  const columns = [
+    { title: 'ID', dataIndex: 'id', key: 'id', width: 80 },
+    {
+      title: 'URL',
+      dataIndex: 'url',
+      key: 'url',
+      render: (v: string) => {
+        // A row without a URL would otherwise throw on `v.length`.
+        if (!v) return '-'
+        const label = v.length > 60 ? v.slice(0, 60) + '...' : v
+        const cellStyle = {
+          maxWidth: 300,
+          display: 'inline-block',
+          overflow: 'hidden',
+          textOverflow: 'ellipsis',
+          whiteSpace: 'nowrap',
+        } as const
+        // The URL is stored data, not a trusted constant: only http(s) URLs are
+        // rendered as clickable links so that a `javascript:` (or other scheme)
+        // value can never execute when clicked. Anything else is shown as text.
+        if (!/^https?:\/\//i.test(v)) {
+          return (
+            <span style={cellStyle} title={v}>
+              {label}
+            </span>
+          )
+        }
+        return (
+          <a
+            href={v}
+            target="_blank"
+            rel="noreferrer"
+            style={cellStyle}
+            title={v}
+          >
+            {label}
+          </a>
+        )
+      },
+    },
+    {
+      title: '域名',
+      dataIndex: 'domain',
+      key: 'domain',
+      render: (v: string) => v || '-',
+    },
+    {
+      title: '状态',
+      dataIndex: 'status',
+      key: 'status',
+      render: (v: string) => <Tag color={statusColors[v] ?? 'default'}>{v}</Tag>,
+    },
+    {
+      title: '过滤原因',
+      dataIndex: 'filter_reason',
+      key: 'filter_reason',
+      render: (v: string | null) => v || '-',
+    },
+    {
+      title: '商户数量',
+      dataIndex: 'merchant_count',
+      key: 'merchant_count',
+      render: (v: number) => v ?? 0,
+    },
+    {
+      title: '创建时间',
+      dataIndex: 'created_at',
+      key: 'created_at',
+      render: (t: string) => formatDateTime(t),
+    },
+  ]
+
+  return (
+    <div>
+      <Row gutter={[16, 16]} style={{ marginBottom: 16 }}>
+        <Col>
+          <Select
+            style={{ width: 160 }}
+            value={status}
+            onChange={setStatus}
+            placeholder="状态筛选"
+          >
+            {statusOptions.map((o) => (
+              <Option key={o.value} value={o.value}>{o.label}</Option>
+            ))}
+          </Select>
+        </Col>
+      </Row>
+
+      <Table
+        dataSource={data}
+        columns={columns}
+        rowKey="id"
+        loading={loading}
+        pagination={{
+          current: page,
+          pageSize: 20,
+          total,
+          onChange: (p) => {
+            setPage(p)
+            fetchData(p)
+          },
+          showTotal: (t) => `共 ${t} 条`,
+        }}
+      />
+    </div>
+  )
+}

+ 217 - 0
web/src/pages/Seeds.tsx

@@ -0,0 +1,217 @@
+import { useEffect, useState, useCallback } from 'react'
+import {
+  Table,
+  Button,
+  Modal,
+  Form,
+  Input,
+  Switch,
+  Space,
+  message,
+  Popconfirm,
+} from 'antd'
+import { PlusOutlined, EditOutlined, DeleteOutlined } from '@ant-design/icons'
+import { getSeeds, createSeed, updateSeed, deleteSeed, type Seed } from '../api'
+
+/** Formats a date string (anything the Date constructor accepts) using the zh-CN locale. */
+function formatDateTime(dateStr: string) {
+  const parsed = new Date(dateStr)
+  return parsed.toLocaleString('zh-CN')
+}
+
+/** Values held by the add/edit seed modal form. */
+interface SeedFormValues {
+  // Channel name; the only required field in the form.
+  channel_name: string
+  // Free-text remark entered in the text area.
+  note: string
+  // Bound to the enable/disable Switch (checked = enabled).
+  status: boolean
+}
+
+/**
+ * Seed management page: a paginated table of seeds with add / edit / delete
+ * actions and an inline enable/disable switch.
+ *
+ * Rows are loaded through `getSeeds` 20 at a time. Creating and editing share
+ * one modal form; `editingRecord` is null while creating.
+ */
+export default function Seeds() {
+  const [data, setData] = useState<Seed[]>([])
+  const [total, setTotal] = useState(0)
+  const [page, setPage] = useState(1)
+  const [loading, setLoading] = useState(false)
+  const [modalOpen, setModalOpen] = useState(false)
+  const [editingRecord, setEditingRecord] = useState<Seed | null>(null)
+  const [saving, setSaving] = useState(false)
+  const [form] = Form.useForm<SeedFormValues>()
+
+  // Loads one page of seeds and the total row count.
+  const fetchData = useCallback(async (currentPage = 1) => {
+    setLoading(true)
+    try {
+      const res = await getSeeds({ page: currentPage, page_size: 20 })
+      setData(res.data.items)
+      setTotal(res.data.total)
+    } catch {
+      message.error('获取种子列表失败')
+    } finally {
+      setLoading(false)
+    }
+  }, [])
+
+  // Reload whenever the current page changes (and once on mount).
+  useEffect(() => {
+    fetchData(page)
+  }, [page, fetchData])
+
+  // Opens the modal in "create" mode with the switch defaulting to enabled.
+  const handleAdd = () => {
+    setEditingRecord(null)
+    form.resetFields()
+    form.setFieldsValue({ status: true })
+    setModalOpen(true)
+  }
+
+  // Opens the modal in "edit" mode, pre-filled from the clicked row.
+  const handleEdit = (record: Seed) => {
+    setEditingRecord(record)
+    form.setFieldsValue({
+      channel_name: record.channel_name,
+      note: record.note,
+      status: record.status,
+    })
+    setModalOpen(true)
+  }
+
+  const handleDelete = async (id: number) => {
+    try {
+      await deleteSeed(id)
+      message.success('删除成功')
+      if (data.length === 1 && page > 1) {
+        // The deleted row was the only one on this page; refetching the same
+        // page would show an empty table. Step back one page instead and let
+        // the page effect above reload it.
+        setPage(page - 1)
+      } else {
+        fetchData(page)
+      }
+    } catch {
+      message.error('删除失败')
+    }
+  }
+
+  // Validates the form, then creates or updates depending on `editingRecord`.
+  const handleSave = async () => {
+    try {
+      const values = await form.validateFields()
+      setSaving(true)
+      if (editingRecord) {
+        await updateSeed(editingRecord.id, values)
+        message.success('更新成功')
+      } else {
+        await createSeed(values)
+        message.success('添加成功')
+      }
+      setModalOpen(false)
+      fetchData(page)
+    } catch (err) {
+      // A validation rejection carries `errorFields`; the form already shows
+      // those inline, so no toast is needed.
+      if (err && typeof err === 'object' && 'errorFields' in err) return
+      message.error('保存失败')
+    } finally {
+      setSaving(false)
+    }
+  }
+
+  // Persists the inline switch change, then reloads the current page.
+  const handleStatusToggle = async (record: Seed, checked: boolean) => {
+    try {
+      await updateSeed(record.id, { status: checked })
+      message.success('状态已更新')
+      fetchData(page)
+    } catch {
+      message.error('状态更新失败')
+    }
+  }
+
+  const columns = [
+    { title: 'ID', dataIndex: 'id', key: 'id', width: 80 },
+    {
+      title: '频道名',
+      dataIndex: 'channel_name',
+      key: 'channel_name',
+    },
+    {
+      title: '状态',
+      dataIndex: 'status',
+      key: 'status',
+      render: (v: boolean, record: Seed) => (
+        <Switch
+          checked={v}
+          onChange={(checked) => handleStatusToggle(record, checked)}
+          checkedChildren="启用"
+          unCheckedChildren="禁用"
+        />
+      ),
+    },
+    {
+      title: '备注',
+      dataIndex: 'note',
+      key: 'note',
+      render: (v: string) => v || '-',
+    },
+    {
+      title: '创建时间',
+      dataIndex: 'created_at',
+      key: 'created_at',
+      render: (t: string) => formatDateTime(t),
+    },
+    {
+      title: '操作',
+      key: 'action',
+      render: (_: unknown, record: Seed) => (
+        <Space>
+          <Button
+            size="small"
+            icon={<EditOutlined />}
+            onClick={() => handleEdit(record)}
+          >
+            编辑
+          </Button>
+          <Popconfirm
+            title="确认删除该种子?"
+            onConfirm={() => handleDelete(record.id)}
+            okText="确认"
+            cancelText="取消"
+          >
+            <Button size="small" danger icon={<DeleteOutlined />}>删除</Button>
+          </Popconfirm>
+        </Space>
+      ),
+    },
+  ]
+
+  return (
+    <div>
+      <div style={{ marginBottom: 16 }}>
+        <Button type="primary" icon={<PlusOutlined />} onClick={handleAdd}>
+          添加种子
+        </Button>
+      </div>
+
+      <Table
+        dataSource={data}
+        columns={columns}
+        rowKey="id"
+        loading={loading}
+        pagination={{
+          current: page,
+          pageSize: 20,
+          total,
+          onChange: (p) => setPage(p),
+          showTotal: (t) => `共 ${t} 条`,
+        }}
+      />
+
+      {/* forceRender mounts the Form before the modal is first opened, so the
+          resetFields/setFieldsValue calls in handleAdd/handleEdit always reach
+          a connected form instance. */}
+      <Modal
+        title={editingRecord ? '编辑种子' : '添加种子'}
+        open={modalOpen}
+        onOk={handleSave}
+        onCancel={() => setModalOpen(false)}
+        confirmLoading={saving}
+        okText="保存"
+        cancelText="取消"
+        forceRender
+      >
+        <Form form={form} layout="vertical" style={{ marginTop: 16 }}>
+          <Form.Item
+            name="channel_name"
+            label="频道名"
+            rules={[{ required: true, message: '请输入频道名' }]}
+          >
+            <Input placeholder="例如:@channel_name" />
+          </Form.Item>
+          <Form.Item name="note" label="备注">
+            <Input.TextArea rows={3} placeholder="可选备注" />
+          </Form.Item>
+          <Form.Item name="status" label="状态" valuePropName="checked">
+            <Switch checkedChildren="启用" unCheckedChildren="禁用" />
+          </Form.Item>
+        </Form>
+      </Modal>
+    </div>
+  )
+}

+ 167 - 0
web/src/pages/Settings.tsx

@@ -0,0 +1,167 @@
+import { useEffect, useState, useCallback } from 'react'
+import { Tabs, Table, Input, Button, Space, message } from 'antd'
+import { EditOutlined, SaveOutlined, CloseOutlined } from '@ant-design/icons'
+import { getSettings, updateSetting, type Setting } from '../api'
+import Seeds from './Seeds'
+import Keywords from './Keywords'
+
+/** Converts a date string into its zh-CN locale date-time representation. */
+function formatDateTime(dateStr: string) {
+  const timestamp = new Date(dateStr)
+  const localized = timestamp.toLocaleString('zh-CN')
+  return localized
+}
+
+/**
+ * Settings table with one-row-at-a-time inline editing.
+ *
+ * `editingKey` names the row currently being edited (null when none) and
+ * `editingValue` holds its draft text. Saving calls `updateSetting`, then
+ * reloads the whole list through `getSettings`.
+ */
+function PipelineSettings() {
+  const [data, setData] = useState<Setting[]>([])
+  const [loading, setLoading] = useState(false)
+  const [editingKey, setEditingKey] = useState<string | null>(null)
+  const [editingValue, setEditingValue] = useState('')
+  const [saving, setSaving] = useState(false)
+
+  const fetchData = useCallback(async () => {
+    setLoading(true)
+    try {
+      const res = await getSettings()
+      setData(res.data)
+    } catch {
+      message.error('获取配置失败')
+    } finally {
+      setLoading(false)
+    }
+  }, [])
+
+  useEffect(() => {
+    fetchData()
+  }, [fetchData])
+
+  // Switches the given row into edit mode, seeding the draft with its value.
+  const handleEdit = (record: Setting) => {
+    setEditingKey(record.key)
+    setEditingValue(record.value)
+  }
+
+  const handleSave = async (key: string) => {
+    // Enter or the save button can fire again while a request is in flight;
+    // ignore re-entrant saves so the setting is not written twice.
+    if (saving) return
+    setSaving(true)
+    try {
+      await updateSetting(key, editingValue)
+      message.success('保存成功')
+      setEditingKey(null)
+      fetchData()
+    } catch {
+      // The row stays in edit mode so the draft is not lost on failure.
+      message.error('保存失败')
+    } finally {
+      setSaving(false)
+    }
+  }
+
+  // Leaves edit mode and discards the draft.
+  const handleCancel = () => {
+    setEditingKey(null)
+    setEditingValue('')
+  }
+
+  const columns = [
+    {
+      title: '配置项',
+      dataIndex: 'key',
+      key: 'key',
+      width: 240,
+    },
+    {
+      title: '值',
+      dataIndex: 'value',
+      key: 'value',
+      render: (v: string, record: Setting) => {
+        if (editingKey === record.key) {
+          return (
+            <Input
+              value={editingValue}
+              onChange={(e) => setEditingValue(e.target.value)}
+              onPressEnter={() => handleSave(record.key)}
+              size="small"
+              disabled={saving}
+            />
+          )
+        }
+        return <span>{v}</span>
+      },
+    },
+    {
+      title: '描述',
+      dataIndex: 'description',
+      key: 'description',
+      render: (v: string) => v || '-',
+    },
+    {
+      title: '更新时间',
+      dataIndex: 'updated_at',
+      key: 'updated_at',
+      render: (t: string) => t ? formatDateTime(t) : '-',
+    },
+    {
+      title: '操作',
+      key: 'action',
+      width: 150,
+      render: (_: unknown, record: Setting) => {
+        if (editingKey === record.key) {
+          return (
+            <Space>
+              <Button
+                size="small"
+                type="primary"
+                icon={<SaveOutlined />}
+                loading={saving}
+                onClick={() => handleSave(record.key)}
+              >
+                保存
+              </Button>
+              <Button size="small" icon={<CloseOutlined />} onClick={handleCancel}>
+                取消
+              </Button>
+            </Space>
+          )
+        }
+        return (
+          <Button
+            size="small"
+            icon={<EditOutlined />}
+            // Starting another edit mid-save would be cancelled when the
+            // pending save resets editingKey, so block it until then.
+            disabled={saving}
+            onClick={() => handleEdit(record)}
+          >
+            编辑
+          </Button>
+        )
+      },
+    },
+  ]
+
+  return (
+    <Table
+      dataSource={data}
+      columns={columns}
+      rowKey="key"
+      loading={loading}
+      pagination={false}
+    />
+  )
+}
+
+// Tab definitions for the settings page, listed in display order.
+const tabItems = [
+  { key: 'pipeline', label: '流水线参数', children: <PipelineSettings /> },
+  { key: 'seeds', label: '种子管理', children: <Seeds /> },
+  { key: 'keywords', label: '关键词管理', children: <Keywords /> },
+]
+
+/** Settings page: renders the `tabItems` tabs inside a plain wrapper div. */
+export default function Settings() {
+  const tabs = <Tabs items={tabItems} />
+  return <div>{tabs}</div>
+}

+ 154 - 0
web/src/pages/Tasks.tsx

@@ -0,0 +1,154 @@
+import { useEffect, useState, useCallback } from 'react'
+import { Table, Tag, Button, message, Badge } from 'antd'
+import { StopOutlined } from '@ant-design/icons'
+import { getTasks, stopTask, type Task } from '../api'
+import { useAppStore } from '../store'
+import TaskControl from '../components/TaskControl'
+
+// Tag color per task type; the table falls back to 'default' for any type
+// that is not listed here.
+const taskTypeColor: Record<string, string> = {
+  full: 'purple',
+  discover: 'blue',
+  search: 'cyan',
+  github: 'geekblue',
+  scrape: 'orange',
+  crawl: 'green',
+  clean: 'lime',
+  score: 'gold',
+}
+
+// Badge status per task status; unknown statuses render with 'default'.
+const taskStatusBadge: Record<string, 'processing' | 'success' | 'error' | 'warning' | 'default'> = {
+  running: 'processing',
+  completed: 'success',
+  failed: 'error',
+  stopped: 'warning',
+  pending: 'default',
+}
+
+/** Formats a date string in the zh-CN locale; null or empty input renders as '-'. */
+function formatDateTime(dateStr: string | null) {
+  return dateStr ? new Date(dateStr).toLocaleString('zh-CN') : '-'
+}
+
+/**
+ * Task history page: a task launcher (`TaskControl`) above a paginated table
+ * of tasks, with a stop button on rows whose status is 'running'.
+ *
+ * While the shared store holds a `runningTask`, the current page is re-fetched
+ * every 3 seconds.
+ */
+export default function Tasks() {
+  const [tasks, setTasks] = useState<Task[]>([])
+  const [total, setTotal] = useState(0)
+  const [page, setPage] = useState(1)
+  const [loading, setLoading] = useState(false)
+  const [stoppingId, setStoppingId] = useState<number | null>(null)
+  const { runningTask, setRunningTask } = useAppStore()
+
+  // Loads one page of tasks. `silent` skips the loading spinner; it is used by
+  // the background poll so the table does not flash every few seconds.
+  const fetchTasks = useCallback(async (currentPage = page, silent = false) => {
+    if (!silent) setLoading(true)
+    try {
+      const res = await getTasks({ page: currentPage, page_size: 20 })
+      setTasks(res.data.items)
+      setTotal(res.data.total)
+    } catch {
+      message.error('获取任务列表失败')
+    } finally {
+      if (!silent) setLoading(false)
+    }
+  }, [page])
+
+  useEffect(() => {
+    fetchTasks(page)
+  }, [page, fetchTasks])
+
+  // Poll the current page while a task is running.
+  useEffect(() => {
+    if (!runningTask) return
+    const timer = setInterval(() => fetchTasks(page, true), 3000)
+    return () => clearInterval(timer)
+  }, [runningTask, fetchTasks, page])
+
+  // Stops a task, clears the store's running task and reloads the page.
+  const handleStop = async (id: number) => {
+    setStoppingId(id)
+    try {
+      await stopTask(id)
+      setRunningTask(null)
+      message.success('任务已停止')
+      fetchTasks(page)
+    } catch {
+      message.error('停止任务失败')
+    } finally {
+      setStoppingId(null)
+    }
+  }
+
+  // Shows page 1 after a task starts. Moving `page` lets the page effect do
+  // the fetch and keeps the paginator in sync with the rows; when already on
+  // page 1 the state would not change, so fetch explicitly.
+  const handleTaskStarted = () => {
+    if (page === 1) {
+      fetchTasks(1)
+    } else {
+      setPage(1)
+    }
+  }
+
+  const columns = [
+    { title: 'ID', dataIndex: 'id', key: 'id', width: 70 },
+    {
+      title: '任务类型',
+      dataIndex: 'task_type',
+      key: 'task_type',
+      render: (type: string) => <Tag color={taskTypeColor[type] ?? 'default'}>{type}</Tag>,
+    },
+    {
+      title: '状态',
+      dataIndex: 'status',
+      key: 'status',
+      render: (status: string) => (
+        <Badge status={taskStatusBadge[status] ?? 'default'} text={status} />
+      ),
+    },
+    {
+      title: '参数预览',
+      dataIndex: 'params',
+      key: 'params',
+      render: (params: Record<string, unknown> | null | undefined) => {
+        // JSON.stringify(undefined) returns undefined, so guard before slicing;
+        // missing params render as '-'.
+        const text = params == null ? '-' : JSON.stringify(params)
+        return (
+          <span style={{ fontSize: 12, color: '#666' }}>
+            {text.length > 60 ? `${text.slice(0, 60)}...` : text}
+          </span>
+        )
+      },
+      ellipsis: true,
+    },
+    {
+      title: '创建时间',
+      dataIndex: 'created_at',
+      key: 'created_at',
+      render: (t: string) => formatDateTime(t),
+    },
+    {
+      title: '完成时间',
+      dataIndex: 'completed_at',
+      key: 'completed_at',
+      render: (t: string | null) => formatDateTime(t),
+    },
+    {
+      title: '操作',
+      key: 'action',
+      render: (_: unknown, record: Task) =>
+        record.status === 'running' ? (
+          <Button
+            danger
+            size="small"
+            icon={<StopOutlined />}
+            loading={stoppingId === record.id}
+            onClick={() => handleStop(record.id)}
+          >
+            停止
+          </Button>
+        ) : null,
+    },
+  ]
+
+  return (
+    <div>
+      <TaskControl onTaskStarted={handleTaskStarted} />
+      <Table
+        dataSource={tasks}
+        columns={columns}
+        rowKey="id"
+        loading={loading}
+        pagination={{
+          current: page,
+          pageSize: 20,
+          total,
+          onChange: (p) => setPage(p),
+          showTotal: (t) => `共 ${t} 条`,
+        }}
+      />
+    </div>
+  )
+}

+ 12 - 0
web/src/store/index.ts

@@ -0,0 +1,12 @@
+import { create } from 'zustand'
+import type { Task } from '../api'
+
+/** Shape of the global app store. */
+interface AppState {
+  // Task currently tracked as running, or null when there is none.
+  runningTask: Task | null
+  // Replaces `runningTask`; pass null to clear it.
+  setRunningTask: (task: Task | null) => void
+}
+
+// Global store hook; starts with no running task.
+export const useAppStore = create<AppState>((set) => {
+  const setRunningTask = (task: Task | null) => set({ runningTask: task })
+  return { runningTask: null, setRunningTask }
+})

+ 21 - 0
web/tsconfig.json

@@ -0,0 +1,21 @@
+{
+  "compilerOptions": {
+    "target": "ES2020",
+    "useDefineForClassFields": true,
+    "lib": ["ES2020", "DOM", "DOM.Iterable"],
+    "module": "ESNext",
+    "skipLibCheck": true,
+    "moduleResolution": "bundler",
+    "allowImportingTsExtensions": true,
+    "resolveJsonModule": true,
+    "isolatedModules": true,
+    "noEmit": true,
+    "jsx": "react-jsx",
+    "strict": true,
+    "noUnusedLocals": false,
+    "noUnusedParameters": false,
+    "noFallthroughCasesInSwitch": true
+  },
+  "include": ["src"],
+  "references": [{ "path": "./tsconfig.node.json" }]
+}

+ 10 - 0
web/tsconfig.node.json

@@ -0,0 +1,10 @@
+{
+  "compilerOptions": {
+    "composite": true,
+    "skipLibCheck": true,
+    "module": "ESNext",
+    "moduleResolution": "bundler",
+    "allowSyntheticDefaultImports": true
+  },
+  "include": ["vite.config.ts"]
+}

+ 12 - 0
web/vite.config.ts

@@ -0,0 +1,12 @@
+import { defineConfig } from 'vite'
+import react from '@vitejs/plugin-react'
+
+// Vite config: React plugin plus dev-server proxy rules.
+export default defineConfig({
+  plugins: [react()],
+  server: {
+    proxy: {
+      // Requests under /api are forwarded to localhost:8080.
+      '/api': 'http://localhost:8080',
+      // /ws is proxied to the same host with WebSocket support enabled.
+      '/ws': { target: 'ws://localhost:8080', ws: true }
+    }
+  }
+})

+ 502 - 0
爬虫文档.md

@@ -0,0 +1,502 @@
+# TG Lead Scraper 系统指南
+
+> 最后更新: 2026-04-08
+> 代码位置: `23.95.10.148:/opt/tg-lead-scraper/`
+> 前端位置: `/Users/admin/claude/tg-lead-scraper-frontend/`
+
+---
+
+## 一、系统概览
+
+**一句话**: 从 Telegram 频道/网页/搜索结果/GitHub 自动挖掘商户联系方式,清洗去重验证,最后按质量打分。
+
+**核心能力**: 输入是"种子频道"和"关键词",输出是**可直接外呼的商户清单**(带 TG 用户名、网站、邮箱、电话、行业标签、质量分)。
+
+---
+
+## 二、数据模型(输入 → 输出)
+
+```
+输入源
+├── managed_seeds      TG 频道起点(当前: 1 个 bbs3000)
+├── managed_keywords   搜索关键词(当前: 108 个)
+└── industry_rules.yaml  行业分类规则
+              ↓
+   7 阶段 Pipeline 处理
+              ↓
+中间表
+├── channels       发现到的 TG 频道(当前: 1907+ 条)
+├── nav_sites      候选导航网页(当前: 646 条)
+└── merchants_raw  待清洗商户(当前: 1946 条)
+              ↓
+   cleaner 三关过滤
+              ↓
+最终输出
+└── merchants_clean  可用商户(valid/invalid/bot/duplicate/group)
+```
+
+---
+
+## 三、7 阶段 Pipeline 完整流程
+
+### 全景图
+
+```
+Phase 1 discover    → Phase 2 search   → Phase 3 github
+    ↓                     ↓                    ↓
+  (snowball 裂变)      (关键词搜索)         (GitHub README 挖)
+    ↓                     ↓                    ↓
+  channels 表        nav_sites + channels    channels 表
+    ↓                     ↓                    ↓
+            ┌─────────────┴──────────────┐
+            ↓                            ↓
+Phase 4 scrape                    Phase 5 crawl
+ (TG 消息抓取)                     (网页爬取)
+            ↓                            ↓
+      merchants_raw              merchants_raw
+            ↓                            ↓
+            └──────────┬─────────────────┘
+                       ↓
+            Phase 6 clean (清洗三关)
+                       ↓
+            merchants_clean
+                       ↓
+            Phase 7 score (打分)
+                       ↓
+         merchants_clean.quality_score
+```
+
+### Phase 1: discover (频道发现)
+
+- **文件**: `core/snowball.py`
+- **做什么**: 从种子频道出发,裂变发现更多相关频道
+
+**流程**:
+1. 从 `managed_seeds` 拿起点(比如 `@bbs3000`)
+2. 用 TG 账号进入每个种子,读最近 100 条消息
+3. 提取消息里的 forward_from 和 TG 推荐频道
+4. 把新发现的频道当第二层种子继续裂变
+5. 最多 3 层(max_depth),每层 200 个上限,全局 500 个上限(防指数爆炸)
+6. 每个频道间 sleep 5 秒
+
+**输入**: `managed_seeds` + TG 账号
+**输出**: 写入 `channels` 表,source='seed' 或 'snowball'
+**瓶颈**: 单账号 FloodWait(历史上裂变 200 个就被限速)
+
+### Phase 2: search (搜索引擎)
+
+- **文件**: `core/search_engine.py`
+- **做什么**: 用关键词去 Google 搜索,把结果里的 TG 频道和导航站分拣入库
+
+**流程**:
+1. 从 `managed_keywords` 拿 108 个关键词(如"机场推荐"、"发卡网")
+2. 对每个关键词调 Serper API(Google Search)
+3. 翻页多次,每页 10 条结果
+4. 识别结果 URL:
+   - 是 `t.me/xxx` → 写 `channels` 表
+   - 是导航站(domain 含 nav/list/catalog)→ 写 `nav_sites` 表
+   - 是博客/产品官网/社交媒体 → 丢弃
+5. 关键词间 sleep 若干秒避免限速
+6. Serper 失败可 fallback 到 DuckDuckGo
+
+**外部依赖**: Serper API(**当前 key 已失效**,400 Bad Request)
+**输出**: `channels` + `nav_sites`
+
+### Phase 3: github (GitHub 采集)
+
+- **文件**: `core/github_crawler.py`
+- **做什么**: 搜 GitHub 仓库的 README 里的 TG 链接
+
+**流程**:
+1. 用 query 搜 GitHub repo(按 star 排序)
+2. 下载每个 repo 的 README.md
+3. 要求 README 前 5000 字含中文,过滤英文项目
+4. 正则匹配 `t.me/xxx` 链接
+5. 链接前后 200 字必须含中文才算有效(过滤通用 bot 和 proxy 频道)
+6. repo 间 sleep 2s,query 间 sleep 5s
+
+**外部依赖**: GitHub Search API(**无 token**,rate limit 10 req/min)
+**输出**: `channels` 表,source='github'
+
+### Phase 4: scrape (TG 消息采集) ← 最慢最贵的阶段
+
+- **文件**: `core/scraper.py`
+- **做什么**: 真正进入 TG 频道,读历史消息,提取商户
+
+**流程**:
+1. 从 `channels` 表拿 status='pending' 的频道
+2. 对每个频道:
+   - `get_entity(channel_username)` 解析频道
+   - **GLM 相关性评估**:频道名+简介+成员数传给 GLM 判"是不是商户相关",不相关直接 skip
+   - 读频道简介(about)
+   - 读置顶消息(limit=20)
+   - 遍历历史消息(limit=500,断点续传用 `last_message_id`)
+3. 每条消息:
+   - `MessageService` 系统消息跳过
+   - 非中文跳过
+   - 先用**正则** `extract_contacts_enhanced` 快速判是否含联系方式
+   - 有联系方式 → 调 **GLM 精准解析** 提取 merchant(failover 到正则)
+   - 写入 `merchants_raw`
+4. 消息间 sleep `delay_message`,频道间 sleep `delay_channel`
+
+**输入**: `channels` 表 + `managed_settings.tg_scraper.*`
+**输出**: `merchants_raw`
+**瓶颈**: 单账号每 200-500 次 `get_entity` 触发 FloodWait 10-24 小时
+
+### Phase 5: crawl (网页爬取)
+
+- **文件**: `core/web_crawler.py` + `core/nav_filter.py`
+- **做什么**: 爬导航站,从网页 HTML 提取商户
+
+**流程**:
+1. 从 `nav_sites` 表拿 status='pending' 的网页
+2. **预过滤** `rule_filter(url)`:
+   - 黑名单域名(t.me/twitter/google 等 80 个)→ filtered
+   - 黑名单扩展名(.apk/.zip/.pdf 等 40 种)→ filtered
+   - 黑名单路径(`/api/`、`/login/`、`?ref=` 等)→ filtered
+   - 正向信号(含 nav/directory/catalog)→ valid
+   - 都不确定 → 弱候选,进 GLM 二次过滤
+3. **GLM 二次过滤**:问 GLM "这个 URL 是不是导航站",置信度 ≥0.6 才放行
+4. 对通过的网页:
+   - 用 requests → cloudscraper → playwright **三层 fallback** 抓 HTML
+   - HTML 前 5000 字非中文直接跳过
+   - BeautifulSoup 解析出商户链接(CSS 选择器可配置)
+5. 对每个商户链接:
+   - 如果直接带 `@tg_username` → 走 TG 入库路径
+   - **t.me 死号预检**(新加的):抓 t.me/{username} 网页,没头像就 drop
+   - 活号 → 写 `merchants_raw`
+   - 如果只有网站链接 → 爬商户首页 + `/contact`、`/about` 等子页,用 extractor 提取联系方式
+
+**输入**: `nav_sites` 表
+**输出**: `merchants_raw`
+
+### Phase 6: clean (数据清洗) ← 三关过滤
+
+- **文件**: `core/cleaner.py`
+- **做什么**: 对 raw 商户做黑名单过滤、去重、真实性验证
+
+**流程**:
+
+#### 第一关: `_filter_blacklist`(本地,秒级)
+- 黑名单 username(26 个系统 bot + `xxxbot` 后缀)→ 标 bot
+- 邀请链接哈希(16-24 位 base64 + 高熵)→ 标 invalid
+- `original_message` 非空且不含中文 → 标 invalid
+
+#### 第二关: `_deduplicate`(本地,秒级)
+- 同 username 多条记录,按信息丰富度打分(有 website/email/phone 加分)
+- 保留最丰富的一条,副本迁 clean 桶标 duplicate
+- 合并所有 source 链接到 keeper
+
+#### 第三关: `_verify_merchant`(最贵,TG API)
+- 调 Telethon `client.get_entity(username)` 去 TG 服务器验证
+- 返回 User 且非 bot → **valid**(顺手拿 first_name/last_name/is_premium/last_online/active_level)
+- 返回 Bot → bot
+- 返回 Channel/Chat → group
+- UsernameNotOccupied → invalid
+- FloodWait ≤60s → 重试
+- FloodWait >60s 且 ≤300s → 切账号
+- **FloodWait >300s → 直接 break 整轮**(修复后)
+
+**输入**: `merchants_raw` status='raw'
+**输出**: `merchants_clean` (valid/invalid/bot/duplicate/group)
+**瓶颈**: 同 Phase 4,TG API 节流严
+
+### Phase 7: score (商户评分)
+
+- **文件**: `core/scorer.py`
+- **做什么**: 对 clean 桶的商户按 6 维度加权打分
+
+**6 个维度**(总权重 1.0):
+
+| 维度 | 权重 | 规则 |
+|---|---|---|
+| `member_count` | 0.25 | <100→10 / <1k→30 / <1 万→50 / <10 万→80 / ≥10 万→100 |
+| `premium` | 0.15 | 是 TG Premium→100,不是→0 |
+| `activity` | 0.25 | active→100 / moderate→50 / inactive→20 |
+| `multi_source` | 0.20 | 被多个来源发现→100 / 3+→70 / 2→40 / 1→10 |
+| `has_website` | 0.10 | 有→100,没有→0 |
+| `has_email` | 0.05 | 有→100,没有→0 |
+
+**可选第 7 维**: GLM 内容质量打分(默认关闭,因为 GLM API 会 hang)
+
+**输入**: `merchants_raw` + `merchants_clean` 两表
+**输出**: 写 `merchants_clean.quality_score`(0-100)
+
+---
+
+## 四、核心模块对照表
+
+| 模块 | 文件 | 主要职责 |
+|---|---|---|
+| snowball | `core/snowball.py` | Phase 1 频道裂变 |
+| search_engine | `core/search_engine.py` | Phase 2 Serper/DuckDuckGo 搜索 |
+| github_crawler | `core/github_crawler.py` | Phase 3 GitHub README 挖 TG 链接 |
+| scraper | `core/scraper.py` | Phase 4 TG 消息采集 |
+| web_crawler | `core/web_crawler.py` | Phase 5 导航站爬取 |
+| nav_filter | `core/nav_filter.py` | 导航站识别过滤器 |
+| cleaner | `core/cleaner.py` | Phase 6 清洗三关 |
+| scorer | `core/scorer.py` | Phase 7 评分 |
+| extractor | `core/extractor.py` | 联系方式提取(正则+GLM) |
+| classifier | `core/classifier.py` | 行业分类(关键词+GLM) |
+| account_manager | `core/account_manager.py` | 多 TG 账号轮换 + FloodWait 管理 |
+| pipeline | `core/pipeline.py` | Pipeline 状态管理 |
+| task_manager | `core/task_manager.py` | 任务调度 + 看门狗 + 断点续传 |
+| config_service | `core/config_service.py` | 配置服务 (managed_settings) |
+| database | `core/database.py` | ORM + promote_merchant helper |
+| tme_validator | `core/tme_validator.py` | t.me 网页死号预检(新加) |
+
+---
+
+## 五、具体能实现的功能清单
+
+### 📥 数据采集能力
+- ✅ 从种子 TG 频道裂变发现新频道(snowball,最多 3 层,500 个上限)
+- ✅ 用关键词从 Google 搜索引擎发现 TG 频道和导航网页
+- ✅ 从 GitHub 仓库 README 挖 TG 链接
+- ✅ 抓取 TG 频道历史消息(支持断点续传,单频道最多 500 条)
+- ✅ 抓取 TG 频道简介 + 置顶消息
+- ✅ 爬取导航网站 HTML(三层 fallback: requests / cloudscraper / playwright)
+- ✅ 爬取商户官网 + 常见子页(`/contact`, `/about`, `/关于我们`)
+
+### 🧠 智能识别能力
+- ✅ GLM 频道相关性评估(过滤掉不是商户的频道)
+- ✅ GLM 消息商户解析(提取非标准格式如"加V:xxx"、"t点me/xxx")
+- ✅ GLM 行业分类(机场/发卡/成人等,可配置)
+- ✅ GLM 导航站识别(弱候选 URL 交 GLM 判断)
+- ✅ 正则提取联系方式(TG 用户名 / t.me 链接 / 邮箱 / 电话 / 网址)
+- ✅ 中文检测(非中文消息/商户名直接跳过)
+- ✅ 联系意图识别(文本里含客服/购买/咨询等关键词标记)
+
+### 🧹 数据清洗能力
+- ✅ 26 条系统 bot 黑名单过滤
+- ✅ 邀请链接哈希识别(base64 + 高熵检测)
+- ✅ 80+ 域名黑名单(社交媒体/大站/政府站)
+- ✅ 40+ 扩展名黑名单(apk/zip/pdf 等非网页资源)
+- ✅ 非中文内容过滤
+- ✅ 同 username 去重 + 信息合并
+- ✅ **t.me 网页死号预检**(新加,16% 死号率,100% 准确)
+- ✅ Telethon 真实性验证(拿头像/显示名/premium/最后在线时间)
+- ✅ FloodWait 智能处理(短等待重试,长等待切账号,超 300s 跳过)
+
+### 📊 评分与分类
+- ✅ 6 维度加权打分(成员数/Premium/活跃度/多来源/网站/邮箱)
+- ✅ 行业标签(关键词匹配 + GLM)
+- ✅ 活跃度分级(active <3 天 / moderate / inactive >30 天)
+- ✅ Premium 用户识别
+
+### 🎛️ 运维能力
+- ✅ 多 TG 账号轮换(account_manager)
+- ✅ FloodWait 自动切换账号
+- ✅ 代理池(proxy_mgr,给 GitHub/requests 用)
+- ✅ 断点续传(`channels.last_message_id`)
+- ✅ 任务状态机(`pipeline_state.json` 单一权威)
+- ✅ 任务 stop / force_stop 接口
+- ✅ 每阶段独立运行(任意阶段都能单独跑)
+- ✅ 全链路 pipeline(7 阶段自动调度)
+- ✅ skip_phases 参数(跳过指定阶段)
+- ✅ 测试模式(item_limit / message_limit 限流)
+- ✅ 实时日志 + 进度回调
+- ✅ 任务并发保护(同类型不能叠跑)
+
+### 🖥️ 前端能力
+- ✅ 任务启动/停止/预览
+- ✅ 数据总览仪表盘
+- ✅ 采集数据表格(默认按 created_at desc)
+- ✅ 种子频道管理(增删改查)
+- ✅ 候选网页管理
+- ✅ 系统配置(种子 / 关键词 / 流水线阶段 tab)
+- ✅ 系统状态(运行日志 / 监控)
+- ✅ 任务历史(7 个阶段独立按钮)
+
+### 🗄️ 数据管理
+- ✅ merchants_raw / merchants_clean 桶分离
+- ✅ Merchant 视图兼容(只读 UNION)
+- ✅ promote_merchant helper(原子跨桶迁移,保留 id)
+- ✅ managed_settings 11 条运行参数(`runtime` 热加载和 `new_task` 两种生效级别)
+- ✅ config_revisions 审计日志
+- ✅ 自动备份(`backup/` 目录 14+ 份历史快照)
+
+---
+
+## 六、可用任务类型(API `/api/tasks/start`)
+
+| task_type | 用途 | 独占 |
+|---|---|---|
+| `full` | 跑完整 pipeline(7 阶段) | 是(全局空闲才能跑) |
+| `discover` | Phase 1 单跑 | 否 |
+| `search` | Phase 2 单跑 | 否 |
+| `github` | Phase 3 单跑 | 否 |
+| `scrape` | Phase 4 单跑 | 否 |
+| `crawl` | Phase 5 单跑 | 否 |
+| `clean` | Phase 6 单跑 | 否 |
+| `score` | Phase 7 单跑 | 否 |
+
+**参数**:
+- `target`: 可选目标(频道名 / 关键词)
+- `test_run`: 测试模式
+  - `item_limit`: 每轮处理条数上限
+  - `message_limit`: 每频道消息上限
+- `skip_phases`: 每任务覆盖全局默认的 skip_phases
+
+---
+
+## 七、外部依赖
+
+| 依赖 | 用途 | 状态 |
+|---|---|---|
+| Telethon + TG API | Phase 4 / Phase 6 | account_01/02 FloodWait 10-20h |
+| Serper API (Google Search) | Phase 2 | **key 失效,400 错误** |
+| GitHub Search API | Phase 3 | 无 token,10 req/min |
+| GLM API | 频道评估 / 消息解析 / 行业分类 / 导航站判断 | 正常 |
+| t.me 网页 | 死号预检 | 正常,无限速 |
+| requests / cloudscraper / playwright | Phase 5 | 正常 |
+
+---
+
+## 八、当前 managed_settings(11 条)
+
+| key | value | type | effect_level |
+|---|---|---|---|
+| `pipeline.skip_phases` | `["scrape"]` | json | new_task |
+| `pipeline.checkpoint_interval` | 30 | int | runtime |
+| `tg_scraper.message_limit_per_channel` | 500 | int | runtime |
+| `tg_scraper.delay_per_verify` | 3.0 | float | runtime |
+| `clean.timeout_seconds` | 3600 | int | runtime |
+| `search.timeout_seconds` | 3600 | int | runtime |
+| `snowball.max_channels_per_layer` | 200 | int | runtime |
+| `snowball.max_channels_total` | 500 | int | runtime |
+| `tme_validator.enabled` | true | bool | runtime |
+| `tme_validator.rate_per_min` | 60 | int | runtime |
+| `tme_validator.concurrency` | 10 | int | runtime |
+
+**effect_level 说明**:
+- `runtime`: 改了立即生效
+- `new_task`: 只对下一个新任务生效
+
+---
+
+## 九、当前数据基线 (2026-04-08)
+
+| 表 | 记录数 |
+|---|---|
+| managed_seeds | 1(`bbs3000`) |
+| managed_keywords | 108 |
+| managed_settings | 11 |
+| channels | 1907+ |
+| nav_sites | 646(pending=0, scraped=156, filtered=490) |
+| **merchants_raw** | **1946**(raw=1571, group=308, glm_parsed=67) |
+| **merchants_clean** | **125**(valid=62, invalid=34, duplicate=27, bot=2) |
+| scored 商户 | 448 |
+
+---
+
+## 十、已知限制
+
+1. **单账号/双账号 TG API 限速严重** — Phase 4/6 容易卡 13-20 小时
+2. **Serper API key 失效** — Phase 2 完全失效
+3. **GitHub 无 token** — Phase 3 rate limit 10 req/min
+4. **cleaner `_verify_merchant` 没有独立节流** — 是触发 FloodWait 的根因
+5. **raw 桶新数据(web 来源)merchant_name 污染** — site_scraper 把 HTML 标签写入 merchant_name
+
+---
+
+## 十一、系统能力边界(不能做什么)
+
+- ❌ 不能绕过 TG 限速(必须等 FloodWait 或加账号)
+- ❌ 不能解析 TG 加密群组内容(只能公开频道)
+- ❌ 不能抓 TG Premium 专享内容
+- ❌ 不能识别图片/视频里的联系方式(只处理文本)
+- ❌ 不能通过 TG 建立会话发消息(纯读)
+- ❌ 不能处理 JS 渲染的单页应用(除非 Playwright fallback)
+- ❌ 不能自动注册 TG 账号(需要手机号+验证码)
+
+---
+
+## 十二、2026-04-07/08 修复的 9 个 bug
+
+| # | 修复 | commit |
+|---|---|---|
+| 1 | crawl GLM 超时 + 进度日志 | `6380f43` |
+| 2 | cleaner FloodWait 无限 sleep | `6380f43` |
+| 3 | score use_glm 强制 True hang | `6380f43` |
+| 4 | merchants 默认排序按 created_at desc | `04f2d7e` |
+| 5 | cleaner 非中文规则误杀 web 数据 | `625b0e3` |
+| 6 | config.py bootstrap GET 回填 | `21e6e29` |
+| 7 | snowball 加每层 200 + 总数 500 上限 | `50b580d` |
+| 8 | t.me 死号预检模块 | `027895a` |
+| 9 | tme_validator lint 修复 | `f0456c7` |
+
+---
+
+## 十三、运维与回滚
+
+### 服务器信息
+- **IP**: 23.95.10.148 (RackNerd VPS)
+- **SSH**: `root / 4e8F2McWxRC7iEa0b4`
+- **代码**: `/opt/tg-lead-scraper/`
+- **虚拟环境**: `venv/`
+- **服务**: `tg-lead-scraper-api` (systemd)
+- **对外端口**: 8134 (Nginx 反代 → 127.0.0.1:8900)
+- **API 认证**: `admin / admin` (Basic Auth)
+
+### 日志位置
+- **Pipeline 执行日志**: `/opt/tg-lead-scraper/logs/scraper_YYYY-MM-DD.log`
+- **HTTP 日志**: `journalctl -u tg-lead-scraper-api`
+- **Pipeline 状态**: `/opt/tg-lead-scraper/data/pipeline_state.json`
+
+### DB 备份
+位于 `/opt/tg-lead-scraper/backup/`:
+- `leads_pre_refactor_20260407_151503.db` — 7 批重构前
+- `leads_pre_batch5_20260407_162334.db` — 表拆分前
+- `leads_pre_rollback_20260408_041132.db` — cleaner 规则回滚前
+- `leads_pre_dead_purge_20260408_094432.db` — 死号清理前
+- `seeds_backup_20260407_142211.json/.sql` — 原 17 个种子专项备份
+- `seeds_full_snapshot_20260407_142211.db` — 种子专项全库快照
+
+### 前端部署
+```bash
+cd /Users/admin/claude/tg-lead-scraper-frontend
+npm run build
+rsync -az --delete dist/ root@23.95.10.148:/var/www/tg-lead-scraper/
+```
+
+### Git tag
+- `v7-crawl-clean-score-fixes` — 2026-04-08 凌晨修复基线
+
+### 紧急回滚
+```bash
+# 后端代码回滚到 tag
+cd /opt/tg-lead-scraper && git reset --hard v7-crawl-clean-score-fixes
+systemctl restart tg-lead-scraper-api
+
+# DB 回滚
+cp backup/leads_pre_refactor_20260407_151503.db data/leads.db
+
+# 前端回滚
+cd /Users/admin/claude/tg-lead-scraper-frontend
+git reset --hard <tag>
+npm run build
+rsync -az --delete dist/ root@23.95.10.148:/var/www/tg-lead-scraper/
+```
+
+---
+
+## 十四、开发规范(重要)
+
+### 写入 merchant 的规则
+- **新增 merchant** → 只能用 `MerchantRaw(...)`,不能用 `Merchant(...)`
+- **修改 merchant status 跨桶**(raw→valid)→ 用 `promote_merchant()` helper
+- **修改 merchant status 桶内**(raw→glm_parsed)→ 直接改对应桶 ORM 对象
+- **按 id 查询未知桶** → 用 `merchant_by_id(session, id)`
+- **只读聚合查询** → `session.query(Merchant)`(指向视图,零改动)
+
+### Commit 规范
+```bash
+git -c user.email=refactor@local -c user.name=refactor commit -m "..."
+```
+
+### 配置权威
+- **种子**: `managed_seeds` 表(不是 config.yaml)
+- **关键词**: `managed_keywords` 表
+- **运行参数**: `managed_settings` 表(通过 `/api/config/settings` 改)
+- config.yaml 里的 `seed_channels`/`keywords` 已废弃,启动时打警告