dot 6 天之前
父節點
當前提交
6e2ef3187a

File diff suppressed because it is too large
+ 0 - 141
backup/spider_backup_20260429_210424.sql


+ 0 - 323
backup/spider_pre_v2_20260410_194440.sql

@@ -1,323 +0,0 @@
--- MySQL dump 10.13  Distrib 8.0.45, for Linux (x86_64)
---
--- Host: localhost    Database: spider
--- ------------------------------------------------------
--- Server version	8.0.45
-
-/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
-/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
-/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
-/*!50503 SET NAMES utf8mb4 */;
-/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */;
-/*!40103 SET TIME_ZONE='+00:00' */;
-/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */;
-/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;
-/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;
-/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;
-
---
--- Table structure for table `channels`
---
-
-DROP TABLE IF EXISTS `channels`;
-/*!40101 SET @saved_cs_client     = @@character_set_client */;
-/*!50503 SET character_set_client = utf8mb4 */;
-CREATE TABLE `channels` (
-  `id` bigint unsigned NOT NULL AUTO_INCREMENT,
-  `username` varchar(255) COLLATE utf8mb4_unicode_ci NOT NULL,
-  `title` varchar(500) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
-  `member_count` bigint DEFAULT '0',
-  `about` text COLLATE utf8mb4_unicode_ci,
-  `source` enum('seed','snowball','search','github') COLLATE utf8mb4_unicode_ci NOT NULL,
-  `source_detail` varchar(500) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
-  `status` enum('pending','scraped','failed','skipped') COLLATE utf8mb4_unicode_ci DEFAULT 'pending',
-  `last_message_id` bigint DEFAULT '0',
-  `relevance_score` double DEFAULT NULL,
-  `created_at` datetime(3) DEFAULT NULL,
-  `updated_at` datetime(3) DEFAULT NULL,
-  PRIMARY KEY (`id`),
-  UNIQUE KEY `idx_channels_username` (`username`),
-  KEY `idx_channels_source` (`source`),
-  KEY `idx_channels_status` (`status`)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-/*!40101 SET character_set_client = @saved_cs_client */;
-
---
--- Dumping data for table `channels`
---
-
-LOCK TABLES `channels` WRITE;
-/*!40000 ALTER TABLE `channels` DISABLE KEYS */;
-/*!40000 ALTER TABLE `channels` ENABLE KEYS */;
-UNLOCK TABLES;
-
---
--- Table structure for table `config_revisions`
---
-
-DROP TABLE IF EXISTS `config_revisions`;
-/*!40101 SET @saved_cs_client     = @@character_set_client */;
-/*!50503 SET character_set_client = utf8mb4 */;
-CREATE TABLE `config_revisions` (
-  `id` bigint unsigned NOT NULL AUTO_INCREMENT,
-  `setting_key` varchar(255) COLLATE utf8mb4_unicode_ci NOT NULL,
-  `old_value` text COLLATE utf8mb4_unicode_ci,
-  `new_value` text COLLATE utf8mb4_unicode_ci,
-  `changed_by` varchar(100) COLLATE utf8mb4_unicode_ci DEFAULT 'admin',
-  `created_at` datetime(3) DEFAULT NULL,
-  PRIMARY KEY (`id`),
-  KEY `idx_config_revisions_setting_key` (`setting_key`)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-/*!40101 SET character_set_client = @saved_cs_client */;
-
---
--- Dumping data for table `config_revisions`
---
-
-LOCK TABLES `config_revisions` WRITE;
-/*!40000 ALTER TABLE `config_revisions` DISABLE KEYS */;
-/*!40000 ALTER TABLE `config_revisions` ENABLE KEYS */;
-UNLOCK TABLES;
-
---
--- Table structure for table `managed_keywords`
---
-
-DROP TABLE IF EXISTS `managed_keywords`;
-/*!40101 SET @saved_cs_client     = @@character_set_client */;
-/*!50503 SET character_set_client = utf8mb4 */;
-CREATE TABLE `managed_keywords` (
-  `id` bigint unsigned NOT NULL AUTO_INCREMENT,
-  `keyword` varchar(255) COLLATE utf8mb4_unicode_ci NOT NULL,
-  `category` varchar(100) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
-  `status` enum('active','inactive') COLLATE utf8mb4_unicode_ci DEFAULT 'active',
-  `created_at` datetime(3) DEFAULT NULL,
-  PRIMARY KEY (`id`),
-  UNIQUE KEY `idx_managed_keywords_keyword` (`keyword`)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-/*!40101 SET character_set_client = @saved_cs_client */;
-
---
--- Dumping data for table `managed_keywords`
---
-
-LOCK TABLES `managed_keywords` WRITE;
-/*!40000 ALTER TABLE `managed_keywords` DISABLE KEYS */;
-/*!40000 ALTER TABLE `managed_keywords` ENABLE KEYS */;
-UNLOCK TABLES;
-
---
--- Table structure for table `managed_seeds`
---
-
-DROP TABLE IF EXISTS `managed_seeds`;
-/*!40101 SET @saved_cs_client     = @@character_set_client */;
-/*!50503 SET character_set_client = utf8mb4 */;
-CREATE TABLE `managed_seeds` (
-  `id` bigint unsigned NOT NULL AUTO_INCREMENT,
-  `channel_name` varchar(255) COLLATE utf8mb4_unicode_ci NOT NULL,
-  `status` enum('active','inactive') COLLATE utf8mb4_unicode_ci DEFAULT 'active',
-  `note` varchar(500) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
-  `created_at` datetime(3) DEFAULT NULL,
-  `updated_at` datetime(3) DEFAULT NULL,
-  PRIMARY KEY (`id`),
-  UNIQUE KEY `idx_managed_seeds_channel_name` (`channel_name`)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-/*!40101 SET character_set_client = @saved_cs_client */;
-
---
--- Dumping data for table `managed_seeds`
---
-
-LOCK TABLES `managed_seeds` WRITE;
-/*!40000 ALTER TABLE `managed_seeds` DISABLE KEYS */;
-/*!40000 ALTER TABLE `managed_seeds` ENABLE KEYS */;
-UNLOCK TABLES;
-
---
--- Table structure for table `managed_settings`
---
-
-DROP TABLE IF EXISTS `managed_settings`;
-/*!40101 SET @saved_cs_client     = @@character_set_client */;
-/*!50503 SET character_set_client = utf8mb4 */;
-CREATE TABLE `managed_settings` (
-  `id` bigint unsigned NOT NULL AUTO_INCREMENT,
-  `key_name` varchar(255) COLLATE utf8mb4_unicode_ci NOT NULL,
-  `value` text COLLATE utf8mb4_unicode_ci NOT NULL,
-  `value_type` enum('int','float','bool','string','json') COLLATE utf8mb4_unicode_ci NOT NULL,
-  `effect_level` enum('runtime','new_task') COLLATE utf8mb4_unicode_ci DEFAULT 'runtime',
-  `description` varchar(500) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
-  `updated_at` datetime(3) DEFAULT NULL,
-  PRIMARY KEY (`id`),
-  UNIQUE KEY `idx_managed_settings_key_name` (`key_name`)
-) ENGINE=InnoDB AUTO_INCREMENT=14 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-/*!40101 SET character_set_client = @saved_cs_client */;
-
---
--- Dumping data for table `managed_settings`
---
-
-LOCK TABLES `managed_settings` WRITE;
-/*!40000 ALTER TABLE `managed_settings` DISABLE KEYS */;
-INSERT INTO `managed_settings` VALUES (1,'pipeline.skip_phases','[]','json','new_task','默认跳过的阶段','2026-04-09 12:40:53.888'),(2,'pipeline.checkpoint_interval','30','int','runtime','进度上报间隔(秒)','2026-04-09 12:40:53.900'),(3,'tg_scraper.message_limit_per_channel','500','int','runtime','每频道最大消息数','2026-04-09 12:40:53.915'),(4,'tg_scraper.delay_per_message','1.0','float','runtime','消息间延迟(秒)','2026-04-09 12:40:53.925'),(5,'tg_scraper.delay_per_channel','5.0','float','runtime','频道间延迟(秒)','2026-04-09 12:40:53.934'),(6,'tg_scraper.delay_per_verify','3.0','float','runtime','验证间延迟(秒)','2026-04-09 12:40:53.944'),(7,'clean.timeout_seconds','3600','int','runtime','清洗阶段超时(秒)','2026-04-09 12:40:53.954'),(8,'search.timeout_seconds','3600','int','runtime','搜索阶段超时(秒)','2026-04-09 12:40:53.964'),(9,'snowball.max_channels_per_layer','200','int','runtime','每层最大频道数','2026-04-09 12:40:53.974'),(10,'snowball.max_channels_total','500','int','runtime','总最大频道数','2026-04-09 12:40:53.983'),(11,'tme_validator.enabled','true','bool','runtime','启用t.me死号预检','2026-04-09 12:40:53.993'),(12,'tme_validator.rate_per_min','60','int','runtime','预检限速(次/分)','2026-04-09 12:40:54.006'),(13,'tme_validator.concurrency','10','int','runtime','预检并发数','2026-04-09 12:40:54.015');
-/*!40000 ALTER TABLE `managed_settings` ENABLE KEYS */;
-UNLOCK TABLES;
-
---
--- Table structure for table `merchant_cleans`
---
-
-DROP TABLE IF EXISTS `merchant_cleans`;
-/*!40101 SET @saved_cs_client     = @@character_set_client */;
-/*!50503 SET character_set_client = utf8mb4 */;
-CREATE TABLE `merchant_cleans` (
-  `id` bigint unsigned NOT NULL AUTO_INCREMENT,
-  `raw_id` bigint unsigned DEFAULT NULL,
-  `merchant_name` varchar(500) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
-  `tg_username` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
-  `website` varchar(2048) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
-  `email` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
-  `phone` varchar(100) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
-  `industry` varchar(100) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
-  `status` enum('valid','invalid','bot','duplicate','group') COLLATE utf8mb4_unicode_ci NOT NULL,
-  `tg_first_name` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
-  `tg_last_name` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
-  `is_premium` tinyint(1) DEFAULT '0',
-  `last_online` datetime(3) DEFAULT NULL,
-  `active_level` enum('active','moderate','inactive') COLLATE utf8mb4_unicode_ci DEFAULT NULL,
-  `member_count` bigint DEFAULT '0',
-  `quality_score` double DEFAULT '0',
-  `source_count` bigint DEFAULT '1',
-  `source_links` json DEFAULT NULL,
-  `created_at` datetime(3) DEFAULT NULL,
-  `updated_at` datetime(3) DEFAULT NULL,
-  PRIMARY KEY (`id`),
-  UNIQUE KEY `idx_merchant_cleans_tg_username` (`tg_username`),
-  KEY `idx_merchant_cleans_raw_id` (`raw_id`),
-  KEY `idx_merchant_cleans_industry` (`industry`),
-  KEY `idx_merchant_cleans_status` (`status`),
-  KEY `idx_merchant_cleans_quality_score` (`quality_score`)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-/*!40101 SET character_set_client = @saved_cs_client */;
-
---
--- Dumping data for table `merchant_cleans`
---
-
-LOCK TABLES `merchant_cleans` WRITE;
-/*!40000 ALTER TABLE `merchant_cleans` DISABLE KEYS */;
-/*!40000 ALTER TABLE `merchant_cleans` ENABLE KEYS */;
-UNLOCK TABLES;
-
---
--- Table structure for table `merchant_raws`
---
-
-DROP TABLE IF EXISTS `merchant_raws`;
-/*!40101 SET @saved_cs_client     = @@character_set_client */;
-/*!50503 SET character_set_client = utf8mb4 */;
-CREATE TABLE `merchant_raws` (
-  `id` bigint unsigned NOT NULL AUTO_INCREMENT,
-  `merchant_name` varchar(500) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
-  `tg_username` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
-  `website` varchar(2048) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
-  `email` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
-  `phone` varchar(100) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
-  `industry` varchar(100) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
-  `source_type` enum('tg_scrape','web_crawl','github') COLLATE utf8mb4_unicode_ci NOT NULL,
-  `source_id` varchar(500) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
-  `original_message` text COLLATE utf8mb4_unicode_ci,
-  `status` enum('raw','glm_parsed') COLLATE utf8mb4_unicode_ci DEFAULT 'raw',
-  `created_at` datetime(3) DEFAULT NULL,
-  PRIMARY KEY (`id`),
-  KEY `idx_merchant_raws_tg_username` (`tg_username`),
-  KEY `idx_merchant_raws_status` (`status`)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-/*!40101 SET character_set_client = @saved_cs_client */;
-
---
--- Dumping data for table `merchant_raws`
---
-
-LOCK TABLES `merchant_raws` WRITE;
-/*!40000 ALTER TABLE `merchant_raws` DISABLE KEYS */;
-/*!40000 ALTER TABLE `merchant_raws` ENABLE KEYS */;
-UNLOCK TABLES;
-
---
--- Table structure for table `nav_sites`
---
-
-DROP TABLE IF EXISTS `nav_sites`;
-/*!40101 SET @saved_cs_client     = @@character_set_client */;
-/*!50503 SET character_set_client = utf8mb4 */;
-CREATE TABLE `nav_sites` (
-  `id` bigint unsigned NOT NULL AUTO_INCREMENT,
-  `url` varchar(2048) COLLATE utf8mb4_unicode_ci NOT NULL,
-  `domain` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
-  `source` varchar(100) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
-  `status` enum('pending','scraped','filtered','failed') COLLATE utf8mb4_unicode_ci DEFAULT 'pending',
-  `filter_reason` varchar(255) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
-  `merchant_count` bigint DEFAULT '0',
-  `created_at` datetime(3) DEFAULT NULL,
-  PRIMARY KEY (`id`),
-  UNIQUE KEY `idx_url` (`url`(500)),
-  KEY `idx_nav_sites_domain` (`domain`),
-  KEY `idx_nav_sites_status` (`status`)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-/*!40101 SET character_set_client = @saved_cs_client */;
-
---
--- Dumping data for table `nav_sites`
---
-
-LOCK TABLES `nav_sites` WRITE;
-/*!40000 ALTER TABLE `nav_sites` DISABLE KEYS */;
-/*!40000 ALTER TABLE `nav_sites` ENABLE KEYS */;
-UNLOCK TABLES;
-
---
--- Table structure for table `tasks`
---
-
-DROP TABLE IF EXISTS `tasks`;
-/*!40101 SET @saved_cs_client     = @@character_set_client */;
-/*!50503 SET character_set_client = utf8mb4 */;
-CREATE TABLE `tasks` (
-  `id` bigint unsigned NOT NULL AUTO_INCREMENT,
-  `task_type` enum('full','discover','search','github','scrape','crawl','clean','score') COLLATE utf8mb4_unicode_ci NOT NULL,
-  `status` enum('pending','running','completed','failed','stopped') COLLATE utf8mb4_unicode_ci DEFAULT 'pending',
-  `params` json DEFAULT NULL,
-  `progress` json DEFAULT NULL,
-  `result` json DEFAULT NULL,
-  `error_msg` text COLLATE utf8mb4_unicode_ci,
-  `started_at` datetime(3) DEFAULT NULL,
-  `finished_at` datetime(3) DEFAULT NULL,
-  `created_at` datetime(3) DEFAULT NULL,
-  PRIMARY KEY (`id`),
-  KEY `idx_tasks_task_type` (`task_type`),
-  KEY `idx_tasks_status` (`status`)
-) ENGINE=InnoDB AUTO_INCREMENT=6 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-/*!40101 SET character_set_client = @saved_cs_client */;
-
---
--- Dumping data for table `tasks`
---
-
-LOCK TABLES `tasks` WRITE;
-/*!40000 ALTER TABLE `tasks` DISABLE KEYS */;
-INSERT INTO `tasks` VALUES (1,'full','completed','{\"target\": \"\", \"test_run\": {\"item_limit\": 10, \"message_limit\": 100}, \"task_type\": \"full\", \"skip_phases\": null}',NULL,'{\"message\": \"task completed successfully\"}','','2026-04-09 12:44:49.753','2026-04-09 12:44:50.501','2026-04-09 12:44:49.733'),(2,'score','completed','{\"target\": \"\", \"test_run\": {\"item_limit\": 5, \"message_limit\": 10}, \"task_type\": \"score\", \"skip_phases\": null}',NULL,'{\"message\": \"task completed successfully\"}','','2026-04-09 13:10:50.991','2026-04-09 13:10:51.008','2026-04-09 13:10:50.402'),(3,'score','completed','{\"target\": \"\", \"test_run\": {\"item_limit\": 5, \"message_limit\": 10}, \"task_type\": \"score\", \"skip_phases\": null}',NULL,'{\"message\": \"task completed successfully\"}','','2026-04-09 13:10:56.222','2026-04-09 13:10:56.238','2026-04-09 13:10:55.281'),(4,'score','completed','{\"target\": \"\", \"test_run\": {\"item_limit\": 5, \"message_limit\": 10}, \"task_type\": \"score\", \"skip_phases\": null}',NULL,'{\"message\": \"task completed successfully\"}','','2026-04-09 13:20:42.118','2026-04-09 13:20:42.132','2026-04-09 13:20:42.031'),(5,'discover','completed','{\"target\": \"\", \"test_run\": {\"item_limit\": 10, \"message_limit\": 100}, \"task_type\": \"discover\", \"skip_phases\": null}',NULL,'{\"message\": \"task completed successfully\"}','','2026-04-09 13:36:20.604','2026-04-09 13:36:20.626','2026-04-09 13:36:19.601');
-/*!40000 ALTER TABLE `tasks` ENABLE KEYS */;
-UNLOCK TABLES;
-/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;
-
-/*!40101 SET SQL_MODE=@OLD_SQL_MODE */;
-/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;
-/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */;
-/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
-/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
-/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
-/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;
-
--- Dump completed on 2026-04-10 11:44:41

+ 2 - 47
deploy/docker-compose.local.yml

@@ -1,40 +1,4 @@
 services:
-  im_mysql:
-    image: mysql:8.0
-    container_name: im_mysql
-    environment:
-      MYSQL_ROOT_PASSWORD: root123
-      MYSQL_DATABASE: spider
-      MYSQL_CHARACTER_SET_SERVER: utf8mb4
-      MYSQL_COLLATION_SERVER: utf8mb4_unicode_ci
-    ports:
-      - "3307:3306"
-    volumes:
-      - mysql_data:/var/lib/mysql
-    healthcheck:
-      test: ["CMD", "mysqladmin", "ping", "-h", "localhost", "-uroot", "-proot123"]
-      interval: 10s
-      timeout: 5s
-      retries: 10
-      start_period: 300s
-    networks:
-      - chat_im_network
-
-  im_redis:
-    image: redis:7-alpine
-    container_name: im_redis
-    ports:
-      - "6379:6379"
-    volumes:
-      - redis_data:/data
-    healthcheck:
-      test: ["CMD", "redis-cli", "ping"]
-      interval: 10s
-      timeout: 3s
-      retries: 5
-    networks:
-      - chat_im_network
-
   api:
     build:
       context: ..
@@ -48,11 +12,6 @@ services:
     environment:
       - GIN_MODE=release
       - TG_SECRET_KEY=${TG_SECRET_KEY:-d9RkIHw7wLaa5Qx+B7/QOw5K6+bW67Ife4UK91h4Z2Q=}
-    depends_on:
-      im_mysql:
-        condition: service_healthy
-      im_redis:
-        condition: service_healthy
     restart: unless-stopped
     healthcheck:
       test: ["CMD", "wget", "-q", "-O-", "http://localhost:8080/ping"]
@@ -75,7 +34,7 @@ services:
         condition: service_healthy
     restart: unless-stopped
     healthcheck:
-      test: ["CMD", "curl", "-sf", "http://localhost:80/"]
+      test: ["CMD", "wget", "-q", "-O", "/dev/null", "http://127.0.0.1/"]
       interval: 30s
       timeout: 5s
       retries: 3
@@ -84,8 +43,4 @@ services:
 
 networks:
   chat_im_network:
-    driver: bridge
-
-volumes:
-  mysql_data:
-  redis_data:
+    external: true

+ 1 - 1
deploy/docker-compose.yml

@@ -41,7 +41,7 @@ services:
         condition: service_healthy
     restart: unless-stopped
     healthcheck:
-      test: ["CMD", "wget", "--spider", "-q", "http://localhost:80/"]
+      test: ["CMD", "wget", "-q", "-O", "/dev/null", "http://127.0.0.1/"]
       interval: 30s
       timeout: 5s
       retries: 3

Some files were not shown because too many files changed in this diff