migrate_v2.sql 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150
  1. -- Migration: v1 (pipeline) -> v2 (plugin architecture)
  2. -- Compatible with MySQL 5.7+
  3. --
  4. -- BEFORE running:
  5. -- 1. Stop the old application
  6. -- 2. Backup: mysqldump -u root -p spider > spider_backup_$(date +%Y%m%d).sql
  7. -- 1. Create keywords table (merge managed_seeds + managed_keywords)
  8. CREATE TABLE IF NOT EXISTS keywords (
  9. id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
  10. keyword VARCHAR(255) NOT NULL,
  11. industry_tag VARCHAR(100) DEFAULT '',
  12. enabled TINYINT(1) DEFAULT 1,
  13. created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
  14. UNIQUE INDEX idx_keyword (keyword)
  15. ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
  16. INSERT IGNORE INTO keywords (keyword, industry_tag, enabled, created_at)
  17. SELECT keyword, COALESCE(category,''), IF(status='active', 1, 0), created_at
  18. FROM managed_keywords;
  19. INSERT IGNORE INTO keywords (keyword, industry_tag, enabled, created_at)
  20. SELECT channel_name, 'seed', IF(status='active', 1, 0), created_at
  21. FROM managed_seeds;
  22. -- 2. Recreate merchants_raw with new schema
  23. CREATE TABLE merchants_raw_v2 (
  24. id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
  25. tg_username VARCHAR(255) NOT NULL,
  26. tg_link VARCHAR(500) DEFAULT '',
  27. merchant_name VARCHAR(500) DEFAULT '',
  28. website VARCHAR(2048) DEFAULT '',
  29. email VARCHAR(255) DEFAULT '',
  30. phone VARCHAR(100) DEFAULT '',
  31. source_type VARCHAR(50) NOT NULL DEFAULT 'web',
  32. source_name VARCHAR(500) DEFAULT '',
  33. source_url VARCHAR(2048) DEFAULT '',
  34. original_text TEXT,
  35. industry_tag VARCHAR(100) DEFAULT '',
  36. status VARCHAR(20) DEFAULT 'raw',
  37. created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
  38. INDEX idx_tg_username (tg_username),
  39. INDEX idx_status (status)
  40. ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
  41. INSERT INTO merchants_raw_v2 (id, tg_username, tg_link, merchant_name, website, email, phone,
  42. source_type, source_name, source_url, original_text, industry_tag, status, created_at)
  43. SELECT id, COALESCE(tg_username,''), CONCAT('https://t.me/', COALESCE(tg_username,'')),
  44. COALESCE(merchant_name,''), COALESCE(website,''), COALESCE(email,''), COALESCE(phone,''),
  45. COALESCE(source_type,'web'), '', COALESCE(source_id, ''),
  46. COALESCE(original_message, ''), COALESCE(industry, ''),
  47. CASE WHEN status IN ('raw', 'glm_parsed') THEN 'raw' ELSE 'raw' END,
  48. created_at
  49. FROM merchants_raw;
  50. RENAME TABLE merchants_raw TO merchants_raw_old, merchants_raw_v2 TO merchants_raw;
  51. -- 3. Recreate merchants_clean with new schema
  52. CREATE TABLE merchants_clean_v2 (
  53. id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
  54. tg_username VARCHAR(255) DEFAULT '',
  55. tg_link VARCHAR(500) DEFAULT '',
  56. merchant_name VARCHAR(500) DEFAULT '',
  57. website VARCHAR(2048) DEFAULT '',
  58. email VARCHAR(255) DEFAULT '',
  59. phone VARCHAR(100) DEFAULT '',
  60. source_count INT DEFAULT 1,
  61. all_sources JSON,
  62. industry_tag VARCHAR(100) DEFAULT '',
  63. level VARCHAR(10) DEFAULT 'Cold',
  64. status VARCHAR(20) NOT NULL DEFAULT 'valid',
  65. is_alive TINYINT(1) DEFAULT 0,
  66. last_checked_at DATETIME,
  67. created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
  68. updated_at DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  69. UNIQUE INDEX idx_tg_username (tg_username),
  70. INDEX idx_status (status),
  71. INDEX idx_level (level),
  72. INDEX idx_industry_tag (industry_tag)
  73. ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
  74. INSERT INTO merchants_clean_v2 (id, tg_username, tg_link, merchant_name, website, email, phone,
  75. source_count, all_sources, industry_tag, level, status, is_alive, created_at, updated_at)
  76. SELECT id, COALESCE(tg_username,''), CONCAT('https://t.me/', COALESCE(tg_username,'')),
  77. COALESCE(merchant_name,''), COALESCE(website,''), COALESCE(email,''), COALESCE(phone,''),
  78. COALESCE(source_count,1), COALESCE(source_links, CAST('[]' AS JSON)),
  79. COALESCE(industry, ''),
  80. CASE
  81. WHEN quality_score >= 60 THEN 'Hot'
  82. WHEN quality_score >= 30 THEN 'Warm'
  83. ELSE 'Cold'
  84. END,
  85. status,
  86. CASE WHEN status = 'valid' THEN 1 ELSE 0 END,
  87. created_at, updated_at
  88. FROM merchants_clean;
  89. RENAME TABLE merchants_clean TO merchants_clean_old, merchants_clean_v2 TO merchants_clean;
  90. -- 4. Recreate channels with new schema (simpler than ALTER for enum->varchar)
  91. CREATE TABLE channels_v2 (
  92. id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
  93. username VARCHAR(255) NOT NULL,
  94. channel_id BIGINT DEFAULT 0,
  95. access_hash BIGINT DEFAULT 0,
  96. status VARCHAR(20) DEFAULT 'pending',
  97. last_message_id INT DEFAULT 0,
  98. merchants_found INT DEFAULT 0,
  99. source VARCHAR(50) NOT NULL DEFAULT 'search',
  100. created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
  101. updated_at DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  102. UNIQUE INDEX idx_username (username),
  103. INDEX idx_status (status),
  104. INDEX idx_source (source)
  105. ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
  106. INSERT INTO channels_v2 (id, username, status, last_message_id, source, created_at, updated_at)
  107. SELECT id, username, status, last_message_id, source, created_at, updated_at
  108. FROM channels;
  109. RENAME TABLE channels TO channels_old, channels_v2 TO channels;
  110. -- 5. Create task_logs table
  111. CREATE TABLE IF NOT EXISTS task_logs (
  112. id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
  113. task_type VARCHAR(50) NOT NULL,
  114. plugin_name VARCHAR(100) DEFAULT '',
  115. status VARCHAR(20) DEFAULT 'pending',
  116. items_processed INT DEFAULT 0,
  117. merchants_added INT DEFAULT 0,
  118. errors_count INT DEFAULT 0,
  119. started_at DATETIME,
  120. finished_at DATETIME,
  121. detail TEXT,
  122. created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
  123. INDEX idx_task_type (task_type),
  124. INDEX idx_status (status)
  125. ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
  126. -- Done! Old tables preserved as *_old for safety.
  127. -- After verifying the new system works, run:
  128. -- DROP TABLE IF EXISTS merchants_raw_old;
  129. -- DROP TABLE IF EXISTS merchants_clean_old;
  130. -- DROP TABLE IF EXISTS channels_old;
  131. -- DROP TABLE IF EXISTS managed_seeds;
  132. -- DROP TABLE IF EXISTS managed_keywords;
  133. -- DROP TABLE IF EXISTS managed_settings;
  134. -- DROP TABLE IF EXISTS config_revisions;
  135. -- DROP TABLE IF EXISTS nav_sites;
  136. -- DROP TABLE IF EXISTS tasks;