phase7_score.go 2.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126
  1. package pipeline
  import (
  	"context"
  	"fmt"
  	"log"
  	"math"

  	"gorm.io/gorm"

  	"spider/internal/model"
  )
  9. // ScorePhase Phase 7: 评分
  10. type ScorePhase struct {
  11. db *gorm.DB
  12. reporter ProgressReporter
  13. }
  14. // NewScorePhase creates a new ScorePhase.
  15. func NewScorePhase(db *gorm.DB) *ScorePhase {
  16. return &ScorePhase{db: db}
  17. }
  18. func (p *ScorePhase) Name() string { return "score" }
  19. func (p *ScorePhase) SetReporter(r ProgressReporter) { p.reporter = r }
  20. func (p *ScorePhase) Run(ctx context.Context, task *model.Task, opts *Options) error {
  21. var merchants []model.MerchantClean
  22. p.db.Where("status = ?", "valid").Find(&merchants)
  23. total := len(merchants)
  24. log.Printf("[score] scoring %d valid merchants", total)
  25. for i, m := range merchants {
  26. if isContextDone(ctx) {
  27. break
  28. }
  29. if p.reporter != nil {
  30. p.reporter("score", i+1, total, "评分: @"+m.TgUsername)
  31. }
  32. score := calcScore(m)
  33. p.db.Model(&m).Update("quality_score", score)
  34. }
  35. log.Printf("[score] done")
  36. return nil
  37. }
  38. // calcScore 6 维度加权打分 (0-100)
  39. func calcScore(m model.MerchantClean) float64 {
  40. // 维度1: member_count (权重 0.25)
  41. memberScore := memberCountScore(m.MemberCount)
  42. // 维度2: premium (权重 0.15)
  43. premiumScore := 0.0
  44. if m.IsPremium {
  45. premiumScore = 100.0
  46. }
  47. // 维度3: activity (权重 0.25)
  48. activityScore := activityLevelScore(m.ActiveLevel)
  49. // 维度4: multi_source (权重 0.20)
  50. multiScore := multiSourceScore(m.SourceCount)
  51. // 维度5: has_website (权重 0.10)
  52. websiteScore := 0.0
  53. if m.Website != "" {
  54. websiteScore = 100.0
  55. }
  56. // 维度6: has_email (权重 0.05)
  57. emailScore := 0.0
  58. if m.Email != "" {
  59. emailScore = 100.0
  60. }
  61. total := memberScore*0.25 + premiumScore*0.15 + activityScore*0.25 +
  62. multiScore*0.20 + websiteScore*0.10 + emailScore*0.05
  63. return math.Round(total*100) / 100
  64. }
  // memberCountScore maps a member count onto a tiered score: 100 for at least
  // 100000 members, 80 for at least 10000, 50 for at least 1000, 30 for at
  // least 100, and 10 for anything smaller.
  func memberCountScore(count int) float64 {
  	// Tiers are listed from the highest threshold down; the first match wins.
  	tiers := [...]struct {
  		floor int
  		score float64
  	}{
  		{100000, 100},
  		{10000, 80},
  		{1000, 50},
  		{100, 30},
  	}
  	for _, tier := range tiers {
  		if count >= tier.floor {
  			return tier.score
  		}
  	}
  	return 10
  }
  // activityLevelScore converts an activity label into a score: "active" is
  // 100, "moderate" is 50, "inactive" is 20, and any other value is 0. The
  // comparison is exact and case-sensitive.
  func activityLevelScore(level string) float64 {
  	if level == "active" {
  		return 100
  	}
  	if level == "moderate" {
  		return 50
  	}
  	if level == "inactive" {
  		return 20
  	}
  	return 0
  }
  // multiSourceScore scores how many sources a merchant was found in: fewer
  // than two sources gives 10, exactly two gives 40, exactly three gives 70,
  // and four or more gives 100.
  func multiSourceScore(count int) float64 {
  	if count < 2 {
  		return 10
  	}
  	if count == 2 {
  		return 40
  	}
  	if count == 3 {
  		return 70
  	}
  	return 100
  }
  102. }