package pipeline

import (
	"context"
	"fmt"
	"log"
	"math"

	"gorm.io/gorm"

	"spider/internal/model"
)

// ScorePhase is pipeline phase 7: it assigns a quality score to every
// merchant whose status is "valid".
type ScorePhase struct {
	db       *gorm.DB
	reporter ProgressReporter
}

// NewScorePhase creates a new ScorePhase backed by db. No reporter is set;
// use SetReporter to receive progress callbacks.
func NewScorePhase(db *gorm.DB) *ScorePhase {
	return &ScorePhase{db: db}
}

// Name returns the phase identifier, "score".
func (p *ScorePhase) Name() string { return "score" }

// SetReporter installs the callback invoked once per merchant during Run.
func (p *ScorePhase) SetReporter(r ProgressReporter) { p.reporter = r }

// Run loads all merchants with status "valid", computes a quality score for
// each with calcScore and writes it to the quality_score column.
//
// It returns an error when the initial query fails. A failed update of a
// single merchant is logged and skipped so that one bad row does not abort
// the remaining ones. When ctx is done the loop stops early and Run still
// returns nil.
func (p *ScorePhase) Run(ctx context.Context, task *model.Task, opts *Options) error {
	var merchants []model.MerchantClean
	if err := p.db.Where("status = ?", "valid").Find(&merchants).Error; err != nil {
		return fmt.Errorf("score: loading valid merchants: %w", err)
	}

	total := len(merchants)
	log.Printf("[score] scoring %d valid merchants", total)

	// Index into the slice instead of ranging by value to avoid copying
	// every MerchantClean on each iteration.
	for i := range merchants {
		if isContextDone(ctx) {
			break
		}
		m := &merchants[i]

		if p.reporter != nil {
			p.reporter("score", i+1, total, "评分: @"+m.TgUsername)
		}

		score := calcScore(*m)
		if err := p.db.Model(m).Update("quality_score", score).Error; err != nil {
			// Best effort: report the failure and keep scoring the rest.
			log.Printf("[score] updating quality_score for @%s: %v", m.TgUsername, err)
		}
	}

	log.Printf("[score] done")
	return nil
}

// calcScore combines six weighted dimensions into a score in the range
// 0-100, rounded to two decimal places. The weights sum to 1.0.
func calcScore(m model.MerchantClean) float64 {
	// Dimension 1: member count (weight 0.25).
	memberScore := memberCountScore(m.MemberCount)

	// Dimension 2: premium flag (weight 0.15).
	premiumScore := 0.0
	if m.IsPremium {
		premiumScore = 100.0
	}

	// Dimension 3: activity level (weight 0.25).
	activityScore := activityLevelScore(m.ActiveLevel)

	// Dimension 4: number of sources (weight 0.20).
	multiScore := multiSourceScore(m.SourceCount)

	// Dimension 5: has a website (weight 0.10).
	websiteScore := 0.0
	if m.Website != "" {
		websiteScore = 100.0
	}

	// Dimension 6: has an email address (weight 0.05).
	emailScore := 0.0
	if m.Email != "" {
		emailScore = 100.0
	}

	total := memberScore*0.25 +
		premiumScore*0.15 +
		activityScore*0.25 +
		multiScore*0.20 +
		websiteScore*0.10 +
		emailScore*0.05

	// Round to two decimal places.
	return math.Round(total*100) / 100
}

// memberCountScore maps a member count onto a stepped 10-100 scale; any
// count below 100 (including zero or negative values) yields 10.
func memberCountScore(count int) float64 {
	switch {
	case count >= 100000:
		return 100
	case count >= 10000:
		return 80
	case count >= 1000:
		return 50
	case count >= 100:
		return 30
	default:
		return 10
	}
}

// activityLevelScore maps an activity label onto a score: "active" is 100,
// "moderate" is 50, "inactive" is 20 and any other value is 0.
func activityLevelScore(level string) float64 {
	switch level {
	case "active":
		return 100
	case "moderate":
		return 50
	case "inactive":
		return 20
	default:
		return 0
	}
}

// multiSourceScore maps the number of sources onto a score: four or more
// is 100, three is 70, two is 40 and anything lower is 10.
func multiSourceScore(count int) float64 {
	switch {
	case count >= 4:
		return 100
	case count == 3:
		return 70
	case count == 2:
		return 40
	default:
		return 10
	}
}