| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126 |
- package pipeline
- import (
- "context"
- "log"
- "math"
- "gorm.io/gorm"
- "spider/internal/model"
- )
- // ScorePhase Phase 7: 评分
- type ScorePhase struct {
- db *gorm.DB
- reporter ProgressReporter
- }
- // NewScorePhase creates a new ScorePhase.
- func NewScorePhase(db *gorm.DB) *ScorePhase {
- return &ScorePhase{db: db}
- }
- func (p *ScorePhase) Name() string { return "score" }
- func (p *ScorePhase) SetReporter(r ProgressReporter) { p.reporter = r }
- func (p *ScorePhase) Run(ctx context.Context, task *model.Task, opts *Options) error {
- var merchants []model.MerchantClean
- p.db.Where("status = ?", "valid").Find(&merchants)
- total := len(merchants)
- log.Printf("[score] scoring %d valid merchants", total)
- for i, m := range merchants {
- if isContextDone(ctx) {
- break
- }
- if p.reporter != nil {
- p.reporter("score", i+1, total, "评分: @"+m.TgUsername)
- }
- score := calcScore(m)
- p.db.Model(&m).Update("quality_score", score)
- }
- log.Printf("[score] done")
- return nil
- }
- // calcScore 6 维度加权打分 (0-100)
- func calcScore(m model.MerchantClean) float64 {
- // 维度1: member_count (权重 0.25)
- memberScore := memberCountScore(m.MemberCount)
- // 维度2: premium (权重 0.15)
- premiumScore := 0.0
- if m.IsPremium {
- premiumScore = 100.0
- }
- // 维度3: activity (权重 0.25)
- activityScore := activityLevelScore(m.ActiveLevel)
- // 维度4: multi_source (权重 0.20)
- multiScore := multiSourceScore(m.SourceCount)
- // 维度5: has_website (权重 0.10)
- websiteScore := 0.0
- if m.Website != "" {
- websiteScore = 100.0
- }
- // 维度6: has_email (权重 0.05)
- emailScore := 0.0
- if m.Email != "" {
- emailScore = 100.0
- }
- total := memberScore*0.25 + premiumScore*0.15 + activityScore*0.25 +
- multiScore*0.20 + websiteScore*0.10 + emailScore*0.05
- return math.Round(total*100) / 100
- }
// memberCountScore maps a member count onto a tiered score:
// >= 100000 -> 100, >= 10000 -> 80, >= 1000 -> 50, >= 100 -> 30,
// anything lower (including zero and negative counts) -> 10.
func memberCountScore(count int) float64 {
	// Tiers are ordered from highest threshold to lowest; the first match wins.
	tiers := [...]struct {
		floor int
		score float64
	}{
		{100000, 100},
		{10000, 80},
		{1000, 50},
		{100, 30},
	}
	for _, tier := range tiers {
		if count >= tier.floor {
			return tier.score
		}
	}
	return 10
}
// activityLevelScore maps an activity level label onto a score:
// "active" -> 100, "moderate" -> 50, "inactive" -> 20.
// Any other value, including the empty string, scores 0. The comparison is
// case-sensitive.
func activityLevelScore(level string) float64 {
	if level == "active" {
		return 100
	}
	if level == "moderate" {
		return 50
	}
	if level == "inactive" {
		return 20
	}
	return 0
}
// multiSourceScore maps the number of sources onto a score:
// 4 or more -> 100, exactly 3 -> 70, exactly 2 -> 40,
// and anything lower (one, zero or negative) -> 10.
func multiSourceScore(count int) float64 {
	if count >= 4 {
		return 100
	}
	switch count {
	case 3:
		return 70
	case 2:
		return 40
	}
	return 10
}
|