tagger.go 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100
  1. package processor
import (
	"sort"
	"strings"

	"spider/internal/model"
)
  6. // TagAndGradeWithConfig assigns industry_tag and level using configurable rules.
  7. func TagAndGradeWithConfig(merchants []MergedMerchant, cfg *model.GradingConfig) []TaggedMerchant {
  8. var result []TaggedMerchant
  9. for _, m := range merchants {
  10. tagged := TaggedMerchant{Merged: m}
  11. // Industry matching from config
  12. text := strings.ToLower(m.Best.MerchantName + " " + m.Best.OriginalText)
  13. for industry, keywords := range cfg.IndustryKeywords {
  14. for _, kw := range keywords {
  15. if strings.Contains(text, strings.ToLower(kw)) {
  16. tagged.IndustryTag = industry
  17. break
  18. }
  19. }
  20. if tagged.IndustryTag != "" {
  21. break
  22. }
  23. }
  24. // Inherit from raw if no match
  25. if tagged.IndustryTag == "" && m.Best.IndustryTag != "" {
  26. tagged.IndustryTag = m.Best.IndustryTag
  27. }
  28. // Level grading from config rules
  29. tagged.Level = gradeByRules(m, tagged.IndustryTag, cfg.Levels)
  30. result = append(result, tagged)
  31. }
  32. return result
  33. }
  34. // gradeByRules evaluates levels in order; first level with any matching rule wins.
  35. // Last level (usually "Cold") is the fallback when it has no rules.
  36. func gradeByRules(m MergedMerchant, industryTag string, levels []model.LevelDef) string {
  37. hasIndustry := industryTag != ""
  38. hasWebsite := m.Best.Website != ""
  39. hasEmail := m.Best.Email != ""
  40. hasPhone := m.Best.Phone != ""
  41. sourceCount := m.SourceCount
  42. for _, level := range levels {
  43. if len(level.Rules) == 0 {
  44. // No rules = fallback level (last one)
  45. continue
  46. }
  47. for _, rule := range level.Rules {
  48. if matchRule(rule, hasIndustry, hasWebsite, hasEmail, hasPhone, sourceCount) {
  49. return level.Key
  50. }
  51. }
  52. }
  53. // Return the last level as default fallback
  54. if len(levels) > 0 {
  55. return levels[len(levels)-1].Key
  56. }
  57. return "Cold"
  58. }
  59. func matchRule(rule model.GradeRule, hasIndustry, hasWebsite, hasEmail, hasPhone bool, sourceCount int) bool {
  60. if rule.HasIndustry != nil && *rule.HasIndustry != hasIndustry {
  61. return false
  62. }
  63. if rule.HasWebsite != nil && *rule.HasWebsite != hasWebsite {
  64. return false
  65. }
  66. if rule.HasEmail != nil && *rule.HasEmail != hasEmail {
  67. return false
  68. }
  69. if rule.HasPhone != nil && *rule.HasPhone != hasPhone {
  70. return false
  71. }
  72. if rule.MinSourceCount != nil && sourceCount < *rule.MinSourceCount {
  73. return false
  74. }
  75. return true
  76. }
  77. // TagAndGrade is the backward-compatible version using default config.
  78. func TagAndGrade(merchants []MergedMerchant) []TaggedMerchant {
  79. cfg := model.DefaultGradingConfig()
  80. return TagAndGradeWithConfig(merchants, &cfg)
  81. }
// TaggedMerchant is the final output of the processor: a merged merchant
// together with the industry tag and level assigned to it.
type TaggedMerchant struct {
	// Merged is the merchant record the tag and level were computed for.
	Merged MergedMerchant
	// IndustryTag is the industry matched from the configured keywords, or
	// the tag inherited from Merged.Best when no keyword matched; it is empty
	// when neither source provides one.
	IndustryTag string
	// Level is the level key chosen by gradeByRules.
	Level string
}