| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100 |
- package processor
import (
	"sort"
	"strings"

	"spider/internal/model"
)
- // TagAndGradeWithConfig assigns industry_tag and level using configurable rules.
- func TagAndGradeWithConfig(merchants []MergedMerchant, cfg *model.GradingConfig) []TaggedMerchant {
- var result []TaggedMerchant
- for _, m := range merchants {
- tagged := TaggedMerchant{Merged: m}
- // Industry matching from config
- text := strings.ToLower(m.Best.MerchantName + " " + m.Best.OriginalText)
- for industry, keywords := range cfg.IndustryKeywords {
- for _, kw := range keywords {
- if strings.Contains(text, strings.ToLower(kw)) {
- tagged.IndustryTag = industry
- break
- }
- }
- if tagged.IndustryTag != "" {
- break
- }
- }
- // Inherit from raw if no match
- if tagged.IndustryTag == "" && m.Best.IndustryTag != "" {
- tagged.IndustryTag = m.Best.IndustryTag
- }
- // Level grading from config rules
- tagged.Level = gradeByRules(m, tagged.IndustryTag, cfg.Levels)
- result = append(result, tagged)
- }
- return result
- }
- // gradeByRules evaluates levels in order; first level with any matching rule wins.
- // Last level (usually "Cold") is the fallback when it has no rules.
- func gradeByRules(m MergedMerchant, industryTag string, levels []model.LevelDef) string {
- hasIndustry := industryTag != ""
- hasWebsite := m.Best.Website != ""
- hasEmail := m.Best.Email != ""
- hasPhone := m.Best.Phone != ""
- sourceCount := m.SourceCount
- for _, level := range levels {
- if len(level.Rules) == 0 {
- // No rules = fallback level (last one)
- continue
- }
- for _, rule := range level.Rules {
- if matchRule(rule, hasIndustry, hasWebsite, hasEmail, hasPhone, sourceCount) {
- return level.Key
- }
- }
- }
- // Return the last level as default fallback
- if len(levels) > 0 {
- return levels[len(levels)-1].Key
- }
- return "Cold"
- }
- func matchRule(rule model.GradeRule, hasIndustry, hasWebsite, hasEmail, hasPhone bool, sourceCount int) bool {
- if rule.HasIndustry != nil && *rule.HasIndustry != hasIndustry {
- return false
- }
- if rule.HasWebsite != nil && *rule.HasWebsite != hasWebsite {
- return false
- }
- if rule.HasEmail != nil && *rule.HasEmail != hasEmail {
- return false
- }
- if rule.HasPhone != nil && *rule.HasPhone != hasPhone {
- return false
- }
- if rule.MinSourceCount != nil && sourceCount < *rule.MinSourceCount {
- return false
- }
- return true
- }
- // TagAndGrade is the backward-compatible version using default config.
- func TagAndGrade(merchants []MergedMerchant) []TaggedMerchant {
- cfg := model.DefaultGradingConfig()
- return TagAndGradeWithConfig(merchants, &cfg)
- }
- // TaggedMerchant is the final output of the processor.
- type TaggedMerchant struct {
- Merged MergedMerchant
- IndustryTag string
- Level string
- }
|