Your Name 5 ngày trước
mục cha
commit
f94966de16

+ 37 - 16
internal/plugins/tgcollector/collector.go

@@ -293,17 +293,37 @@ func (c *Collector) scrapeChannel(ctx context.Context, username string, msgLimit
 }
 
 func (c *Collector) processMessages(ctx context.Context, msgs []telegram.Message, channelUsername string, callback func(plugin.MerchantData)) {
+	channelLower := strings.ToLower(channelUsername)
+
 	for _, msg := range msgs {
-		if msg.IsService || msg.Text == "" {
+		if msg.IsService {
+			continue
+		}
+
+		// ── Path 1: sender-based (group chats) ──
+		// If the message has a sender username, record that person directly.
+		// This is the correct way to collect users who post in a chat group.
+		if msg.SenderUsername != "" && strings.ToLower(msg.SenderUsername) != channelLower {
+			md := plugin.MerchantData{
+				TgUsername:    msg.SenderUsername,
+				TgLink:        "https://t.me/" + msg.SenderUsername,
+				SourceType:    "tg_group",
+				SourceName:    channelUsername,
+				SourceURL:     "https://t.me/" + channelUsername,
+				OriginalText:  msg.Text,
+				GroupUsername: channelUsername,
+			}
+			c.logger.LogMerchantFound(md, "tg_sender", 0, "tg://"+channelUsername)
+			callback(md)
 			continue
 		}
-		// Relaxed: allow messages with any contact info even without Chinese
-		// Many merchants post in English or mixed language
-		if !extractor.HasContact(msg.Text) {
+
+		// ── Path 2: text-based extraction (broadcast channels) ──
+		// Reached when no distinct sender username was resolved (e.g. broadcast channel posts, or a sender without a public username); requires non-empty text containing contact info.
+		if msg.Text == "" || !extractor.HasContact(msg.Text) {
 			continue
 		}
 
-		// Regex first
 		info := extractor.Extract(msg.Text)
 		merchantName := ""
 		industry := ""
@@ -327,22 +347,23 @@ func (c *Collector) processMessages(ctx context.Context, msgs []telegram.Message
 			}
 		}
 
-		if info.TgUsername == "" {
+		// Skip if still no username, or if it's the channel itself (self-referencing link)
+		if info.TgUsername == "" || strings.ToLower(info.TgUsername) == channelLower {
 			continue
 		}
 
 		md := plugin.MerchantData{
 			TgUsername:    info.TgUsername,
-			TgLink:       "https://t.me/" + info.TgUsername,
-			MerchantName: merchantName,
-			Website:      info.Website,
-			Email:        info.Email,
-			Phone:        info.Phone,
-			SourceType:   "tg_channel",
-			SourceName:   channelUsername,
-			SourceURL:    "https://t.me/" + channelUsername,
-			OriginalText: msg.Text,
-			IndustryTag:  industry,
+			TgLink:        "https://t.me/" + info.TgUsername,
+			MerchantName:  merchantName,
+			Website:       info.Website,
+			Email:         info.Email,
+			Phone:         info.Phone,
+			SourceType:    "tg_channel",
+			SourceName:    channelUsername,
+			SourceURL:     "https://t.me/" + channelUsername,
+			OriginalText:  msg.Text,
+			IndustryTag:   industry,
 			GroupUsername: channelUsername,
 		}
 		c.logger.LogMerchantFound(md, "tg_message_extract", 0, "tg://"+channelUsername)

+ 21 - 7
internal/telegram/client.go

@@ -1125,20 +1125,36 @@ func (c *Client) resolveInputPeer(ctx context.Context, api *tg.Client, username
 	return &tg.InputPeerEmpty{}, nil
 }
 
-// extractMessages extracts messages from a MessagesMessagesClass
+// extractMessages extracts messages from a MessagesMessagesClass.
+// It also resolves sender usernames from the Users list bundled in the response.
 func extractMessages(result tg.MessagesMessagesClass) []Message {
 	var rawMsgs []tg.MessageClass
+	var rawUsers []tg.UserClass
+
 	switch v := result.(type) {
 	case *tg.MessagesMessages:
 		rawMsgs = v.Messages
+		rawUsers = v.Users
 	case *tg.MessagesMessagesSlice:
 		rawMsgs = v.Messages
+		rawUsers = v.Users
 	case *tg.MessagesChannelMessages:
 		rawMsgs = v.Messages
+		rawUsers = v.Users
 	case *tg.MessagesMessagesNotModified:
 		return nil
 	}
 
+	// Build userID → username map from the Users list bundled in the response.
+	usernames := make(map[int64]string, len(rawUsers))
+	for _, u := range rawUsers {
+		if user, ok := u.(*tg.User); ok {
+			if name, ok := user.GetUsername(); ok && name != "" {
+				usernames[user.GetID()] = name
+			}
+		}
+	}
+
 	var msgs []Message
 	for _, raw := range rawMsgs {
 		switch m := raw.(type) {
@@ -1149,12 +1165,10 @@ func extractMessages(result tg.MessagesMessagesClass) []Message {
 				IsService: false,
 			}
 
-			// Extract forward source channel username
-			if fwd, ok := m.GetFwdFrom(); ok {
-				if fromID, ok := fwd.GetFromID(); ok {
-					if peerCh, ok := fromID.(*tg.PeerChannel); ok {
-						_ = peerCh // We'd need channel map to resolve username; skip for now
-					}
+			// Resolve sender username (only set for group chats, not broadcast channels).
+			if fromID, ok := m.GetFromID(); ok {
+				if pu, ok := fromID.(*tg.PeerUser); ok {
+					msg.SenderUsername = usernames[pu.UserID]
 				}
 			}
 

+ 3 - 2
internal/telegram/types.go

@@ -33,9 +33,10 @@ type ChannelInfo struct {
 type Message struct {
 	ID                 int
 	Text               string
-	ForwardFromChannel string // forward 来源频道用户名
+	SenderUsername     string   // 发送者的 @username(群聊中有值,频道消息为空)
+	ForwardFromChannel string   // forward 来源频道用户名
 	Links              []string // 消息中的 t.me 链接
-	IsService          bool // 系统消息
+	IsService          bool     // 系统消息
 }
 
 // UserInfo TG 用户信息(验证商户时用)