|
@@ -6,6 +6,7 @@ import (
|
|
|
"encoding/base64"
|
|
"encoding/base64"
|
|
|
"fmt"
|
|
"fmt"
|
|
|
"log"
|
|
"log"
|
|
|
|
|
+ "math/rand/v2"
|
|
|
"net"
|
|
"net"
|
|
|
"net/url"
|
|
"net/url"
|
|
|
"regexp"
|
|
"regexp"
|
|
@@ -428,22 +429,35 @@ func (c *Client) GetGroupParticipants(ctx context.Context, username string) ([]G
|
|
|
return nil, err
|
|
return nil, err
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- // Phase 2: If group has more members than we found, search by character sets to discover more
|
|
|
|
|
|
|
+ // Phase 2: If group has more members than we found, search by character sets to discover more.
|
|
|
|
|
+ // We pace queries with jitter (2–4s) to avoid looking like a bot scanner and triggering FloodWait.
|
|
|
|
|
+ // If FloodWait does hit, stop early and return what we already have — the calling task can
|
|
|
|
|
+ // re-attempt later after the account cools down.
|
|
|
if totalCount > len(allParticipants) && totalCount <= 10000 {
|
|
if totalCount > len(allParticipants) && totalCount <= 10000 {
|
|
|
queries := participantSearchQueries()
|
|
queries := participantSearchQueries()
|
|
|
for _, q := range queries {
|
|
for _, q := range queries {
|
|
|
if ctx.Err() != nil {
|
|
if ctx.Err() != nil {
|
|
|
break
|
|
break
|
|
|
}
|
|
}
|
|
|
|
|
+ if len(allParticipants) >= totalCount {
|
|
|
|
|
+ break // already collected everyone visible
|
|
|
|
|
+ }
|
|
|
beforeCount := len(allParticipants)
|
|
beforeCount := len(allParticipants)
|
|
|
- _ = c.fetchParticipantPages(ctx, api, inputChannel, q, seen, extractUsers, nil)
|
|
|
|
|
|
|
+ err := c.fetchParticipantPages(ctx, api, inputChannel, q, seen, extractUsers, nil)
|
|
|
|
|
+ if err != nil {
|
|
|
|
|
+ if fwe, ok := err.(*FloodWaitError); ok {
|
|
|
|
|
+ log.Printf("[tg_client] flood wait %ds during search q=%q for %s; returning %d/%d",
|
|
|
|
|
+ fwe.Seconds, q, username, len(allParticipants), totalCount)
|
|
|
|
|
+ } else {
|
|
|
|
|
+ log.Printf("[tg_client] search q=%q for %s: %v (returning partial)", q, username, err)
|
|
|
|
|
+ }
|
|
|
|
|
+ break
|
|
|
|
|
+ }
|
|
|
if len(allParticipants) == beforeCount {
|
|
if len(allParticipants) == beforeCount {
|
|
|
- continue // No new results for this query
|
|
|
|
|
|
|
+ continue // no new results; skip sleep and try next query
|
|
|
}
|
|
}
|
|
|
- select {
|
|
|
|
|
- case <-ctx.Done():
|
|
|
|
|
- return allParticipants, ctx.Err()
|
|
|
|
|
- case <-time.After(300 * time.Millisecond):
|
|
|
|
|
|
|
+ if err := jitterSleep(ctx, 2*time.Second, 4*time.Second); err != nil {
|
|
|
|
|
+ return allParticipants, err
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
@@ -452,9 +466,25 @@ func (c *Client) GetGroupParticipants(ctx context.Context, username string) ([]G
|
|
|
return allParticipants, nil
|
|
return allParticipants, nil
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-// participantSearchQueries returns search queries covering Latin, Cyrillic, CJK and other scripts.
|
|
|
|
|
|
|
+// jitterSleep sleeps a random duration in [min, max) while respecting ctx.
|
|
|
|
|
+// Returns ctx.Err() if cancelled. Used to spread out TG API calls and avoid
|
|
|
|
|
+// looking like a deterministic scanner.
|
|
|
|
|
+func jitterSleep(ctx context.Context, min, max time.Duration) error {
|
|
|
|
|
+ d := min + time.Duration(rand.Int64N(int64(max-min)))
|
|
|
|
|
+ select {
|
|
|
|
|
+ case <-ctx.Done():
|
|
|
|
|
+ return ctx.Err()
|
|
|
|
|
+ case <-time.After(d):
|
|
|
|
|
+ return nil
|
|
|
|
|
+ }
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+// participantSearchQueries returns search queries covering Latin, Cyrillic, Japanese,
|
|
|
|
|
+// Korean, and CJK scripts. TG's ChannelParticipantsSearch does substring matching on
|
|
|
|
|
+// first_name + last_name + username, so more starter-character coverage = more users
|
|
|
|
|
+// surfaced on groups beyond the 200-per-query cap. Total ~150 queries.
|
|
|
func participantSearchQueries() []string {
|
|
func participantSearchQueries() []string {
|
|
|
- queries := make([]string, 0, 80)
|
|
|
|
|
|
|
+ queries := make([]string, 0, 170)
|
|
|
// Latin a-z
|
|
// Latin a-z
|
|
|
for c := 'a'; c <= 'z'; c++ {
|
|
for c := 'a'; c <= 'z'; c++ {
|
|
|
queries = append(queries, string(c))
|
|
queries = append(queries, string(c))
|
|
@@ -467,11 +497,36 @@ func participantSearchQueries() []string {
|
|
|
for c := 'а'; c <= 'я'; c++ {
|
|
for c := 'а'; c <= 'я'; c++ {
|
|
|
queries = append(queries, string(c))
|
|
queries = append(queries, string(c))
|
|
|
}
|
|
}
|
|
|
- // Common CJK first characters (high frequency Chinese surnames and words)
|
|
|
|
|
- cjk := []string{"王", "李", "张", "刘", "陈", "杨", "黄", "赵", "周", "吴",
|
|
|
|
|
|
|
+ // Japanese Hiragana — common name-starter syllables
|
|
|
|
|
+ queries = append(queries,
|
|
|
|
|
+ "あ", "い", "う", "え", "お",
|
|
|
|
|
+ "か", "さ", "た", "な", "ま",
|
|
|
|
|
+ )
|
|
|
|
|
+ // Korean Hangul — common initial syllables
|
|
|
|
|
+ queries = append(queries,
|
|
|
|
|
+ "가", "나", "다", "라", "마", "바", "사", "아", "자", "차",
|
|
|
|
|
+ "카", "타", "파", "하",
|
|
|
|
|
+ )
|
|
|
|
|
+ // CJK: top Chinese surnames (百家姓 high frequency)
|
|
|
|
|
+ surnames := []string{
|
|
|
|
|
+ "王", "李", "张", "刘", "陈", "杨", "黄", "赵", "周", "吴",
|
|
|
"徐", "孙", "马", "朱", "胡", "林", "何", "高", "郭", "罗",
|
|
"徐", "孙", "马", "朱", "胡", "林", "何", "高", "郭", "罗",
|
|
|
- "大", "小", "新", "老", "中", "天", "金", "一"}
|
|
|
|
|
- queries = append(queries, cjk...)
|
|
|
|
|
|
|
+ "谢", "宋", "唐", "许", "邓", "梁", "韩", "曹", "彭", "余",
|
|
|
|
|
+ "潘", "袁", "蒋", "蔡", "卢", "田", "董", "叶", "程", "姜",
|
|
|
|
|
+ }
|
|
|
|
|
+ queries = append(queries, surnames...)
|
|
|
|
|
+ // CJK: common given-name characters (高频二字名)
|
|
|
|
|
+ given := []string{
|
|
|
|
|
+ "伟", "芳", "娜", "秀", "敏", "静", "丽", "强", "磊", "军",
|
|
|
|
|
+ "洋", "勇", "艳", "杰", "涛", "明", "超", "霞", "平", "刚",
|
|
|
|
|
+ }
|
|
|
|
|
+ queries = append(queries, given...)
|
|
|
|
|
+ // CJK: common modifiers and city prefixes (covers nicknames/titles)
|
|
|
|
|
+ misc := []string{
|
|
|
|
|
+ "大", "小", "新", "老", "中", "天", "金", "一", "龙", "虎",
|
|
|
|
|
+ "京", "沪", "深", "广", "杭", "苏",
|
|
|
|
|
+ }
|
|
|
|
|
+ queries = append(queries, misc...)
|
|
|
return queries
|
|
return queries
|
|
|
}
|
|
}
|
|
|
|
|
|
|
@@ -517,10 +572,9 @@ func (c *Client) fetchParticipantPages(
|
|
|
break
|
|
break
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- select {
|
|
|
|
|
- case <-ctx.Done():
|
|
|
|
|
- return ctx.Err()
|
|
|
|
|
- case <-time.After(500 * time.Millisecond):
|
|
|
|
|
|
|
+ // Page interval: jittered to avoid a detectable request cadence.
|
|
|
|
|
+ if err := jitterSleep(ctx, 800*time.Millisecond, 1500*time.Millisecond); err != nil {
|
|
|
|
|
+ return err
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
return nil
|
|
return nil
|