// tme_validator.go

package crawler
import (
	"context"
	"fmt"
	"io"
	"net/http"
	"strings"
	"time"
)
  9. // TMeValidator t.me 死号预检
  10. type TMeValidator struct {
  11. http *http.Client
  12. }
  13. // NewTMeValidator 创建 TMeValidator
  14. func NewTMeValidator() *TMeValidator {
  15. return &TMeValidator{
  16. http: &http.Client{
  17. Timeout: 10 * time.Second,
  18. CheckRedirect: func(req *http.Request, via []*http.Request) error {
  19. return nil // 跟随重定向
  20. },
  21. },
  22. }
  23. }
  24. // IsAlive 检查 TG 用户名是否活跃
  25. // 通过抓取 t.me/{username} 网页,检查是否包含头像元素来判断
  26. func (v *TMeValidator) IsAlive(ctx context.Context, username string) bool {
  27. url := fmt.Sprintf("https://t.me/%s", username)
  28. req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
  29. if err != nil {
  30. return false
  31. }
  32. req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; Googlebot/2.1)")
  33. resp, err := v.http.Do(req)
  34. if err != nil {
  35. return false
  36. }
  37. defer resp.Body.Close()
  38. if resp.StatusCode == 404 {
  39. return false
  40. }
  41. // 读取部分响应体(前 10KB 足够判断)
  42. buf := make([]byte, 10240)
  43. n, _ := resp.Body.Read(buf)
  44. body := string(buf[:n])
  45. // 有效账号的页面包含头像相关的 HTML 元素
  46. if strings.Contains(body, "tgme_page_photo") || strings.Contains(body, "og:image") {
  47. return true
  48. }
  49. if strings.Contains(body, "tgme_page_description") {
  50. return true
  51. }
  52. return false
  53. }