| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162 |
- package crawler
- import (
- "context"
- "fmt"
- "net/http"
- "strings"
- "time"
- )
- // TMeValidator t.me 死号预检
- type TMeValidator struct {
- http *http.Client
- }
- // NewTMeValidator 创建 TMeValidator
- func NewTMeValidator() *TMeValidator {
- return &TMeValidator{
- http: &http.Client{
- Timeout: 10 * time.Second,
- CheckRedirect: func(req *http.Request, via []*http.Request) error {
- return nil // 跟随重定向
- },
- },
- }
- }
- // IsAlive 检查 TG 用户名是否活跃
- // 通过抓取 t.me/{username} 网页,检查是否包含头像元素来判断
- func (v *TMeValidator) IsAlive(ctx context.Context, username string) bool {
- url := fmt.Sprintf("https://t.me/%s", username)
- req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
- if err != nil {
- return false
- }
- req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; Googlebot/2.1)")
- resp, err := v.http.Do(req)
- if err != nil {
- return false
- }
- defer resp.Body.Close()
- if resp.StatusCode == 404 {
- return false
- }
- // 读取部分响应体(前 10KB 足够判断)
- buf := make([]byte, 10240)
- n, _ := resp.Body.Read(buf)
- body := string(buf[:n])
- // 有效账号的页面包含头像相关的 HTML 元素
- if strings.Contains(body, "tgme_page_photo") || strings.Contains(body, "og:image") {
- return true
- }
- if strings.Contains(body, "tgme_page_description") {
- return true
- }
- return false
- }
|