package crawler import ( "context" "strings" "time" "github.com/chromedp/chromedp" ) // DynamicCrawler chromedp 动态爬取 type DynamicCrawler struct{} // NewDynamicCrawler 创建 DynamicCrawler func NewDynamicCrawler() *DynamicCrawler { return &DynamicCrawler{} } // Crawl 用无头浏览器爬取(用于 JS 渲染的页面) func (c *DynamicCrawler) Crawl(ctx context.Context, targetURL string) *CrawlResult { result := &CrawlResult{} // 创建 chromedp allocator context,最多等待 30s allocCtx, cancel := chromedp.NewExecAllocator(ctx, chromedp.Headless, chromedp.DisableGPU, chromedp.NoSandbox, chromedp.Flag("disable-web-security", true), ) defer cancel() taskCtx, taskCancel := context.WithTimeout(allocCtx, 30*time.Second) defer taskCancel() chromeCtx, chromeCancel := chromedp.NewContext(taskCtx) defer chromeCancel() var html string var links []interface{} err := chromedp.Run(chromeCtx, chromedp.Navigate(targetURL), chromedp.Sleep(3*time.Second), // 等待 JS 渲染 chromedp.OuterHTML("html", &html), chromedp.Evaluate(`Array.from(document.querySelectorAll('a[href]')).map(a => a.href)`, &links), ) if err != nil { result.Error = err return result } result.HTML = html // 将 interface{} 切片转为字符串切片 for _, item := range links { link, ok := item.(string) if !ok { continue } result.Links = append(result.Links, link) if strings.Contains(link, "t.me/") || strings.Contains(link, "telegram.me/") { result.TgLinks = append(result.TgLinks, link) } } return result }