package crawler

import (
	"context"
	"strings"
	"sync"
	"time"

	"github.com/gocolly/colly/v2"
)

// StaticCrawler crawls static web pages using colly.
type StaticCrawler struct {
	mu       sync.RWMutex
	proxyURL string
}

// NewStaticCrawler creates a new StaticCrawler.
func NewStaticCrawler() *StaticCrawler {
	return &StaticCrawler{}
}

// SetProxy sets the proxy URL for subsequent crawl requests.
func (c *StaticCrawler) SetProxy(proxyURL string) {
	c.mu.Lock()
	c.proxyURL = proxyURL
	c.mu.Unlock()
}

// GetProxy returns the current proxy URL.
func (c *StaticCrawler) GetProxy() string {
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.proxyURL
}

// CrawlResult holds the outcome of a single crawl.
type CrawlResult struct {
	Links   []string // all links discovered on the page
	TgLinks []string // links pointing to t.me / telegram.me
	Emails  []string // reserved for callers; not populated by Crawl
	HTML    string
	Error   error
}

// Crawl fetches targetURL and extracts all links found on the page.
func (c *StaticCrawler) Crawl(ctx context.Context, targetURL string) *CrawlResult {
	result := &CrawlResult{}

	collector := colly.NewCollector(
		colly.MaxDepth(1),
		colly.Async(false),
	)
	collector.SetRequestTimeout(15 * time.Second)

	// Snapshot the proxy under the read lock.
	proxyURL := c.GetProxy()
	if proxyURL != "" {
		if err := collector.SetProxy(proxyURL); err != nil {
			result.Error = err
			return result
		}
	}

	// Extract every anchor link.
	collector.OnHTML("a[href]", func(e *colly.HTMLElement) {
		href := e.Attr("href")
		if href == "" {
			return
		}
		// Resolve relative hrefs against the request URL.
		absURL := e.Request.AbsoluteURL(href)
		if absURL == "" {
			return
		}
		result.Links = append(result.Links, absURL)
		if strings.Contains(absURL, "t.me/") || strings.Contains(absURL, "telegram.me/") {
			result.TgLinks = append(result.TgLinks, absURL)
		}
	})

	collector.OnResponse(func(r *colly.Response) {
		result.HTML = string(r.Body)
	})

	collector.OnError(func(r *colly.Response, err error) {
		result.Error = err
	})

	// Run the visit in a goroutine so the select below can observe ctx
	// cancellation; colly itself does not take a context.
	done := make(chan struct{})
	go func() {
		defer close(done)
		collector.Visit(targetURL) //nolint:errcheck
	}()

	select {
	case <-ctx.Done():
		// The in-flight request keeps running until the request timeout
		// expires, but its result is discarded: returning a fresh value
		// avoids a data race between the caller and the colly callbacks,
		// which still append to the goroutine-owned result.
		return &CrawlResult{Error: ctx.Err()}
	case <-done:
	}

	return result
}
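
// Usage sketch (not part of the original file; the target URL and proxy
// address below are placeholders): a minimal caller that bounds the crawl
// with a deadline and reads the extracted Telegram links.
//
//	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
//	defer cancel()
//
//	c := NewStaticCrawler()
//	c.SetProxy("socks5://127.0.0.1:1080") // optional
//
//	res := c.Crawl(ctx, "https://example.com")
//	if res.Error != nil {
//		// handle network failure, non-2xx status, or ctx cancellation
//	}
//	for _, link := range res.TgLinks {
//		// every entry contains "t.me/" or "telegram.me/"
//	}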