package search import ( "bytes" "context" "encoding/json" "fmt" "net/http" "strings" "time" ) const serperEndpoint = "https://google.serper.dev/search" // SerperClient Serper API 客户端 type SerperClient struct { apiKey string perPage int maxPage int http *http.Client } // NewSerperClient 创建 Serper 客户端 func NewSerperClient(apiKey string, perPage, maxPage int) *SerperClient { return &SerperClient{ apiKey: apiKey, perPage: perPage, maxPage: maxPage, http: &http.Client{Timeout: 15 * time.Second}, } } // SearchResult 单条搜索结果 type SearchResult struct { Title string URL string Snippet string } // Search 搜索关键词,返回所有翻页结果 func (c *SerperClient) Search(ctx context.Context, query string) ([]SearchResult, error) { var results []SearchResult for page := 1; page <= c.maxPage; page++ { pageResults, err := c.searchPage(ctx, query, page) if err != nil { break } results = append(results, pageResults...) if len(pageResults) < c.perPage { break } } return results, nil } // searchPage 搜索单页 func (c *SerperClient) searchPage(ctx context.Context, query string, page int) ([]SearchResult, error) { body := map[string]interface{}{ "q": query, "num": c.perPage, "page": page, "gl": "cn", "hl": "zh-cn", } data, _ := json.Marshal(body) req, err := http.NewRequestWithContext(ctx, "POST", serperEndpoint, bytes.NewReader(data)) if err != nil { return nil, err } req.Header.Set("X-API-KEY", c.apiKey) req.Header.Set("Content-Type", "application/json") resp, err := c.http.Do(req) if err != nil { return nil, err } defer resp.Body.Close() if resp.StatusCode != 200 { return nil, fmt.Errorf("serper API error: %d", resp.StatusCode) } var result struct { Organic []struct { Title string `json:"title"` Link string `json:"link"` Snippet string `json:"snippet"` } `json:"organic"` } if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { return nil, err } var out []SearchResult for _, r := range result.Organic { out = append(out, SearchResult{Title: r.Title, URL: r.Link, Snippet: r.Snippet}) } return out, nil } // ClassifyURL 判断 URL 类型 // 返回: "tg_channel", "nav_site", "discard" func ClassifyURL(rawURL string) string { // t.me 链接 if strings.Contains(rawURL, "t.me/") || strings.Contains(rawURL, "telegram.me/") { return "tg_channel" } // 社交媒体/大站黑名单 blacklistDomains := []string{ "twitter.com", "facebook.com", "instagram.com", "youtube.com", "google.com", "baidu.com", "weibo.com", "zhihu.com", "github.com", "stackoverflow.com", "wikipedia.org", "amazon.com", "taobao.com", "jd.com", "tmall.com", } for _, d := range blacklistDomains { if strings.Contains(rawURL, d) { return "discard" } } // 黑名单扩展名 blacklistExt := []string{".apk", ".zip", ".pdf", ".exe", ".dmg", ".ipa"} for _, ext := range blacklistExt { if strings.HasSuffix(strings.ToLower(rawURL), ext) { return "discard" } } // 正向信号:导航站 navSignals := []string{"nav", "directory", "catalog", "list", "daohang", "dh"} u := strings.ToLower(rawURL) for _, sig := range navSignals { if strings.Contains(u, sig) { return "nav_site" } } return "discard" }