| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141 |
- package search
- import (
- "bytes"
- "context"
- "encoding/json"
- "fmt"
- "net/http"
- "strings"
- "time"
- )
// serperEndpoint is the URL that search requests are POSTed to.
const (
	serperEndpoint = "https://google.serper.dev/search"
)
// SerperClient is a client for the Serper search API.
type SerperClient struct {
	// apiKey is sent in the X-API-KEY header of every request.
	apiKey string
	// perPage is the number of results requested per page; maxPage is the
	// highest page number Search will request.
	perPage, maxPage int
	// http is the HTTP client used to send requests.
	http *http.Client
}
- // NewSerperClient 创建 Serper 客户端
- func NewSerperClient(apiKey string, perPage, maxPage int) *SerperClient {
- return &SerperClient{
- apiKey: apiKey,
- perPage: perPage,
- maxPage: maxPage,
- http: &http.Client{Timeout: 15 * time.Second},
- }
- }
// SearchResult is a single search hit.
type SearchResult struct {
	Title   string // Title of the result.
	URL     string // URL the result links to.
	Snippet string // Snippet is the text excerpt returned with the result.
}
- // Search 搜索关键词,返回所有翻页结果
- func (c *SerperClient) Search(ctx context.Context, query string) ([]SearchResult, error) {
- var results []SearchResult
- for page := 1; page <= c.maxPage; page++ {
- pageResults, err := c.searchPage(ctx, query, page)
- if err != nil {
- break
- }
- results = append(results, pageResults...)
- if len(pageResults) < c.perPage {
- break
- }
- }
- return results, nil
- }
- // searchPage 搜索单页
- func (c *SerperClient) searchPage(ctx context.Context, query string, page int) ([]SearchResult, error) {
- body := map[string]interface{}{
- "q": query,
- "num": c.perPage,
- "page": page,
- "gl": "cn",
- "hl": "zh-cn",
- }
- data, _ := json.Marshal(body)
- req, err := http.NewRequestWithContext(ctx, "POST", serperEndpoint, bytes.NewReader(data))
- if err != nil {
- return nil, err
- }
- req.Header.Set("X-API-KEY", c.apiKey)
- req.Header.Set("Content-Type", "application/json")
- resp, err := c.http.Do(req)
- if err != nil {
- return nil, err
- }
- defer resp.Body.Close()
- if resp.StatusCode != 200 {
- return nil, fmt.Errorf("serper API error: %d", resp.StatusCode)
- }
- var result struct {
- Organic []struct {
- Title string `json:"title"`
- Link string `json:"link"`
- Snippet string `json:"snippet"`
- } `json:"organic"`
- }
- if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
- return nil, err
- }
- var out []SearchResult
- for _, r := range result.Organic {
- out = append(out, SearchResult{Title: r.Title, URL: r.Link, Snippet: r.Snippet})
- }
- return out, nil
- }
// ClassifyURL classifies rawURL and returns one of "tg_channel", "nav_site" or
// "discard".
//
// Matching is case-insensitive. The checks run in this order:
//
//  1. A t.me/ or telegram.me/ link is "tg_channel".
//  2. A URL on a blacklisted social-media or large site is "discard".
//  3. A URL whose path (query string and fragment ignored) ends in a
//     blacklisted file extension is "discard".
//  4. A URL containing a navigation-site keyword is "nav_site".
//
// Anything else is "discard".
//
// Domains in steps 1 and 2 must start at a host-label boundary, so that for
// example "about.me/" is not mistaken for "t.me/" and "cjd.com" is not
// mistaken for "jd.com".
func ClassifyURL(rawURL string) string {
	u := strings.ToLower(rawURL)

	// Telegram links.
	for _, d := range []string{"t.me/", "telegram.me/"} {
		if urlHasDomain(u, d) {
			return "tg_channel"
		}
	}

	// Social media and large sites that are never of interest.
	blacklistDomains := []string{
		"twitter.com", "facebook.com", "instagram.com", "youtube.com",
		"google.com", "baidu.com", "weibo.com", "zhihu.com",
		"github.com", "stackoverflow.com", "wikipedia.org",
		"amazon.com", "taobao.com", "jd.com", "tmall.com",
	}
	for _, d := range blacklistDomains {
		if urlHasDomain(u, d) {
			return "discard"
		}
	}

	// Blacklisted file extensions. The query string and fragment are cut off
	// first so that "file.apk?download=1" is still recognized.
	path := u
	if i := strings.IndexAny(path, "?#"); i >= 0 {
		path = path[:i]
	}
	for _, ext := range []string{".apk", ".zip", ".pdf", ".exe", ".dmg", ".ipa"} {
		if strings.HasSuffix(path, ext) {
			return "discard"
		}
	}

	// Positive signals: keywords that suggest a navigation/directory site.
	for _, sig := range []string{"nav", "directory", "catalog", "list", "daohang", "dh"} {
		if strings.Contains(u, sig) {
			return "nav_site"
		}
	}
	return "discard"
}

// urlHasDomain reports whether domain occurs in lowerURL at a host-label
// boundary, i.e. at the very start or directly after '/', '.' or '@'.
// Both arguments are expected to be lowercase.
func urlHasDomain(lowerURL, domain string) bool {
	for from := 0; from < len(lowerURL); {
		i := strings.Index(lowerURL[from:], domain)
		if i < 0 {
			return false
		}
		i += from
		if i == 0 {
			return true
		}
		switch lowerURL[i-1] {
		case '/', '.', '@':
			return true
		}
		// Not at a boundary; keep looking for a later occurrence.
		from = i + 1
	}
	return false
}
|