serper.go 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. package search
  2. import (
  3. "bytes"
  4. "context"
  5. "encoding/json"
  6. "fmt"
  7. "net/http"
  8. "strings"
  9. "time"
  10. )
// serperEndpoint is the Serper (google.serper.dev) web-search REST endpoint.
const serperEndpoint = "https://google.serper.dev/search"

// SerperClient is a client for the Serper search API.
type SerperClient struct {
	apiKey  string       // Serper API key, sent as the X-API-KEY request header
	perPage int          // number of results requested per page ("num" field)
	maxPage int          // upper bound on pages fetched for a single query
	http    *http.Client // HTTP client; NewSerperClient sets a 15s timeout
}
  19. // NewSerperClient 创建 Serper 客户端
  20. func NewSerperClient(apiKey string, perPage, maxPage int) *SerperClient {
  21. return &SerperClient{
  22. apiKey: apiKey,
  23. perPage: perPage,
  24. maxPage: maxPage,
  25. http: &http.Client{Timeout: 15 * time.Second},
  26. }
  27. }
// SearchResult is a single organic search hit returned by the Serper API.
type SearchResult struct {
	Title   string // result title
	URL     string // landing-page URL (the API's "link" field)
	Snippet string // short text excerpt shown under the result
}
  34. // Search 搜索关键词,返回所有翻页结果
  35. func (c *SerperClient) Search(ctx context.Context, query string) ([]SearchResult, error) {
  36. var results []SearchResult
  37. for page := 1; page <= c.maxPage; page++ {
  38. pageResults, err := c.searchPage(ctx, query, page)
  39. if err != nil {
  40. break
  41. }
  42. results = append(results, pageResults...)
  43. if len(pageResults) < c.perPage {
  44. break
  45. }
  46. }
  47. return results, nil
  48. }
  49. // searchPage 搜索单页
  50. func (c *SerperClient) searchPage(ctx context.Context, query string, page int) ([]SearchResult, error) {
  51. body := map[string]interface{}{
  52. "q": query,
  53. "num": c.perPage,
  54. "page": page,
  55. "gl": "cn",
  56. "hl": "zh-cn",
  57. }
  58. data, _ := json.Marshal(body)
  59. req, err := http.NewRequestWithContext(ctx, "POST", serperEndpoint, bytes.NewReader(data))
  60. if err != nil {
  61. return nil, err
  62. }
  63. req.Header.Set("X-API-KEY", c.apiKey)
  64. req.Header.Set("Content-Type", "application/json")
  65. resp, err := c.http.Do(req)
  66. if err != nil {
  67. return nil, err
  68. }
  69. defer resp.Body.Close()
  70. if resp.StatusCode != 200 {
  71. return nil, fmt.Errorf("serper API error: %d", resp.StatusCode)
  72. }
  73. var result struct {
  74. Organic []struct {
  75. Title string `json:"title"`
  76. Link string `json:"link"`
  77. Snippet string `json:"snippet"`
  78. } `json:"organic"`
  79. }
  80. if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
  81. return nil, err
  82. }
  83. var out []SearchResult
  84. for _, r := range result.Organic {
  85. out = append(out, SearchResult{Title: r.Title, URL: r.Link, Snippet: r.Snippet})
  86. }
  87. return out, nil
  88. }
  89. // ClassifyURL 判断 URL 类型
  90. // 返回: "tg_channel", "nav_site", "discard"
  91. func ClassifyURL(rawURL string) string {
  92. // t.me 链接
  93. if strings.Contains(rawURL, "t.me/") || strings.Contains(rawURL, "telegram.me/") {
  94. return "tg_channel"
  95. }
  96. // 社交媒体/大站黑名单
  97. blacklistDomains := []string{
  98. "twitter.com", "facebook.com", "instagram.com", "youtube.com",
  99. "google.com", "baidu.com", "weibo.com", "zhihu.com",
  100. "github.com", "stackoverflow.com", "wikipedia.org",
  101. "amazon.com", "taobao.com", "jd.com", "tmall.com",
  102. }
  103. for _, d := range blacklistDomains {
  104. if strings.Contains(rawURL, d) {
  105. return "discard"
  106. }
  107. }
  108. // 黑名单扩展名
  109. blacklistExt := []string{".apk", ".zip", ".pdf", ".exe", ".dmg", ".ipa"}
  110. for _, ext := range blacklistExt {
  111. if strings.HasSuffix(strings.ToLower(rawURL), ext) {
  112. return "discard"
  113. }
  114. }
  115. // 正向信号:导航站
  116. navSignals := []string{"nav", "directory", "catalog", "list", "daohang", "dh"}
  117. u := strings.ToLower(rawURL)
  118. for _, sig := range navSignals {
  119. if strings.Contains(u, sig) {
  120. return "nav_site"
  121. }
  122. }
  123. return "discard"
  124. }