feat: add DuckDuckGo, GitHub, Reddit, and Bing engines
- DuckDuckGo: scrapes Lite HTML endpoint for results - Language-aware region mapping (de→de-de, ja→jp-jp, etc.) - HTML parser extracts result links and snippets from DDG Lite markup - Shared html_helpers.go with extractAttr, stripHTML, htmlUnescape - GitHub: uses public Search API (repos, sorted by stars) - No auth required (10 req/min unauthenticated) - Shows stars, language, topics, last updated date - Paginated via GitHub's page parameter - Reddit: uses public JSON search API - Respects safesearch (skips over_18 posts) - Shows subreddit, score, comment count - Links self-posts to the thread URL - Bing: scrapes web search HTML (b_algo containers) - Extracts titles, URLs, and snippets from Bing's result markup - Handles Bing's tracking URL encoding - Updated factory, config defaults, and config.example.toml - Full test suite: unit tests for all engines, HTML parsing tests, region mapping tests, live request tests (skipped in short mode) 9 engines total: wikipedia, arxiv, crossref, braveapi, qwant, duckduckgo, github, reddit, bing
This commit is contained in:
parent
28b61ff251
commit
df8fe9474b
14 changed files with 1030 additions and 5 deletions
120
internal/engines/github.go
Normal file
120
internal/engines/github.go
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
package engines
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ashie/gosearch/internal/contracts"
|
||||
)
|
||||
|
||||
// GitHubEngine is a search engine backed by GitHub's public repository
// search API. Requests are sent unauthenticated, which GitHub rate-limits
// to 10 requests per minute.
type GitHubEngine struct {
	// client issues the outgoing HTTP requests. Search returns an error
	// when it is nil.
	client *http.Client
}
|
||||
|
||||
// Name returns the identifier of this engine, "github". It does not touch
// the receiver, so it is safe to call on a nil *GitHubEngine.
func (e *GitHubEngine) Name() string {
	const engineName = "github"
	return engineName
}
|
||||
|
||||
func (e *GitHubEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
|
||||
if strings.TrimSpace(req.Query) == "" {
|
||||
return contracts.SearchResponse{Query: req.Query}, nil
|
||||
}
|
||||
if e == nil || e.client == nil {
|
||||
return contracts.SearchResponse{}, errors.New("github engine not initialized")
|
||||
}
|
||||
|
||||
endpoint := fmt.Sprintf(
|
||||
"https://api.github.com/search/repositories?q=%s&sort=stars&per_page=10&page=%d",
|
||||
url.QueryEscape(req.Query),
|
||||
req.Pageno,
|
||||
)
|
||||
|
||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
|
||||
if err != nil {
|
||||
return contracts.SearchResponse{}, err
|
||||
}
|
||||
httpReq.Header.Set("User-Agent", "gosearch/0.1")
|
||||
httpReq.Header.Set("Accept", "application/vnd.github.v3+json")
|
||||
|
||||
resp, err := e.client.Do(httpReq)
|
||||
if err != nil {
|
||||
return contracts.SearchResponse{}, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
|
||||
return contracts.SearchResponse{}, fmt.Errorf("github api error: status=%d body=%q", resp.StatusCode, string(body))
|
||||
}
|
||||
|
||||
var data struct {
|
||||
TotalCount int `json:"total_count"`
|
||||
Items []struct {
|
||||
FullName string `json:"full_name"`
|
||||
Description string `json:"description"`
|
||||
HTMLURL string `json:"html_url"`
|
||||
Stars int `json:"stargazers_count"`
|
||||
Language string `json:"language"`
|
||||
UpdatedAt time.Time `json:"updated_at"`
|
||||
Topics []string `json:"topics"`
|
||||
} `json:"items"`
|
||||
}
|
||||
|
||||
if err := json.NewDecoder(resp.Body).Decode(&data); err != nil {
|
||||
return contracts.SearchResponse{}, err
|
||||
}
|
||||
|
||||
results := make([]contracts.MainResult, 0, len(data.Items))
|
||||
for _, item := range data.Items {
|
||||
content := item.Description
|
||||
if item.Language != "" {
|
||||
if content != "" {
|
||||
content += " • "
|
||||
}
|
||||
content += fmt.Sprintf("Language: %s · ⭐ %d", item.Language, item.Stars)
|
||||
}
|
||||
|
||||
title := item.FullName
|
||||
if len(item.Topics) > 0 {
|
||||
title = item.FullName + " [" + strings.Join(item.Topics[:min(3, len(item.Topics))], ", ") + "]"
|
||||
}
|
||||
|
||||
updatedAt := item.UpdatedAt.Format("2006-01-02")
|
||||
if content != "" {
|
||||
content += " · Updated: " + updatedAt
|
||||
}
|
||||
|
||||
urlPtr := item.HTMLURL
|
||||
results = append(results, contracts.MainResult{
|
||||
Template: "default.html",
|
||||
Title: title,
|
||||
Content: content,
|
||||
URL: &urlPtr,
|
||||
Pubdate: strPtr(updatedAt),
|
||||
Engine: "github",
|
||||
Score: float64(item.Stars),
|
||||
Category: "it",
|
||||
Engines: []string{"github"},
|
||||
})
|
||||
}
|
||||
|
||||
return contracts.SearchResponse{
|
||||
Query: req.Query,
|
||||
NumberOfResults: data.TotalCount,
|
||||
Results: results,
|
||||
Answers: []map[string]any{},
|
||||
Corrections: []string{},
|
||||
Infoboxes: []map[string]any{},
|
||||
Suggestions: []string{},
|
||||
UnresponsiveEngines: [][2]string{},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// strPtr returns a pointer to a string holding the value of s. Each call
// yields a distinct pointer.
func strPtr(s string) *string {
	value := s
	return &value
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue