package engines

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strings"

	"github.com/PuerkitoBio/goquery"
	"github.com/ashie/gosearch/internal/contracts"
)

const (
	// qwantAPIEndpoint is the Qwant v3 JSON web-search endpoint.
	qwantAPIEndpoint = "https://api.qwant.com/v3/search/web"
	// qwantLiteEndpoint is the HTML "lite" frontend used by the web-lite mode.
	qwantLiteEndpoint = "https://lite.qwant.com/"
	// qwantUserAgent is sent with every upstream request.
	qwantUserAgent = "gosearch-go/0.1 (+https://github.com/ashie/gosearch)"

	// qwantDefaultCount is the page size used when resultsPerPage is unset.
	qwantDefaultCount = 10
	// qwantMaxBodyBytes caps how much of a successful response body is read.
	qwantMaxBodyBytes = 2 * 1024 * 1024
	// qwantMaxErrorBodyBytes caps how much of an error body is echoed into
	// the returned error message.
	qwantMaxErrorBodyBytes = 16 * 1024
	// qwantLiteMaxResults is the total result count at which the lite
	// fallback pass stops adding entries.
	qwantLiteMaxResults = 20
)

// QwantEngine implements a SearXNG-like `qwant` (web) adapter using
// Qwant v3 endpoint: https://api.qwant.com/v3/search/web.
//
// Qwant's API is not fully documented; this mirrors SearXNG's parsing logic
// for the `web` category from `.agent/searxng/searx/engines/qwant.py`.
type QwantEngine struct {
	client         *http.Client
	category       string // "web" (JSON API) or "web-lite" (HTML fallback)
	resultsPerPage int
}

// Name returns the engine identifier used in results and in
// unresponsive-engine reports.
func (e *QwantEngine) Name() string { return "qwant" }

// Search runs req against Qwant using the mode selected by the engine's
// category: "web" (or empty) queries the JSON API, "web-lite" scrapes the
// HTML lite frontend. Any other category yields a response that lists the
// engine as unresponsive with reason "unknown_qwant_mode".
//
// A blank query returns a response carrying only the query, without
// contacting upstream. An error is returned when the engine or its HTTP
// client is nil, or when the upstream request fails.
func (e *QwantEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
	if e == nil || e.client == nil {
		return contracts.SearchResponse{}, errors.New("qwant engine not initialized")
	}
	if strings.TrimSpace(req.Query) == "" {
		return contracts.SearchResponse{Query: req.Query}, nil
	}

	switch strings.TrimSpace(strings.ToLower(e.category)) {
	case "web-lite":
		return e.searchWebLite(ctx, req)
	case "", "web":
		// For API parity we use SearXNG web defaults: count=10,
		// offset=(pageno-1)*count. The engine's config field exists so we can
		// expand to news/images/videos later.
		count := e.resultsPerPage
		if count <= 0 {
			count = qwantDefaultCount
		}
		offset := 0
		if req.Pageno > 1 {
			offset = (req.Pageno - 1) * count
		}
		return e.searchWebAPI(ctx, req, count, offset)
	default:
		// Unknown mode: treat as unresponsive.
		return e.unresponsive(req.Query, "unknown_qwant_mode"), nil
	}
}

// searchWebAPI queries the Qwant v3 JSON endpoint for count results starting
// at offset and converts the "web" rows of the mainline into results.
//
// HTTP 403 and a JSON status other than "success" are reported as an
// unresponsive engine rather than as an error; any other non-2xx status is
// returned as an error that includes the start of the response body.
func (e *QwantEngine) searchWebAPI(ctx context.Context, req contracts.SearchRequest, count, offset int) (contracts.SearchResponse, error) {
	args := url.Values{}
	args.Set("q", req.Query)
	args.Set("count", fmt.Sprintf("%d", count))
	args.Set("locale", qwantLocale(req.Language))
	args.Set("safesearch", fmt.Sprintf("%d", req.Safesearch))
	args.Set("llm", "false")
	args.Set("tgp", "3")
	args.Set("offset", fmt.Sprintf("%d", offset))

	resp, err := e.get(ctx, qwantAPIEndpoint+"?"+args.Encode())
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	defer resp.Body.Close()

	// Qwant often returns a 403 captcha/JS block for the JSON API.
	if resp.StatusCode == http.StatusForbidden {
		return e.unresponsive(req.Query, "captcha_or_js_block"), nil
	}
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		// Best effort: the body is only used to enrich the error message.
		snippet, _ := io.ReadAll(io.LimitReader(resp.Body, qwantMaxErrorBodyBytes))
		return contracts.SearchResponse{}, fmt.Errorf("qwant upstream error: status=%d body=%q", resp.StatusCode, string(snippet))
	}

	body, err := io.ReadAll(io.LimitReader(resp.Body, qwantMaxBodyBytes))
	if err != nil {
		return contracts.SearchResponse{}, fmt.Errorf("qwant: reading response body: %w", err)
	}

	var top map[string]any
	if err := json.Unmarshal(body, &top); err != nil {
		return contracts.SearchResponse{}, fmt.Errorf("qwant: decoding response: %w", err)
	}
	if status, _ := top["status"].(string); status != "success" {
		return e.unresponsive(req.Query, "api_error"), nil
	}

	// Failed assertions leave nil maps behind; indexing a nil map is safe and
	// simply produces no rows.
	data, _ := top["data"].(map[string]any)
	result, _ := data["result"].(map[string]any)
	items, _ := result["items"].(map[string]any)
	rows := toSlice(items["mainline"])

	return qwantResponse(req.Query, e.webResultsFromRows(rows), nil), nil
}

// webResultsFromRows converts the mainline rows of a JSON API response into
// results. Only rows whose type is "web" (or missing) are used, which also
// drops "ads" rows; items without a URL are skipped.
func (e *QwantEngine) webResultsFromRows(rows []any) []contracts.MainResult {
	results := make([]contracts.MainResult, 0, len(rows))
	for _, row := range rows {
		rowMap, ok := row.(map[string]any)
		if !ok {
			continue
		}
		// A missing type is treated as "web".
		if rowType, _ := rowMap["type"].(string); rowType != "" && rowType != "web" {
			continue
		}
		for _, it := range toSlice(rowMap["items"]) {
			itemMap, ok := it.(map[string]any)
			if !ok {
				continue
			}
			resURL := toString(itemMap["url"])
			if resURL == "" {
				continue
			}
			results = append(results, e.newResult(toString(itemMap["title"]), toString(itemMap["desc"]), resURL))
		}
	}
	return results
}

// searchWebLite fetches the Qwant lite HTML page for req and scrapes results
// from it. Any non-2xx status is returned as an error that includes the start
// of the response body.
func (e *QwantEngine) searchWebLite(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
	qLocale := qwantLocale(req.Language)
	langBase, _, _ := strings.Cut(qLocale, "_")

	// Never send a zero or negative page number upstream; Search applies the
	// same "anything below 2 is the first page" rule when computing offsets.
	page := req.Pageno
	if page < 1 {
		page = 1
	}

	args := url.Values{}
	args.Set("q", req.Query)
	args.Set("locale", strings.ToLower(qLocale))
	args.Set("l", langBase)
	args.Set("s", fmt.Sprintf("%d", req.Safesearch))
	args.Set("p", fmt.Sprintf("%d", page))

	resp, err := e.get(ctx, qwantLiteEndpoint+"?"+args.Encode())
	if err != nil {
		return contracts.SearchResponse{}, err
	}
	defer resp.Body.Close()

	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		// Best effort: the body is only used to enrich the error message.
		snippet, _ := io.ReadAll(io.LimitReader(resp.Body, qwantMaxErrorBodyBytes))
		return contracts.SearchResponse{}, fmt.Errorf("qwant lite upstream error: status=%d body=%q", resp.StatusCode, string(snippet))
	}

	// Bound the amount of HTML parsed, matching the cap on the JSON path.
	doc, err := goquery.NewDocumentFromReader(io.LimitReader(resp.Body, qwantMaxBodyBytes))
	if err != nil {
		return contracts.SearchResponse{}, fmt.Errorf("qwant lite: parsing response: %w", err)
	}

	return qwantResponse(req.Query, e.liteResults(doc), nil), nil
}

// liteResults extracts results from a Qwant lite HTML document in two passes
// that share one de-duplication set: first the known `section article`
// layout, then a broader scan over generic containers.
func (e *QwantEngine) liteResults(doc *goquery.Document) []contracts.MainResult {
	results := make([]contracts.MainResult, 0)
	seen := map[string]bool{}

	// Pattern 1: legacy/known qwant-lite structure.
	doc.Find("section article").Each(func(_ int, item *goquery.Selection) {
		// Ignore randomly interspersed advertising.
		if item.Find("span.tooltip").Length() > 0 {
			return
		}

		// In SearXNG: "./span[contains(@class, 'url partner')]"
		urlText := strings.TrimSpace(item.Find("span.url.partner").First().Text())
		if urlText == "" {
			// Fallback: any span with class containing both 'url' and 'partner'.
			urlText = strings.TrimSpace(item.Find("span[class*='url'][class*='partner']").First().Text())
		}
		if urlText == "" || seen[urlText] {
			return
		}
		seen[urlText] = true

		title := strings.TrimSpace(item.Find("h2 a").First().Text())
		content := strings.TrimSpace(item.Find("p").First().Text())
		results = append(results, e.newResult(title, content, urlText))
	})

	// Pattern 2: broader fallback for updated lite markup:
	// any article/list item/div block containing an external anchor.
	// We keep this conservative by requiring non-empty title + URL.
	doc.Find("article, li, div").Each(func(_ int, item *goquery.Selection) {
		if len(results) >= qwantLiteMaxResults {
			return
		}
		// Skip ad-like blocks in fallback pass too.
		if item.Find("span.tooltip").Length() > 0 {
			return
		}
		// Skip obvious nav/footer blocks.
		classAttr, _ := item.Attr("class")
		classLower := strings.ToLower(classAttr)
		if strings.Contains(classLower, "nav") || strings.Contains(classLower, "footer") {
			return
		}

		a := item.Find("a[href]").First()
		if a.Length() == 0 {
			return
		}
		href, ok := a.Attr("href")
		if !ok {
			return
		}
		href = strings.TrimSpace(href)
		// Only absolute http(s) links are kept; this also rejects empty,
		// in-page ("#...") and relative ("/...") links.
		if !strings.HasPrefix(href, "http://") && !strings.HasPrefix(href, "https://") {
			return
		}
		// Skip known sponsored partner links and qwant nav/house links.
		if isKnownSponsoredURL(href) || isQwantInternalURL(href) {
			return
		}

		title := strings.TrimSpace(a.Text())
		if title == "" || isLikelyNavTitle(title) {
			return
		}
		if seen[href] {
			return
		}
		seen[href] = true

		// Best-effort snippet extraction from nearby paragraph/span text.
		// If there is no snippet, still keep clearly external result links:
		// Qwant-lite frequently omits rich snippets for some entries.
		content := strings.TrimSpace(item.Find("p").First().Text())
		if content == "" {
			content = strings.TrimSpace(item.Find("span").First().Text())
		}
		results = append(results, e.newResult(title, content, href))
	})

	return results
}

// get issues a GET request for endpoint with the engine's User-Agent header.
// On success the caller owns the response and must close its body.
func (e *QwantEngine) get(ctx context.Context, endpoint string) (*http.Response, error) {
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return nil, fmt.Errorf("qwant: building request: %w", err)
	}
	httpReq.Header.Set("User-Agent", qwantUserAgent)

	resp, err := e.client.Do(httpReq)
	if err != nil {
		return nil, fmt.Errorf("qwant: sending request: %w", err)
	}
	return resp, nil
}

// newResult builds a "general" category result attributed to this engine.
func (e *QwantEngine) newResult(title, content, rawURL string) contracts.MainResult {
	return contracts.MainResult{
		Template: "default.html",
		Title:    title,
		Content:  content,
		URL:      &rawURL,
		Engine:   e.Name(),
		Score:    0,
		Category: "general",
		Engines:  []string{e.Name()},
	}
}

// unresponsive builds an empty response that lists this engine as
// unresponsive for the given reason.
func (e *QwantEngine) unresponsive(query, reason string) contracts.SearchResponse {
	return qwantResponse(query, nil, [][2]string{{e.Name(), reason}})
}

// qwantResponse assembles a response for query. Every slice field is set to
// a non-nil value; nil results or unresponsive arguments are replaced with
// empty slices.
func qwantResponse(query string, results []contracts.MainResult, unresponsive [][2]string) contracts.SearchResponse {
	if results == nil {
		results = []contracts.MainResult{}
	}
	if unresponsive == nil {
		unresponsive = [][2]string{}
	}
	return contracts.SearchResponse{
		Query:               query,
		NumberOfResults:     len(results),
		Results:             results,
		Answers:             []map[string]any{},
		Corrections:         []string{},
		Infoboxes:           []map[string]any{},
		Suggestions:         []string{},
		UnresponsiveEngines: unresponsive,
	}
}

// qwantLocale converts a language tag such as "en", "en-GB" or "fr_ca" into
// the lang_COUNTRY form Qwant expects (e.g. en_US). An empty tag or "auto"
// maps to "en_US"; a tag without a country part gets "US".
func qwantLocale(lang string) string {
	lang = strings.TrimSpace(lang)
	if lang == "" || lang == "auto" {
		return "en_US"
	}

	base, region, _ := strings.Cut(strings.ReplaceAll(lang, "-", "_"), "_")
	country := "US"
	if region = strings.TrimSpace(region); region != "" {
		country = strings.ToUpper(region)
	}
	// Qwant expects locales like en_US.
	return strings.ToLower(base) + "_" + country
}

// toSlice normalizes a decoded JSON value to a slice: a []any is returned
// as is, a single object is wrapped in a one-element slice, and anything
// else yields nil.
func toSlice(v any) []any {
	switch t := v.(type) {
	case []any:
		return t
	case map[string]any:
		// Handle case where mainline might be a single object.
		return []any{t}
	default:
		return nil
	}
}

// toString returns v as a string when it is a string or a json.Number, and
// "" for every other type.
func toString(v any) string {
	switch t := v.(type) {
	case string:
		return t
	case json.Number:
		return t.String()
	default:
		return ""
	}
}

// isQwantInternalURL reports whether raw points at qwant.com or one of its
// subdomains. Unparsable URLs and URLs without a host report false.
func isQwantInternalURL(raw string) bool {
	u, err := url.Parse(raw)
	if err != nil {
		return false
	}
	host := strings.ToLower(u.Hostname())
	if host == "" {
		return false
	}
	return host == "qwant.com" || strings.HasSuffix(host, ".qwant.com")
}

// isLikelyNavTitle reports whether title matches, case-insensitively, one of
// the link texts this adapter treats as Qwant navigation or promotion rather
// than a search result.
func isLikelyNavTitle(title string) bool {
	t := strings.TrimSpace(strings.ToLower(title))
	switch t {
	case "qwant search", "search", "privacy", "discover the service", "better web", "discover":
		return true
	}
	return strings.HasPrefix(t, "get 20gb of free storage")
}

// isKnownSponsoredURL reports whether raw is a sponsored partner link: its
// host is shdw.me (with or without www.) or the URL contains "qwant-tool".
// Unparsable URLs report false.
func isKnownSponsoredURL(raw string) bool {
	u, err := url.Parse(raw)
	if err != nil {
		return false
	}
	switch strings.ToLower(u.Hostname()) {
	case "shdw.me", "www.shdw.me":
		return true
	}
	return strings.Contains(strings.ToLower(raw), "qwant-tool")
}