package engines

import (
	"io"
	"net/url"
	"strings"

	"github.com/ashie/gosearch/internal/contracts"
)

// parseDuckDuckGoHTML parses DuckDuckGo Lite's HTML response for search results.
//
// DDG Lite uses HTML tables with single-quoted class attributes
// (class='result-link', class='result-snippet') and wraps target URLs in a
// tracking redirect (//duckduckgo.com/l/?uddg=ENCODED_URL). The body is read
// fully into memory and scanned with plain substring searches; attribute and
// text extraction is delegated to the package helpers extractAttr, stripHTML
// and htmlUnescape.
//
// Results are returned in document order with a descending Score (the first
// result scores len(results), the last scores 1). The returned slice is
// non-nil even when no result is found. The only error returned is a read
// error from r.
func parseDuckDuckGoHTML(r io.Reader) ([]contracts.MainResult, error) {
	const (
		linkMarker  = "class='result-link'" // DDG uses single quotes here.
		anchorClose = "</a>"
	)

	body, err := io.ReadAll(r)
	if err != nil {
		return nil, err
	}

	type parsedResult struct {
		href    string
		title   string
		snippet string
	}
	var parsedLinks []parsedResult

	remaining := string(body)
	for {
		idx := strings.Index(remaining, linkMarker)
		if idx == -1 {
			break
		}
		block := remaining[idx:]
		// Where scanning resumes when this candidate is rejected: just past
		// the marker, so the next search finds the following result link.
		skip := block[len(linkMarker):]

		tagEnd := strings.Index(block, ">")
		if tagEnd == -1 {
			remaining = skip
			continue
		}

		// Look up href inside the anchor's opening tag only, so it is found
		// whether it is written before or after the class attribute and is
		// never taken from a later element. If the marker does not sit inside
		// an open tag, fall back to scanning from the marker itself.
		tagStart := strings.LastIndex(remaining[:idx], "<")
		if tagStart == -1 || strings.Contains(remaining[tagStart:idx], ">") {
			tagStart = idx
		}
		href := extractAttr(remaining[tagStart:idx+tagEnd+1], "href")
		if href == "" {
			remaining = skip
			continue
		}
		href = unwrapDDGLiteRedirect(href)

		// Skip links with no usable target and internal links. The "/" prefix
		// also covers protocol-relative "//duckduckgo.com/..." URLs.
		if href == "" || strings.HasPrefix(href, "/") {
			remaining = skip
			continue
		}

		// Extract title — text between > and </a> after the class attribute.
		afterTag := block[tagEnd+1:]
		titleEnd := strings.Index(afterTag, anchorClose)
		if titleEnd == -1 {
			remaining = skip
			continue
		}
		title := htmlUnescape(stripHTML(afterTag[:titleEnd]))

		// Continue from the anchor's closing tag in every remaining case.
		remaining = afterTag[titleEnd:]
		if title == "" {
			continue
		}

		parsedLinks = append(parsedLinks, parsedResult{
			href:    href,
			title:   title,
			snippet: findDDGLiteSnippet(remaining),
		})
	}

	results := make([]contracts.MainResult, 0, len(parsedLinks))
	for i, link := range parsedLinks {
		// Copy so every result points at its own string rather than at the
		// loop variable.
		urlPtr := link.href
		results = append(results, contracts.MainResult{
			Template: "default.html",
			Title:    link.title,
			Content:  link.snippet,
			URL:      &urlPtr,
			Engine:   "duckduckgo",
			Score:    float64(len(parsedLinks) - i),
			Category: "general",
			Engines:  []string{"duckduckgo"},
		})
	}
	return results, nil
}

// unwrapDDGLiteRedirect returns the destination carried in the uddg parameter
// of a DDG tracking link, or href unchanged when the parameter is absent or
// cannot be percent-decoded.
func unwrapDDGLiteRedirect(href string) string {
	const param = "uddg="

	i := strings.Index(href, param)
	if i == -1 {
		return href
	}
	encoded := href[i+len(param):]
	// Other parameters may follow; cutting at '&' also handles "&amp;".
	if amp := strings.Index(encoded, "&"); amp != -1 {
		encoded = encoded[:amp]
	}
	decoded, err := url.QueryUnescape(encoded)
	if err != nil {
		return href
	}
	return decoded
}

// findDDGLiteSnippet returns the text of the first result-snippet cell that
// starts in region before the next result link, or "" when there is none.
// region is expected to begin right after a result's anchor.
func findDDGLiteSnippet(region string) string {
	const (
		linkMarker    = "class='result-link'"
		snippetMarker = "class='result-snippet'" // DDG uses single quotes here.
		cellClose     = "</td>"
		maxDistance   = 2000 // The snippet must start this close to its link.
	)

	// Never read past the next result, so a result without a snippet does not
	// inherit its neighbour's.
	if next := strings.Index(region, linkMarker); next != -1 {
		region = region[:next]
	}

	window := region
	if len(window) > maxDistance {
		window = window[:maxDistance]
	}
	start := strings.Index(window, snippetMarker)
	if start == -1 {
		return ""
	}

	cell := region[start:]
	open := strings.Index(cell, ">")
	if open == -1 {
		return ""
	}
	text := cell[open+1:]
	end := strings.Index(text, cellClose)
	if end == -1 {
		return ""
	}
	// Same clean-up as the title: strip markup, then decode entities.
	return htmlUnescape(stripHTML(text[:end]))
}