package engines import ( "io" "strings" "github.com/ashie/gosearch/internal/contracts" ) // parseDuckDuckGoHTML parses DuckDuckGo Lite's HTML response for search results. func parseDuckDuckGoHTML(r io.Reader) ([]contracts.MainResult, error) { body, err := io.ReadAll(r) if err != nil { return nil, err } content := string(body) results := make([]contracts.MainResult, 0) type parsedResult struct { href string title string } var parsedLinks []parsedResult remaining := content for { idx := strings.Index(remaining, `class="result-link"`) if idx == -1 { break } block := remaining[idx:] href := extractAttr(block, "href") if href == "" { remaining = block[1:] continue } // Skip DDG internal links. if strings.HasPrefix(href, "/") || strings.Contains(href, "duckduckgo.com/l/") { remaining = block[1:] continue } // Extract title — text between > and after the href. titleStart := strings.Index(block, ">") if titleStart == -1 { remaining = block[1:] continue } afterHref := block[titleStart+1:] titleEnd := strings.Index(afterHref, "") if titleEnd == -1 { remaining = block[1:] continue } title := stripHTML(afterHref[:titleEnd]) title = htmlUnescape(title) parsedLinks = append(parsedLinks, parsedResult{ href: href, title: title, }) remaining = block[titleStart+1+titleEnd:] } // Extract snippets between results. for i, link := range parsedLinks { snippet := "" linkIdx := strings.Index(content, link.href) if linkIdx != -1 { snippetRegion := content[linkIdx+len(link.href):] if len(snippetRegion) > 2000 { snippetRegion = snippetRegion[:2000] } snippetIdx := strings.Index(snippetRegion, "result-snippet") if snippetIdx == -1 { snippetIdx = strings.Index(snippetRegion, "result__snippet") } if snippetIdx != -1 { snippetBlock := snippetRegion[snippetIdx:] textStart := strings.Index(snippetBlock, ">") if textStart != -1 { textEnd := strings.Index(snippetBlock[textStart:], "