// kafka — a privacy-respecting metasearch engine // Copyright (C) 2026-present metamorphosis-dev // // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU Affero General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU Affero General Public License for more details. // // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . package engines import ( "strings" ) // extractAttr finds attr="value" or attr='value' in an HTML string. func extractAttr(s, attr string) string { prefix := attr + `="` idx := strings.Index(s, prefix) if idx == -1 { prefix = attr + "='" idx = strings.Index(s, prefix) if idx == -1 { return "" } } start := idx + len(prefix) end := strings.Index(s[start:], "\"") if end == -1 { end = strings.Index(s[start:], "'") } if end == -1 { end = len(s[start:]) } return s[start : start+end] } // stripHTML removes all HTML tags from a string. func stripHTML(s string) string { var result strings.Builder inTag := false for _, r := range s { if r == '<' { inTag = true continue } if r == '>' { inTag = false continue } if !inTag { result.WriteRune(r) } } return strings.TrimSpace(result.String()) } // htmlUnescape handles basic HTML entities. func htmlUnescape(s string) string { s = strings.ReplaceAll(s, "&", "&") s = strings.ReplaceAll(s, "<", "<") s = strings.ReplaceAll(s, ">", ">") s = strings.ReplaceAll(s, """, "\"") s = strings.ReplaceAll(s, "'", "'") s = strings.ReplaceAll(s, " ", " ") return s }