Full project rename from kafka to samsa (after Gregor Samsa, who woke one morning from uneasy dreams to find himself transformed). - Module: github.com/metamorphosis-dev/kafka → samsa - Binary: cmd/kafka/ → cmd/samsa/ - CSS: kafka.css → samsa.css - UI: all 'kafka' product names, titles, localStorage keys → samsa - localStorage keys: kafka-theme → samsa-theme, kafka-engines → samsa-engines - OpenSearch: ShortName, LongName, description, URLs updated - AGPL headers: 'kafka' → 'samsa' - Docs, configs, examples updated - Cache key prefix: kafka: → samsa:
85 lines
2.2 KiB
Go
85 lines
2.2 KiB
Go
// samsa — a privacy-respecting metasearch engine
|
|
// Copyright (C) 2026-present metamorphosis-dev
|
|
//
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU Affero General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU Affero General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
package engines
|
|
|
|
import (
|
|
"strings"
|
|
)
|
|
|
|
// extractAttr returns the value of the first attr="value" or attr='value'
// occurrence in the HTML fragment s, or "" when neither form is present.
//
// The double-quoted form is searched for first; the single-quoted form is
// only considered when no double-quoted occurrence exists. The value ends at
// the next occurrence of the same quote character that opened it, so the
// other quote character may appear inside the value. If the closing quote is
// missing, everything up to the end of s is returned.
//
// Matching is a plain substring search: it is case-sensitive and does not
// verify that attr starts at an attribute-name boundary.
func extractAttr(s, attr string) string {
	for _, quote := range []string{`"`, `'`} {
		prefix := attr + "=" + quote
		idx := strings.Index(s, prefix)
		if idx == -1 {
			continue
		}
		value := s[idx+len(prefix):]
		// Only the quote that opened the value may close it.
		if end := strings.Index(value, quote); end != -1 {
			return value[:end]
		}
		// Unterminated value: fall back to the remainder of the input.
		return value
	}
	return ""
}
|
|
|
|
// stripHTML removes everything between '<' and the following '>' from s and
// returns the remaining text with leading and trailing whitespace trimmed.
//
// A '>' that appears outside of a tag is ordinary text and is preserved. An
// unterminated tag swallows the rest of the input.
func stripHTML(s string) string {
	var result strings.Builder
	inTag := false
	for _, r := range s {
		switch {
		case r == '<':
			inTag = true
		case r == '>' && inTag:
			// Only a '>' that actually closes a tag is discarded.
			inTag = false
		case !inTag:
			result.WriteRune(r)
		}
	}
	return strings.TrimSpace(result.String())
}
|
|
|
|
// htmlEntityReplacer decodes the small set of HTML entities handled by
// htmlUnescape. A Replacer scans the input once, so text produced by one
// replacement is never re-examined by another.
var htmlEntityReplacer = strings.NewReplacer(
	"&amp;", "&",
	"&lt;", "<",
	"&gt;", ">",
	"&quot;", `"`,
	"&#39;", "'",
	"&apos;", "'",
	"&nbsp;", " ",
)

// htmlUnescape decodes a basic set of HTML entities in s: &amp;, &lt;, &gt;,
// &quot;, &#39;, &apos; and &nbsp; (which becomes a plain space). Any other
// entity is left untouched.
//
// Decoding happens in a single pass, so an escaped entity such as "&amp;lt;"
// becomes "&lt;" rather than being decoded twice into "<".
func htmlUnescape(s string) string {
	return htmlEntityReplacer.Replace(s)
}
|
|
|
|
// extractImgSrc finds the first <img src="..."> in an HTML string and returns
|
|
// the src attribute value.
|
|
func extractImgSrc(html string) string {
|
|
idx := strings.Index(html, "<img")
|
|
if idx == -1 {
|
|
return ""
|
|
}
|
|
remaining := html[idx:]
|
|
return extractAttr(remaining, "src")
|
|
}
|