kafka/internal/engines/html_helpers.go
Franz Kafka 7be03b4017 license: change from MIT to AGPLv3
Update LICENSE file and add AGPL header to all source files.

AGPLv3 ensures that if someone runs Kafka as a network service and
modifies it, they must release their source code under the same license.
2026-03-22 08:27:23 +00:00

74 lines
1.9 KiB
Go

// kafka — a privacy-respecting metasearch engine
// Copyright (C) 2026-present metamorphosis-dev
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package engines
import (
"strings"
)
// extractAttr returns the value of attr from an HTML fragment s.
//
// It looks for the first attr="value" occurrence and, only if no
// double-quoted form exists anywhere in s, for the first attr='value'
// occurrence. The value ends at the next occurrence of the same quote
// character that opened it, so a single-quoted value may contain double
// quotes and vice versa. If the closing quote is missing, the remainder of s
// is returned. An empty string is returned when attr is not present.
//
// Matching is a plain substring search: there is no check that attr starts
// at an attribute boundary, so looking up "href" also matches "data-href".
func extractAttr(s, attr string) string {
	// Double quotes are tried first to keep the original lookup precedence.
	for _, quote := range []string{`"`, `'`} {
		prefix := attr + "=" + quote
		idx := strings.Index(s, prefix)
		if idx == -1 {
			continue
		}
		value := s[idx+len(prefix):]
		// Terminate on the quote that opened the value, not on whichever
		// quote character happens to appear later in the fragment.
		if end := strings.Index(value, quote); end != -1 {
			return value[:end]
		}
		// Unterminated attribute: fall back to everything that follows.
		return value
	}
	return ""
}
// stripHTML returns s with everything between a '<' and the following '>'
// removed, then trimmed of leading and trailing whitespace.
//
// The scan is purely character based: a '<' starts skipping, a '>' stops it,
// and neither delimiter is ever copied to the output. As a consequence a '>'
// that appears outside a tag is dropped as well, and an unclosed '<' discards
// the rest of the input.
func stripHTML(s string) string {
	var out strings.Builder
	insideTag := false
	for _, ch := range s {
		switch {
		case ch == '<':
			insideTag = true
		case ch == '>':
			insideTag = false
		case !insideTag:
			out.WriteRune(ch)
		}
	}
	return strings.TrimSpace(out.String())
}
// basicHTMLEntityReplacer decodes the small set of entities htmlUnescape
// supports. A strings.Replacer scans the input once and never re-examines
// text it has already produced, which keeps the decoding single-level.
var basicHTMLEntityReplacer = strings.NewReplacer(
	"&amp;", "&",
	"&lt;", "<",
	"&gt;", ">",
	"&quot;", "\"",
	"&#39;", "'",
	"&nbsp;", " ",
)

// htmlUnescape decodes the basic HTML entities &amp;, &lt;, &gt;, &quot;,
// &#39; and &nbsp; in s. Note that &nbsp; becomes a regular ASCII space.
// Any other entity is left untouched.
//
// Decoding happens in a single pass, so escaped entities are only unwrapped
// one level: "&amp;lt;" yields "&lt;", not "<".
func htmlUnescape(s string) string {
	return basicHTMLEntityReplacer.Replace(s)
}