kafka/internal/search/merge.go
Franz Kafka 7be03b4017 license: change from MIT to AGPLv3
Update LICENSE file and add AGPL header to all source files.

AGPLv3 ensures that if someone runs Kafka as a network service and
modifies it, they must release their source code under the same license.
2026-03-22 08:27:23 +00:00

137 lines
3.7 KiB
Go

// kafka — a privacy-respecting metasearch engine
// Copyright (C) 2026-present metamorphosis-dev
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package search
import (
"encoding/json"
"net/url"
"strings"
"github.com/metamorphosis-dev/kafka/internal/contracts"
)
// MergeResponses folds several search responses into a single response.
//
// Merge rules (MVP):
//   - Query is taken from the first response that has a non-empty query.
//   - NumberOfResults is the largest value seen across all responses.
//   - Results keep their first occurrence per resultDedupKey.
//   - Answers keep their first occurrence per JSON encoding; an answer that
//     cannot be marshalled is kept without de-duplication.
//   - Corrections and Suggestions are merged as ordered sets.
//   - Infoboxes and UnresponsiveEngines are concatenated as-is.
//
// Every slice field of the returned value is non-nil, so it encodes to a JSON
// array rather than null even when nothing was merged into it.
func MergeResponses(responses []contracts.SearchResponse) contracts.SearchResponse {
	var (
		out             contracts.SearchResponse
		seenResults     = make(map[string]struct{})
		seenAnswers     = make(map[string]struct{})
		seenCorrections = make(map[string]struct{})
		seenSuggestions = make(map[string]struct{})
	)

	for _, resp := range responses {
		if out.Query == "" {
			out.Query = resp.Query
		}
		out.NumberOfResults = maxInt(out.NumberOfResults, resp.NumberOfResults)

		for _, res := range resp.Results {
			k := resultDedupKey(res)
			if _, dup := seenResults[k]; !dup {
				seenResults[k] = struct{}{}
				out.Results = append(out.Results, res)
			}
		}

		for _, answer := range resp.Answers {
			// The JSON encoding serves as the identity of an answer. When
			// encoding fails there is no key to compare, so the answer is
			// simply kept.
			if encoded, err := json.Marshal(answer); err == nil {
				k := string(encoded)
				if _, dup := seenAnswers[k]; dup {
					continue
				}
				seenAnswers[k] = struct{}{}
			}
			out.Answers = append(out.Answers, answer)
		}

		out.Corrections = unionStrings(out.Corrections, resp.Corrections, &seenCorrections)
		out.Suggestions = unionStrings(out.Suggestions, resp.Suggestions, &seenSuggestions)

		out.Infoboxes = append(out.Infoboxes, resp.Infoboxes...)
		out.UnresponsiveEngines = append(out.UnresponsiveEngines, resp.UnresponsiveEngines...)
	}

	// Swap nil slices for empty ones so the JSON shape stays stable
	// ("[]" instead of "null").
	if out.Results == nil {
		out.Results = []contracts.MainResult{}
	}
	if out.Answers == nil {
		out.Answers = []map[string]any{}
	}
	if out.Corrections == nil {
		out.Corrections = []string{}
	}
	if out.Infoboxes == nil {
		out.Infoboxes = []map[string]any{}
	}
	if out.Suggestions == nil {
		out.Suggestions = []string{}
	}
	if out.UnresponsiveEngines == nil {
		out.UnresponsiveEngines = [][2]string{}
	}

	return out
}
// resultDedupKey builds the de-duplication key for one result, in the form
// "engine|title|url".
//
// Engine and title are lower-cased. When the result URL parses and carries a
// host, the URL part is reduced to the lower-cased host followed by the path,
// so scheme, query string, fragment and host casing do not affect the key.
// Otherwise the raw URL string is used unchanged (empty when r.URL is nil).
func resultDedupKey(r contracts.MainResult) string {
	urlStr := ""
	if r.URL != nil {
		urlStr = *r.URL
	}

	if u, err := url.Parse(urlStr); err == nil && u.Host != "" {
		// url.Parse keeps the host exactly as written. Host names are
		// case-insensitive, so fold the case to make "Example.com" and
		// "example.com" collide. The path is case-sensitive and is kept as is.
		urlStr = strings.ToLower(u.Host) + u.Path
	}

	return strings.ToLower(r.Engine) + "|" + strings.ToLower(r.Title) + "|" + urlStr
}
// unionStrings appends to dst every string from src that is not yet recorded
// in *seen, recording each appended string as it goes, and returns the
// extended slice. The order of src is preserved.
//
// Only *seen decides what counts as a duplicate; elements already in dst are
// not consulted. If *seen is nil, a fresh map is allocated and stored back
// through the pointer.
func unionStrings(dst []string, src []string, seen *map[string]struct{}) []string {
	set := *seen
	if set == nil {
		set = make(map[string]struct{})
		*seen = set
	}

	for i := range src {
		item := src[i]
		if _, dup := set[item]; !dup {
			set[item] = struct{}{}
			dst = append(dst, item)
		}
	}
	return dst
}
// maxInt returns the larger of a and b.
func maxInt(a, b int) int {
	if b >= a {
		return b
	}
	return a
}