Update LICENSE file and add AGPL header to all source files. AGPLv3 ensures that if someone runs Kafka as a network service and modifies it, they must release their source code under the same license.
160 lines
4.1 KiB
Go
160 lines
4.1 KiB
Go
// kafka — a privacy-respecting metasearch engine
|
|
// Copyright (C) 2026-present metamorphosis-dev
|
|
//
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU Affero General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU Affero General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
package engines
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"net/url"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/metamorphosis-dev/kafka/internal/contracts"
|
|
)
|
|
|
|
// CrossrefEngine is the search engine backed by the Crossref works API.
//
// The zero value is not usable: Search reports an error when client is nil.
type CrossrefEngine struct {
	// client performs the outgoing HTTP requests issued by Search.
	client *http.Client
}
|
|
|
|
// Name returns the fixed identifier of this engine, "crossref".
func (e *CrossrefEngine) Name() string {
	const engineName = "crossref"
	return engineName
}
|
|
|
|
func (e *CrossrefEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
|
|
if e == nil || e.client == nil {
|
|
return contracts.SearchResponse{}, errors.New("crossref engine not initialized")
|
|
}
|
|
q := strings.TrimSpace(req.Query)
|
|
if q == "" {
|
|
return contracts.SearchResponse{Query: req.Query}, nil
|
|
}
|
|
|
|
offset := 20 * (req.Pageno - 1)
|
|
args := url.Values{}
|
|
args.Set("query", q)
|
|
args.Set("offset", fmt.Sprintf("%d", offset))
|
|
|
|
endpoint := "https://api.crossref.org/works?" + args.Encode()
|
|
httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
|
|
if err != nil {
|
|
return contracts.SearchResponse{}, err
|
|
}
|
|
|
|
resp, err := e.client.Do(httpReq)
|
|
if err != nil {
|
|
return contracts.SearchResponse{}, err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
|
body, _ := io.ReadAll(io.LimitReader(resp.Body, 16*1024))
|
|
return contracts.SearchResponse{}, fmt.Errorf("crossref upstream error: status=%d body=%q", resp.StatusCode, string(body))
|
|
}
|
|
|
|
var api struct {
|
|
Message struct {
|
|
Items []crossrefItem `json:"items"`
|
|
} `json:"message"`
|
|
}
|
|
|
|
if err := json.NewDecoder(resp.Body).Decode(&api); err != nil {
|
|
return contracts.SearchResponse{}, err
|
|
}
|
|
|
|
results := make([]contracts.MainResult, 0, len(api.Message.Items))
|
|
for _, item := range api.Message.Items {
|
|
title := ""
|
|
if len(item.Title) > 0 {
|
|
title = strings.TrimSpace(item.Title[0])
|
|
}
|
|
|
|
content := strings.TrimSpace(item.Abstract)
|
|
|
|
urlStr := strings.TrimSpace(item.URL)
|
|
if urlStr == "" {
|
|
urlStr = strings.TrimSpace(item.DOI)
|
|
}
|
|
|
|
pub := parseCrossrefDateParts(item.Published.DateParts)
|
|
|
|
urlPtr := urlStr
|
|
results = append(results, contracts.MainResult{
|
|
Template: "default.html",
|
|
Title: title,
|
|
Content: content,
|
|
URL: &urlPtr,
|
|
Pubdate: pub,
|
|
Engine: "crossref",
|
|
Score: 0,
|
|
Category: "science",
|
|
Priority: "",
|
|
Positions: nil,
|
|
Engines: []string{"crossref"},
|
|
})
|
|
}
|
|
|
|
return contracts.SearchResponse{
|
|
Query: req.Query,
|
|
NumberOfResults: len(results),
|
|
Results: results,
|
|
Answers: []map[string]any{},
|
|
Corrections: []string{},
|
|
Infoboxes: []map[string]any{},
|
|
Suggestions: []string{},
|
|
UnresponsiveEngines: [][2]string{},
|
|
}, nil
|
|
}
|
|
|
|
type crossrefItem struct {
|
|
Type string `json:"type"`
|
|
Title []string `json:"title"`
|
|
URL string `json:"URL"`
|
|
DOI string `json:"DOI"`
|
|
Abstract string `json:"abstract"`
|
|
Page string `json:"page"`
|
|
Publisher string `json:"publisher"`
|
|
Subject []string `json:"subject"`
|
|
Published crossrefPublished `json:"published"`
|
|
}
|
|
|
|
// crossrefPublished mirrors the "published" object of a Crossref item.
type crossrefPublished struct {
	// DateParts is decoded from "date-parts": a list of integer lists.
	DateParts [][]int `json:"date-parts"`
}
|
|
|
|
// parseCrossrefDateParts turns the first entry of a "date-parts" list into a
// timestamp string of the form "YYYY-MM-DD 00:00:00+0000".
//
// The first entry is read as [year, month, day]; month and day are optional
// and default to 1. It returns nil when parts is empty, when its first entry
// is empty, or when the year is not positive (a JSON null decodes to 0, which
// would otherwise be rendered as the year 0000).
//
// A month outside 1..12 or a day outside 1..31 is replaced by 1, and a day
// that does not exist in the given month (for example February 30) falls back
// to the first of that month. Without these checks time.Date would silently
// normalize such values into a different month or year.
func parseCrossrefDateParts(parts [][]int) *string {
	if len(parts) == 0 || len(parts[0]) == 0 {
		return nil
	}

	dp := parts[0]
	year := dp[0]
	if year <= 0 {
		return nil
	}

	month, day := 1, 1
	if len(dp) >= 2 && dp[1] >= 1 && dp[1] <= 12 {
		month = dp[1]
	}
	if len(dp) >= 3 && dp[2] >= 1 && dp[2] <= 31 {
		day = dp[2]
	}

	t := time.Date(year, time.Month(month), day, 0, 0, 0, 0, time.UTC)
	if t.Day() != day {
		// time.Date rolled an impossible day over into the next month.
		t = time.Date(year, time.Month(month), 1, 0, 0, 0, 0, time.UTC)
	}

	// Only the date portion of the layout is substituted; the time and zone
	// suffix is emitted literally.
	formatted := t.Format("2006-01-02 00:00:00+0000")
	return &formatted
}
|
|
|