kafka/internal/engines/crossref.go
Franz Kafka 7be03b4017 license: change from MIT to AGPLv3
Update LICENSE file and add AGPL header to all source files.

AGPLv3 ensures that if someone runs Kafka as a network service and
modifies it, they must release their source code under the same license.
2026-03-22 08:27:23 +00:00

160 lines
4.1 KiB
Go

// kafka — a privacy-respecting metasearch engine
// Copyright (C) 2026-present metamorphosis-dev
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package engines
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"
"github.com/metamorphosis-dev/kafka/internal/contracts"
)
// CrossrefEngine is a search engine backed by the Crossref works API
// (https://api.crossref.org/works). Its Search method reports an error when
// the engine pointer or its client is nil.
type CrossrefEngine struct {
	// client performs the outbound HTTP requests to Crossref.
	client *http.Client
}
// Name returns the engine's fixed identifier, "crossref". It does not read
// the receiver.
func (e *CrossrefEngine) Name() string {
	const name = "crossref"
	return name
}
// Search runs req.Query against the Crossref works endpoint and maps every
// returned item to a contracts.MainResult in the "science" category.
//
// A query that is empty after trimming yields an empty response without
// contacting Crossref. The offset sent upstream is 20*(req.Pageno-1), clamped
// at zero so that a page number below 1 behaves like the first page instead
// of sending a negative offset. No "rows" parameter is sent, so the stride of
// 20 assumes Crossref's default page size.
// NOTE(review): confirm the default page size against the Crossref API docs.
//
// Each result's URL is the item's URL field; when that is blank, the DOI is
// turned into a resolvable https://doi.org/ link. If both are blank the URL
// is an empty string.
//
// An error is returned when the engine is not initialized, the request cannot
// be built or sent, Crossref answers with a non-2xx status (the first 16 KiB
// of the body are included in the message), or the body is not valid JSON.
// Underlying errors are wrapped, so errors.Is / errors.As keep working.
func (e *CrossrefEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
	if e == nil || e.client == nil {
		return contracts.SearchResponse{}, errors.New("crossref engine not initialized")
	}

	q := strings.TrimSpace(req.Query)
	if q == "" {
		return contracts.SearchResponse{Query: req.Query}, nil
	}

	const pageSize = 20
	offset := pageSize * (req.Pageno - 1)
	if offset < 0 {
		// Pageno of zero or below: serve the first page rather than
		// forwarding a negative offset to the upstream API.
		offset = 0
	}

	args := url.Values{}
	args.Set("query", q)
	args.Set("offset", fmt.Sprintf("%d", offset))
	endpoint := "https://api.crossref.org/works?" + args.Encode()

	httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return contracts.SearchResponse{}, fmt.Errorf("crossref: building request: %w", err)
	}

	resp, err := e.client.Do(httpReq)
	if err != nil {
		return contracts.SearchResponse{}, fmt.Errorf("crossref: sending request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		// Best effort: the body only enriches the error message, so a read
		// failure here is deliberately ignored.
		body, _ := io.ReadAll(io.LimitReader(resp.Body, 16*1024))
		return contracts.SearchResponse{}, fmt.Errorf("crossref upstream error: status=%d body=%q", resp.StatusCode, string(body))
	}

	var api struct {
		Message struct {
			Items []crossrefItem `json:"items"`
		} `json:"message"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&api); err != nil {
		return contracts.SearchResponse{}, fmt.Errorf("crossref: decoding response: %w", err)
	}

	results := make([]contracts.MainResult, 0, len(api.Message.Items))
	for _, item := range api.Message.Items {
		title := ""
		if len(item.Title) > 0 {
			title = strings.TrimSpace(item.Title[0])
		}

		// link is declared inside the loop body, so every result gets a
		// pointer to its own string.
		link := strings.TrimSpace(item.URL)
		if link == "" {
			if doi := strings.TrimSpace(item.DOI); doi != "" {
				// A bare DOI such as "10.1000/xyz" is not a URL; route it
				// through the DOI resolver. url.URL escapes unsafe
				// characters while keeping the slashes intact.
				link = (&url.URL{Scheme: "https", Host: "doi.org", Path: "/" + doi}).String()
			}
		}

		results = append(results, contracts.MainResult{
			Template:  "default.html",
			Title:     title,
			Content:   strings.TrimSpace(item.Abstract),
			URL:       &link,
			Pubdate:   parseCrossrefDateParts(item.Published.DateParts),
			Engine:    "crossref",
			Score:     0,
			Category:  "science",
			Priority:  "",
			Positions: nil,
			Engines:   []string{"crossref"},
		})
	}

	// The collection fields are explicit empty values rather than nil.
	return contracts.SearchResponse{
		Query:               req.Query,
		NumberOfResults:     len(results),
		Results:             results,
		Answers:             []map[string]any{},
		Corrections:         []string{},
		Infoboxes:           []map[string]any{},
		Suggestions:         []string{},
		UnresponsiveEngines: [][2]string{},
	}, nil
}
// crossrefItem is the subset of a Crossref work record that is decoded from
// each element of "message.items" in the API response. Type, Page, Publisher
// and Subject are decoded but not read by the Search method in this file.
type crossrefItem struct {
	Type string `json:"type"`
	// Title is a list; Search uses only its first entry.
	Title []string `json:"title"`
	// URL is preferred as the result link; DOI is the fallback when URL is blank.
	URL string `json:"URL"`
	DOI string `json:"DOI"`
	// Abstract becomes the result content after trimming whitespace.
	Abstract string `json:"abstract"`
	Page string `json:"page"`
	Publisher string `json:"publisher"`
	Subject []string `json:"subject"`
	// Published supplies the date parts used for the result's publication date.
	Published crossrefPublished `json:"published"`
}
// crossrefPublished mirrors the "published" object of a Crossref work record.
type crossrefPublished struct {
	// DateParts holds the raw "date-parts" arrays. parseCrossrefDateParts reads
	// only the first array, as [year, month, day] with month and day optional.
	DateParts [][]int `json:"date-parts"`
}
// parseCrossrefDateParts converts the first entry of a Crossref "date-parts"
// array ([year], [year, month] or [year, month, day]) into a timestamp string
// of the form "YYYY-MM-DD 00:00:00+0000" (midnight UTC).
//
// It returns nil when there is no usable date: parts is empty, its first
// entry is empty, or the year is below 1 (a JSON null decodes to 0).
//
// A missing or out-of-range month falls back to January, and a missing or
// out-of-range day, or one that does not exist in the given month, falls back
// to the 1st. Without these checks time.Date would silently normalize the
// values into a different month or year.
func parseCrossrefDateParts(parts [][]int) *string {
	if len(parts) == 0 || len(parts[0]) == 0 {
		return nil
	}
	dp := parts[0]

	year := dp[0]
	if year < 1 {
		return nil
	}

	month, day := 1, 1
	if len(dp) >= 2 && dp[1] >= 1 && dp[1] <= 12 {
		month = dp[1]
		// A day is only meaningful together with a valid month.
		if len(dp) >= 3 && dp[2] >= 1 && dp[2] <= 31 {
			day = dp[2]
		}
	}

	t := time.Date(year, time.Month(month), day, 0, 0, 0, 0, time.UTC)
	if t.Day() != day {
		// The day overflowed the month (e.g. Feb 30) and time.Date rolled it
		// into the next month; keep the stated month instead.
		t = time.Date(year, time.Month(month), 1, 0, 0, 0, 0, time.UTC)
	}

	// The time-of-day and zone portions of the layout are literal text; the
	// value is always midnight UTC.
	formatted := t.Format("2006-01-02 00:00:00+0000")
	return &formatted
}