Merge branch 'feat/youtube-engine'
This commit is contained in:
commit
a9ea99c104
7 changed files with 238 additions and 15 deletions
|
|
@ -56,6 +56,7 @@ func main() {
|
||||||
UpstreamURL: cfg.Upstream.URL,
|
UpstreamURL: cfg.Upstream.URL,
|
||||||
HTTPTimeout: cfg.HTTPTimeout(),
|
HTTPTimeout: cfg.HTTPTimeout(),
|
||||||
Cache: searchCache,
|
Cache: searchCache,
|
||||||
|
EnginesConfig: cfg,
|
||||||
})
|
})
|
||||||
|
|
||||||
acSvc := autocomplete.NewService(cfg.Upstream.URL, cfg.HTTPTimeout())
|
acSvc := autocomplete.NewService(cfg.Upstream.URL, cfg.HTTPTimeout())
|
||||||
|
|
|
||||||
|
|
@ -22,7 +22,7 @@ url = ""
|
||||||
[engines]
|
[engines]
|
||||||
# Comma-separated list of engines to execute locally in Go (env: LOCAL_PORTED_ENGINES)
|
# Comma-separated list of engines to execute locally in Go (env: LOCAL_PORTED_ENGINES)
|
||||||
# Engines not listed here will be proxied to the upstream instance.
|
# Engines not listed here will be proxied to the upstream instance.
|
||||||
local_ported = ["wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing"]
|
local_ported = ["wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing", "google", "youtube"]
|
||||||
|
|
||||||
[engines.brave]
|
[engines.brave]
|
||||||
# Brave Search API key (env: BRAVE_API_KEY)
|
# Brave Search API key (env: BRAVE_API_KEY)
|
||||||
|
|
@ -35,6 +35,10 @@ access_token = ""
|
||||||
category = "web-lite"
|
category = "web-lite"
|
||||||
results_per_page = 10
|
results_per_page = 10
|
||||||
|
|
||||||
|
[engines.youtube]
|
||||||
|
# YouTube Data API v3 key (env: YOUTUBE_API_KEY)
|
||||||
|
api_key = ""
|
||||||
|
|
||||||
[cache]
|
[cache]
|
||||||
# Valkey/Redis cache for search results.
|
# Valkey/Redis cache for search results.
|
||||||
# Leave address empty to disable caching entirely.
|
# Leave address empty to disable caching entirely.
|
||||||
|
|
|
||||||
|
|
@ -35,6 +35,7 @@ type EnginesConfig struct {
|
||||||
LocalPorted []string `toml:"local_ported"`
|
LocalPorted []string `toml:"local_ported"`
|
||||||
Brave BraveConfig `toml:"brave"`
|
Brave BraveConfig `toml:"brave"`
|
||||||
Qwant QwantConfig `toml:"qwant"`
|
Qwant QwantConfig `toml:"qwant"`
|
||||||
|
YouTube YouTubeConfig `toml:"youtube"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// CacheConfig holds Valkey/Redis cache settings.
|
// CacheConfig holds Valkey/Redis cache settings.
|
||||||
|
|
@ -85,6 +86,10 @@ type QwantConfig struct {
|
||||||
ResultsPerPage int `toml:"results_per_page"`
|
ResultsPerPage int `toml:"results_per_page"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// YouTubeConfig holds settings for the YouTube engine.
type YouTubeConfig struct {
|
||||||
|
// APIKey is the YouTube Data API key, read from the "api_key" TOML key.
APIKey string `toml:"api_key"`
|
||||||
|
}
|
||||||
|
|
||||||
// Load reads configuration from the given TOML file path.
|
// Load reads configuration from the given TOML file path.
|
||||||
// If the file does not exist, it returns defaults (empty values where applicable).
|
// If the file does not exist, it returns defaults (empty values where applicable).
|
||||||
// Environment variables are used as fallbacks for any zero-value fields.
|
// Environment variables are used as fallbacks for any zero-value fields.
|
||||||
|
|
@ -109,7 +114,7 @@ func defaultConfig() *Config {
|
||||||
},
|
},
|
||||||
Upstream: UpstreamConfig{},
|
Upstream: UpstreamConfig{},
|
||||||
Engines: EnginesConfig{
|
Engines: EnginesConfig{
|
||||||
LocalPorted: []string{"wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing"},
|
LocalPorted: []string{"wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing", "google", "youtube"},
|
||||||
Qwant: QwantConfig{
|
Qwant: QwantConfig{
|
||||||
Category: "web-lite",
|
Category: "web-lite",
|
||||||
ResultsPerPage: 10,
|
ResultsPerPage: 10,
|
||||||
|
|
@ -151,6 +156,9 @@ func applyEnvOverrides(cfg *Config) {
|
||||||
if v := os.Getenv("BRAVE_ACCESS_TOKEN"); v != "" {
|
if v := os.Getenv("BRAVE_ACCESS_TOKEN"); v != "" {
|
||||||
cfg.Engines.Brave.AccessToken = v
|
cfg.Engines.Brave.AccessToken = v
|
||||||
}
|
}
|
||||||
|
if v := os.Getenv("YOUTUBE_API_KEY"); v != "" {
|
||||||
|
cfg.Engines.YouTube.APIKey = v
|
||||||
|
}
|
||||||
if v := os.Getenv("VALKEY_ADDRESS"); v != "" {
|
if v := os.Getenv("VALKEY_ADDRESS"); v != "" {
|
||||||
cfg.Cache.Address = v
|
cfg.Cache.Address = v
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -4,23 +4,42 @@ import (
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/metamorphosis-dev/kafka/internal/config"
|
||||||
)
|
)
|
||||||
|
|
||||||
// NewDefaultPortedEngines returns the starter set of Go-native engines.
|
// NewDefaultPortedEngines returns the starter set of Go-native engines.
|
||||||
// The service can swap/extend this registry later as more engines are ported.
|
// The service can swap/extend this registry later as more engines are ported.
|
||||||
func NewDefaultPortedEngines(client *http.Client) map[string]Engine {
|
// API keys are taken from cfg when set; if cfg is nil or a key is empty there,
// the corresponding environment variable is used instead.
|
||||||
|
func NewDefaultPortedEngines(client *http.Client, cfg *config.Config) map[string]Engine {
|
||||||
if client == nil {
|
if client == nil {
|
||||||
client = &http.Client{Timeout: 10 * time.Second}
|
client = &http.Client{Timeout: 10 * time.Second}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var braveAPIKey, braveAccessToken, youtubeAPIKey string
|
||||||
|
if cfg != nil {
|
||||||
|
braveAPIKey = cfg.Engines.Brave.APIKey
|
||||||
|
braveAccessToken = cfg.Engines.Brave.AccessToken
|
||||||
|
youtubeAPIKey = cfg.Engines.YouTube.APIKey
|
||||||
|
}
|
||||||
|
if braveAPIKey == "" {
|
||||||
|
braveAPIKey = os.Getenv("BRAVE_API_KEY")
|
||||||
|
}
|
||||||
|
if braveAccessToken == "" {
|
||||||
|
braveAccessToken = os.Getenv("BRAVE_ACCESS_TOKEN")
|
||||||
|
}
|
||||||
|
if youtubeAPIKey == "" {
|
||||||
|
youtubeAPIKey = os.Getenv("YOUTUBE_API_KEY")
|
||||||
|
}
|
||||||
|
|
||||||
return map[string]Engine{
|
return map[string]Engine{
|
||||||
"wikipedia": &WikipediaEngine{client: client},
|
"wikipedia": &WikipediaEngine{client: client},
|
||||||
"arxiv": &ArxivEngine{client: client},
|
"arxiv": &ArxivEngine{client: client},
|
||||||
"crossref": &CrossrefEngine{client: client},
|
"crossref": &CrossrefEngine{client: client},
|
||||||
"braveapi": &BraveEngine{
|
"braveapi": &BraveEngine{
|
||||||
client: client,
|
client: client,
|
||||||
apiKey: os.Getenv("BRAVE_API_KEY"),
|
apiKey: braveAPIKey,
|
||||||
accessGateToken: os.Getenv("BRAVE_ACCESS_TOKEN"),
|
accessGateToken: braveAccessToken,
|
||||||
resultsPerPage: 20,
|
resultsPerPage: 20,
|
||||||
},
|
},
|
||||||
"qwant": &QwantEngine{
|
"qwant": &QwantEngine{
|
||||||
|
|
@ -33,5 +52,10 @@ func NewDefaultPortedEngines(client *http.Client) map[string]Engine {
|
||||||
"reddit": &RedditEngine{client: client},
|
"reddit": &RedditEngine{client: client},
|
||||||
"bing": &BingEngine{client: client},
|
"bing": &BingEngine{client: client},
|
||||||
"google": &GoogleEngine{client: client},
|
"google": &GoogleEngine{client: client},
|
||||||
|
"youtube": &YouTubeEngine{
|
||||||
|
client: client,
|
||||||
|
apiKey: youtubeAPIKey,
|
||||||
|
baseURL: "https://www.googleapis.com",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ import (
|
||||||
"github.com/metamorphosis-dev/kafka/internal/contracts"
|
"github.com/metamorphosis-dev/kafka/internal/contracts"
|
||||||
)
|
)
|
||||||
|
|
||||||
var defaultPortedEngines = []string{"wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing"}
|
var defaultPortedEngines = []string{"wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing", "google", "youtube"}
|
||||||
|
|
||||||
type Planner struct {
|
type Planner struct {
|
||||||
PortedSet map[string]bool
|
PortedSet map[string]bool
|
||||||
|
|
@ -99,6 +99,8 @@ func inferFromCategories(categories []string) []string {
|
||||||
set["github"] = true
|
set["github"] = true
|
||||||
case "social media":
|
case "social media":
|
||||||
set["reddit"] = true
|
set["reddit"] = true
|
||||||
|
case "videos":
|
||||||
|
set["youtube"] = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -107,7 +109,7 @@ func inferFromCategories(categories []string) []string {
|
||||||
out = append(out, e)
|
out = append(out, e)
|
||||||
}
|
}
|
||||||
// stable order
|
// stable order
|
||||||
order := map[string]int{"wikipedia": 0, "braveapi": 1, "qwant": 2, "duckduckgo": 3, "bing": 4, "google": 5, "arxiv": 6, "crossref": 7, "github": 8, "reddit": 9}
|
order := map[string]int{"wikipedia": 0, "braveapi": 1, "qwant": 2, "duckduckgo": 3, "bing": 4, "google": 5, "arxiv": 6, "crossref": 7, "github": 8, "reddit": 9, "youtube": 10}
|
||||||
sortByOrder(out, order)
|
sortByOrder(out, order)
|
||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
|
|
|
||||||
182
internal/engines/youtube.go
Normal file
182
internal/engines/youtube.go
Normal file
|
|
@ -0,0 +1,182 @@
|
||||||
|
package engines
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"net/url"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/metamorphosis-dev/kafka/internal/contracts"
|
||||||
|
)
|
||||||
|
|
||||||
|
// YouTubeEngine is the Engine implementation backed by the YouTube Data API
// v3 search endpoint.
type YouTubeEngine struct {
|
||||||
|
// client performs the outbound HTTP requests.
client *http.Client
|
||||||
|
// apiKey is sent as the "key" query parameter on every request.
apiKey string
|
||||||
|
// baseURL is the scheme and host that "/youtube/v3/search" is appended to.
baseURL string
|
||||||
|
}
|
||||||
|
|
||||||
|
// Name returns the engine identifier, "youtube".
func (e *YouTubeEngine) Name() string { return "youtube" }
|
||||||
|
|
||||||
|
func (e *YouTubeEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
|
||||||
|
if strings.TrimSpace(req.Query) == "" {
|
||||||
|
return contracts.SearchResponse{Query: req.Query}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if e.apiKey == "" {
|
||||||
|
e.apiKey = os.Getenv("YOUTUBE_API_KEY")
|
||||||
|
}
|
||||||
|
|
||||||
|
maxResults := 10
|
||||||
|
if req.Pageno > 1 {
|
||||||
|
maxResults = 20
|
||||||
|
}
|
||||||
|
|
||||||
|
u := e.baseURL + "/youtube/v3/search?" + url.Values{
|
||||||
|
"part": {"snippet"},
|
||||||
|
"q": {req.Query},
|
||||||
|
"type": {"video"},
|
||||||
|
"maxResults": {fmt.Sprintf("%d", maxResults)},
|
||||||
|
"key": {e.apiKey},
|
||||||
|
}.Encode()
|
||||||
|
|
||||||
|
if req.Language != "" && req.Language != "auto" {
|
||||||
|
lang := strings.Split(strings.ToLower(req.Language), "-")[0]
|
||||||
|
u += "&relevanceLanguage=" + lang
|
||||||
|
}
|
||||||
|
|
||||||
|
httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil)
|
||||||
|
if err != nil {
|
||||||
|
return contracts.SearchResponse{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := e.client.Do(httpReq)
|
||||||
|
if err != nil {
|
||||||
|
return contracts.SearchResponse{}, err
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
|
||||||
|
return contracts.SearchResponse{}, fmt.Errorf("youtube api error: status=%d body=%q", resp.StatusCode, string(body))
|
||||||
|
}
|
||||||
|
|
||||||
|
var apiResp youtubeSearchResponse
|
||||||
|
if err := json.NewDecoder(resp.Body).Decode(&apiResp); err != nil {
|
||||||
|
return contracts.SearchResponse{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if apiResp.Error != nil {
|
||||||
|
return contracts.SearchResponse{}, fmt.Errorf("youtube api error: %s", apiResp.Error.Message)
|
||||||
|
}
|
||||||
|
|
||||||
|
results := make([]contracts.MainResult, 0, len(apiResp.Items))
|
||||||
|
for _, item := range apiResp.Items {
|
||||||
|
if item.ID.VideoID == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
videoURL := "https://www.youtube.com/watch?v=" + item.ID.VideoID
|
||||||
|
urlPtr := videoURL
|
||||||
|
|
||||||
|
published := ""
|
||||||
|
if item.Snippet.PublishedAt != "" {
|
||||||
|
if t, err := time.Parse(time.RFC3339, item.Snippet.PublishedAt); err == nil {
|
||||||
|
published = t.Format("Jan 2, 2006")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
content := item.Snippet.Description
|
||||||
|
if len(content) > 300 {
|
||||||
|
content = content[:300] + "..."
|
||||||
|
}
|
||||||
|
if published != "" {
|
||||||
|
content = "Published " + published + " · " + content
|
||||||
|
}
|
||||||
|
|
||||||
|
thumbnail := ""
|
||||||
|
if item.Snippet.Thumbnails.High.URL != "" {
|
||||||
|
thumbnail = item.Snippet.Thumbnails.High.URL
|
||||||
|
} else if item.Snippet.Thumbnails.Medium.URL != "" {
|
||||||
|
thumbnail = item.Snippet.Thumbnails.Medium.URL
|
||||||
|
}
|
||||||
|
|
||||||
|
results = append(results, contracts.MainResult{
|
||||||
|
Template: "videos.html",
|
||||||
|
Title: item.Snippet.Title,
|
||||||
|
URL: &urlPtr,
|
||||||
|
Content: content,
|
||||||
|
Thumbnail: thumbnail,
|
||||||
|
Engine: "youtube",
|
||||||
|
Score: 1.0,
|
||||||
|
Category: "videos",
|
||||||
|
Engines: []string{"youtube"},
|
||||||
|
Metadata: map[string]any{
|
||||||
|
"channel": item.Snippet.ChannelTitle,
|
||||||
|
"video_id": item.Snippet.ResourceID.VideoID,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
return contracts.SearchResponse{
|
||||||
|
Query: req.Query,
|
||||||
|
NumberOfResults: len(results),
|
||||||
|
Results: results,
|
||||||
|
Answers: []map[string]any{},
|
||||||
|
Corrections: []string{},
|
||||||
|
Infoboxes: []map[string]any{},
|
||||||
|
Suggestions: []string{},
|
||||||
|
UnresponsiveEngines: [][2]string{},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// YouTube API response types.
|
||||||
|
|
||||||
|
// youtubeSearchResponse is the JSON body decoded from the search endpoint.
// Error is a pointer so that a missing "error" object decodes to nil.
type youtubeSearchResponse struct {
|
||||||
|
Items []youtubeSearchItem `json:"items"`
|
||||||
|
PageInfo struct {
|
||||||
|
TotalResults int `json:"totalResults"`
|
||||||
|
ResultsPerPage int `json:"resultsPerPage"`
|
||||||
|
} `json:"pageInfo"`
|
||||||
|
NextPageToken string `json:"nextPageToken"`
|
||||||
|
// Error is non-nil when the body contains an "error" object.
Error *struct {
|
||||||
|
Code int `json:"code"`
|
||||||
|
Message string `json:"message"`
|
||||||
|
Errors []struct {
|
||||||
|
Domain string `json:"domain"`
|
||||||
|
Reason string `json:"reason"`
|
||||||
|
Message string `json:"message"`
|
||||||
|
} `json:"errors"`
|
||||||
|
} `json:"error"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// youtubeSearchItem is one entry of the "items" array in a search response.
type youtubeSearchItem struct {
|
||||||
|
// ID identifies the matched resource; VideoID is decoded from "videoId".
ID struct {
|
||||||
|
VideoID string `json:"videoId"`
|
||||||
|
} `json:"id"`
|
||||||
|
Snippet struct {
|
||||||
|
// PublishedAt is kept as the raw string from the response.
PublishedAt string `json:"publishedAt"`
|
||||||
|
ChannelID string `json:"channelId"`
|
||||||
|
ChannelTitle string `json:"channelTitle"`
|
||||||
|
Title string `json:"title"`
|
||||||
|
Description string `json:"description"`
|
||||||
|
Thumbnails struct {
|
||||||
|
Default struct {
|
||||||
|
URL string `json:"url"`
|
||||||
|
} `json:"default"`
|
||||||
|
Medium struct {
|
||||||
|
URL string `json:"url"`
|
||||||
|
} `json:"medium"`
|
||||||
|
High struct {
|
||||||
|
URL string `json:"url"`
|
||||||
|
} `json:"high"`
|
||||||
|
} `json:"thumbnails"`
|
||||||
|
// NOTE(review): search results do not appear to include "resourceId" in
// the snippet, so this likely stays empty — confirm against the API docs.
ResourceID struct {
|
||||||
|
VideoID string `json:"videoId"`
|
||||||
|
} `json:"resourceId"`
|
||||||
|
} `json:"snippet"`
|
||||||
|
}
|
||||||
|
|
@ -7,6 +7,7 @@ import (
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/metamorphosis-dev/kafka/internal/cache"
|
"github.com/metamorphosis-dev/kafka/internal/cache"
|
||||||
|
"github.com/metamorphosis-dev/kafka/internal/config"
|
||||||
"github.com/metamorphosis-dev/kafka/internal/contracts"
|
"github.com/metamorphosis-dev/kafka/internal/contracts"
|
||||||
"github.com/metamorphosis-dev/kafka/internal/engines"
|
"github.com/metamorphosis-dev/kafka/internal/engines"
|
||||||
"github.com/metamorphosis-dev/kafka/internal/upstream"
|
"github.com/metamorphosis-dev/kafka/internal/upstream"
|
||||||
|
|
@ -16,6 +17,7 @@ type ServiceConfig struct {
|
||||||
UpstreamURL string
|
UpstreamURL string
|
||||||
HTTPTimeout time.Duration
|
HTTPTimeout time.Duration
|
||||||
Cache *cache.Cache
|
Cache *cache.Cache
|
||||||
|
EnginesConfig *config.Config
|
||||||
}
|
}
|
||||||
|
|
||||||
type Service struct {
|
type Service struct {
|
||||||
|
|
@ -44,7 +46,7 @@ func NewService(cfg ServiceConfig) *Service {
|
||||||
return &Service{
|
return &Service{
|
||||||
upstreamClient: up,
|
upstreamClient: up,
|
||||||
planner: engines.NewPlannerFromEnv(),
|
planner: engines.NewPlannerFromEnv(),
|
||||||
localEngines: engines.NewDefaultPortedEngines(httpClient),
|
localEngines: engines.NewDefaultPortedEngines(httpClient, cfg.EnginesConfig),
|
||||||
cache: cfg.Cache,
|
cache: cfg.Cache,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue