Uses the official YouTube Data API v3. Requires the YOUTUBE_API_KEY environment variable (a key is available for free from the Google Cloud Console). Returns video results with title, description, channel, publish date, and thumbnail URL. Falls back gracefully if no API key is set.
157 lines
3.8 KiB
Go
157 lines
3.8 KiB
Go
package engines
|
|
|
|
import (
|
|
"os"
|
|
"strings"
|
|
|
|
"github.com/metamorphosis-dev/kafka/internal/contracts"
|
|
)
|
|
|
|
// defaultPortedEngines is the engine list NewPlannerFromEnv falls back to when
// LOCAL_PORTED_ENGINES is unset or yields no usable engine names.
var defaultPortedEngines = []string{
	"wikipedia",
	"arxiv",
	"crossref",
	"braveapi",
	"qwant",
	"duckduckgo",
	"github",
	"reddit",
	"bing",
	"google",
	"youtube",
}
|
|
|
|
// Planner records which engines are configured as ported, i.e. which
// requested engines Plan reports as local rather than upstream.
type Planner struct {
	// PortedSet is keyed by engine name; Plan looks names up here to decide
	// whether a requested engine is local.
	PortedSet map[string]bool

	// PortedList holds the ported engine names as a slice. NewPlanner fills
	// it with the same names as PortedSet, in first-seen order.
	PortedList []string
}
|
|
|
|
func NewPlannerFromEnv() *Planner {
|
|
raw := strings.TrimSpace(os.Getenv("LOCAL_PORTED_ENGINES"))
|
|
if raw == "" {
|
|
return NewPlanner(defaultPortedEngines)
|
|
}
|
|
parts := splitCSV(raw)
|
|
if len(parts) == 0 {
|
|
return NewPlanner(defaultPortedEngines)
|
|
}
|
|
return NewPlanner(parts)
|
|
}
|
|
|
|
func NewPlanner(portedEngines []string) *Planner {
|
|
set := make(map[string]bool, len(portedEngines))
|
|
out := make([]string, 0, len(portedEngines))
|
|
for _, e := range portedEngines {
|
|
e = strings.TrimSpace(strings.ToLower(e))
|
|
if e == "" {
|
|
continue
|
|
}
|
|
if set[e] {
|
|
continue
|
|
}
|
|
set[e] = true
|
|
out = append(out, e)
|
|
}
|
|
return &Planner{
|
|
PortedSet: set,
|
|
PortedList: out,
|
|
}
|
|
}
|
|
|
|
// Plan returns:
|
|
// - localEngines: engines that are configured as ported for this service
|
|
// - upstreamEngines: engines that should be executed by the upstream instance
|
|
// - requestedEngines: the (possibly inferred) requested engines list
|
|
//
|
|
// If the request provides an explicit `engines` parameter, we use it.
|
|
// Otherwise we infer a small subset from `categories` for the starter set.
|
|
func (p *Planner) Plan(req contracts.SearchRequest) (localEngines, upstreamEngines, requestedEngines []string) {
|
|
if p == nil {
|
|
p = NewPlannerFromEnv()
|
|
}
|
|
|
|
requestedEngines = nil
|
|
if len(req.Engines) > 0 {
|
|
requestedEngines = normalizeList(req.Engines)
|
|
} else {
|
|
requestedEngines = inferFromCategories(req.Categories)
|
|
}
|
|
|
|
localEngines = make([]string, 0, len(requestedEngines))
|
|
upstreamEngines = make([]string, 0, len(requestedEngines))
|
|
for _, e := range requestedEngines {
|
|
if p.PortedSet[e] {
|
|
localEngines = append(localEngines, e)
|
|
} else {
|
|
upstreamEngines = append(upstreamEngines, e)
|
|
}
|
|
}
|
|
|
|
return localEngines, upstreamEngines, requestedEngines
|
|
}
|
|
|
|
// inferFromCategories maps request categories to engines for the initial
// porting subset. It mirrors the idea of selecting engines by category without
// embedding the whole engine registry.
//
// Category names are matched case-insensitively after trimming surrounding
// whitespace; unrecognised categories are ignored. Recognised categories:
//   - "general": wikipedia, braveapi, qwant, duckduckgo, bing, google
//   - "science", "scientific publications": arxiv, crossref
//   - "it": github
//   - "social media": reddit
//   - "videos": youtube
//
// The result lists every selected engine exactly once, always in the order
// shown above regardless of the order of categories. It is empty but non-nil
// when no category matches.
func inferFromCategories(categories []string) []string {
	const (
		groupGeneral = iota
		groupScience
		groupIT
		groupSocial
		groupVideos
		groupCount
	)

	// Engines per category group. Array order is the output order, so each
	// engine name lives in exactly one place and no separate ranking table or
	// sort step has to be kept in sync with it.
	groups := [groupCount][]string{
		groupGeneral: {"wikipedia", "braveapi", "qwant", "duckduckgo", "bing", "google"},
		groupScience: {"arxiv", "crossref"},
		groupIT:      {"github"},
		groupSocial:  {"reddit"},
		groupVideos:  {"youtube"},
	}

	var selected [groupCount]bool
	total := 0
	for _, c := range categories {
		g := -1
		switch strings.TrimSpace(strings.ToLower(c)) {
		case "general":
			g = groupGeneral
		case "science", "scientific publications":
			g = groupScience
		case "it":
			g = groupIT
		case "social media":
			g = groupSocial
		case "videos":
			g = groupVideos
		}
		// Skip unknown categories and groups that were already selected.
		if g < 0 || selected[g] {
			continue
		}
		selected[g] = true
		total += len(groups[g])
	}

	out := make([]string, 0, total)
	for g, on := range selected {
		if on {
			out = append(out, groups[g]...)
		}
	}
	return out
}
|
|
|
|
// sortByOrder reorders list in place so that the ranks looked up in order are
// ascending. A name missing from order ranks as 0 (the map's zero value), and
// names with equal rank keep their relative order.
func sortByOrder(list []string, order map[string]int) {
	// Insertion sort: the lists handled here are tiny.
	for i := 1; i < len(list); i++ {
		current := list[i]
		rank := order[current]

		// Shift higher-ranked predecessors one slot right, then drop the
		// current element into the gap.
		pos := i - 1
		for ; pos >= 0 && order[list[pos]] > rank; pos-- {
			list[pos+1] = list[pos]
		}
		list[pos+1] = current
	}
}
|
|
|
|
// normalizeList returns the entries of in lower-cased and stripped of
// surrounding whitespace. Blank entries and repeats are skipped, keeping the
// first occurrence of each name in its original position. The result is never
// nil.
func normalizeList(in []string) []string {
	seen := make(map[string]struct{}, len(in))
	result := make([]string, 0, len(in))
	for _, raw := range in {
		name := strings.TrimSpace(strings.ToLower(raw))
		if name == "" {
			continue
		}
		if _, dup := seen[name]; dup {
			continue
		}
		seen[name] = struct{}{}
		result = append(result, name)
	}
	return result
}
|
|
|
|
// splitCSV breaks s at commas and returns the pieces with surrounding
// whitespace removed, omitting pieces that are empty after trimming.
//
// It returns nil for an empty s, and an empty non-nil slice when s is
// non-empty but holds no usable piece.
func splitCSV(s string) []string {
	if len(s) == 0 {
		return nil
	}

	// FieldsFunc already drops zero-length pieces between adjacent commas;
	// whitespace-only pieces are filtered out after trimming below.
	fields := strings.FieldsFunc(s, func(r rune) bool { return r == ',' })
	items := make([]string, 0, len(fields))
	for _, field := range fields {
		if item := strings.TrimSpace(field); item != "" {
			items = append(items, item)
		}
	}
	return items
}
|
|
|