diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md new file mode 100644 index 0000000..bada2e2 --- /dev/null +++ b/docs/CONTRIBUTING.md @@ -0,0 +1,220 @@ +# Contributing — Adding a New Engine + +This guide walks through adding a new search engine to samsa. The minimal engine needs only an HTTP client, a query, and a result parser. + +--- + +## 1. Create the engine file + +Place it in `internal/engines/`: + +``` +internal/engines/ + myengine.go ← your engine + myengine_test.go ← tests (required) +``` + +Name the struct after the engine, e.g. `WolframEngine` for "wolfram". The `Name()` method returns the engine key used throughout samsa. + +## 2. Implement the Engine interface + +```go +package engines + +import ( + "context" + "net/http" + + "github.com/metamorphosis-dev/samsa/internal/contracts" +) + +type MyEngine struct { + client *http.Client +} + +func (e *MyEngine) Name() string { return "myengine" } + +func (e *MyEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) { + // ... +} +``` + +### The SearchRequest fields you'll use most: + +| Field | Type | Description | +|-------|------|-------------| +| `Query` | `string` | The search query | +| `Pageno` | `int` | Current page number (1-based) | +| `Safesearch` | `int` | 0=off, 1=moderate, 2=strict | +| `Language` | `string` | ISO language code (e.g. 
`"en"`) | + +### The SearchResponse to return: + +```go +contracts.SearchResponse{ + Query: req.Query, + NumberOfResults: len(results), + Results: results, // []MainResult + Answers: []map[string]any{}, + Corrections: []string{}, + Infoboxes: []map[string]any{}, + Suggestions: []string{}, + UnresponsiveEngines: [][2]string{}, +} +``` + +### Empty query — return early: + +```go +if strings.TrimSpace(req.Query) == "" { + return contracts.SearchResponse{Query: req.Query}, nil +} +``` + +### Engine unavailable / error — graceful degradation: + +```go +// Rate limited or blocked +return contracts.SearchResponse{ + Query: req.Query, + UnresponsiveEngines: [][2]string{{"myengine", "reason"}}, + Results: []contracts.MainResult{}, + // ... empty other fields +}, nil + +// Hard error — return it +return contracts.SearchResponse{}, fmt.Errorf("myengine upstream error: status %d", resp.StatusCode) +``` + +## 3. Build the result + +```go +urlPtr := "https://example.com/result" +result := contracts.MainResult{ + Title: "Result Title", + Content: "Snippet or description text", + URL: &urlPtr, // pointer to string, required + Engine: "myengine", + Category: "general", // or "it", "science", "videos", "images", "social media" + Score: 0, // used for relevance ranking during merge + Engines: []string{"myengine"}, +} +``` + +### Template field + +The template system checks for `"videos"` and `"images"`. Everything else renders via `result_item.html`. Set `Template` only if you have a custom template; omit it for the default result card. + +### Category field + +Controls which category tab the result appears under and which engines are triggered: + +| Category | Engines used | +|----------|-------------| +| `general` | google, bing, ddg, brave, braveapi, qwant, wikipedia | +| `it` | github, stackoverflow | +| `science` | arxiv, crossref | +| `videos` | youtube | +| `images` | bing_images, ddg_images, qwant_images | +| `social media` | reddit | + +## 4. 
Wire it into the factory + +In `internal/engines/factory.go`, add your engine to the map returned by `NewDefaultPortedEngines`: + +```go +"myengine": &MyEngine{client: client}, +``` + +If your engine needs an API key, read it from config or the environment (see `braveapi` or `youtube` in factory.go for the pattern). + +## 5. Register defaults + +In `internal/engines/planner.go`: + +**Add to `defaultPortedEngines`:** +```go +var defaultPortedEngines = []string{ + // ... existing ... + "myengine", +} +``` + +**Add to category mapping in `inferFromCategories`** (if applicable): +```go +case "general": + set["myengine"] = true +``` + +**Update the sort order map** so results maintain consistent ordering: +```go +order := map[string]int{ + // ... existing ... + "myengine": N, // pick a slot +} +``` + +## 6. Add tests + +At minimum, test: +- `Name()` returns the correct string +- Nil engine returns an error +- Empty query returns zero results +- Successful API response parses correctly +- Rate limit / error cases return `UnresponsiveEngines` with a reason + +Use `httptest.NewServer` to mock the upstream API. See `arxiv_test.go` or `reddit_test.go` for examples. + +## 7. Build and test + +```bash +go build ./... +go test ./internal/engines/ -run MyEngine -v +go test ./... +``` + +## Example: Adding an RSS-based engine + +If the engine provides an RSS feed, the parsing is straightforward: + +```go +type rssItem struct { + Title string `xml:"title"` + Link string `xml:"link"` + Description string `xml:"description"` +} + +type rssFeed struct { + Channel struct { + Items []rssItem `xml:"item"` + } `xml:"channel"` +} + +var feed rssFeed +if err := xml.NewDecoder(resp.Body).Decode(&feed); err != nil { + return contracts.SearchResponse{}, fmt.Errorf("myengine: decode RSS feed: %w", err) +} +for _, item := range feed.Channel.Items { + urlPtr := item.Link + results = append(results, contracts.MainResult{ + Title: item.Title, + Content: stripHTML(item.Description), + URL: &urlPtr, + Engine: "myengine", + // ... 
+ }) +} +``` + +## Checklist + +- [ ] Engine file created in `internal/engines/` +- [ ] `Engine` interface implemented (`Name()` + `Search()`) +- [ ] Empty query handled (return early, no error) +- [ ] Graceful degradation for errors and rate limits +- [ ] Results use `Category` to group with related engines +- [ ] Factory updated with new engine +- [ ] Planner updated (defaults + category mapping + sort order) +- [ ] Tests written covering main paths +- [ ] `go build ./...` succeeds +- [ ] `go test ./...` passes