diff --git a/.forgejo/workflows/test.yml b/.forgejo/workflows/test.yml
index 5f7efb4..6b09ea7 100644
--- a/.forgejo/workflows/test.yml
+++ b/.forgejo/workflows/test.yml
@@ -11,15 +11,12 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
- uses: https://github.com/actions/checkout@v5
+ uses: https://github.com/actions/checkout@v4
- name: Set up Go
uses: https://github.com/actions/setup-go@v5
with:
- go-version-file: go.mod
-
- - name: Clean vendor
- run: rm -rf vendor
+ go-version: '1.24'
- name: Test
run: go test -race -v ./...
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
deleted file mode 100644
index 47cc920..0000000
--- a/.github/workflows/test.yml
+++ /dev/null
@@ -1,25 +0,0 @@
-name: Tests
-
-on:
- pull_request:
- branches: [main]
- push:
- branches: [main]
-
-jobs:
- test:
- runs-on: ubuntu-latest
- steps:
- - name: Checkout
- uses: actions/checkout@v5
-
- - name: Set up Go
- uses: actions/setup-go@v5
- with:
- go-version-file: go.mod
-
- - name: Clean vendor
- run: rm -rf vendor
-
- - name: Test
- run: go test -race -v ./...
diff --git a/.gitignore b/.gitignore
index 6cea500..a5388c7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,11 +1,5 @@
node_modules/
.agent/
-internal/spa/dist/
-frontend/node_modules/
-frontend/dist/
-frontend/bun.lock
-frontend/bun.lockb
-frontend/package-lock.json
*.exe
*.exe~
*.dll
diff --git a/CLAUDE.md b/CLAUDE.md
index e136dd7..1ba6bdc 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -4,7 +4,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
## Project Overview
-samsa is a privacy-respecting metasearch engine written in Go. It provides a SearXNG-compatible `/search` API and an HTML frontend (HTMX + Go templates). 9 engines are implemented natively in Go; unlisted engines can be proxied to an upstream metasearch instance. Responses from multiple engines are merged into a single JSON/CSV/RSS/HTML response.
+kafka is a privacy-respecting metasearch engine written in Go. It provides a SearXNG-compatible `/search` API and an HTML frontend (HTMX + Go templates). 9 engines are implemented natively in Go; unlisted engines can be proxied to an upstream SearXNG instance. Responses from multiple engines are merged into a single JSON/CSV/RSS/HTML response.
## Build & Run Commands
@@ -22,7 +22,7 @@ go test -run TestWikipedia ./internal/engines/
go test -v ./internal/engines/
# Run the server (requires config.toml)
-go run ./cmd/samsa -config config.toml
+go run ./cmd/searxng-go -config config.toml
```
There is no Makefile. There is no linter configured.
@@ -37,13 +37,13 @@ There is no Makefile. There is no linter configured.
- `internal/config` — TOML-based configuration with env var fallbacks. `Load(path)` reads `config.toml`; env vars override zero-value fields. See `config.example.toml` for all settings.
- `internal/engines` — `Engine` interface and all 9 Go-native implementations. `factory.go` registers engines via `NewDefaultPortedEngines()`. `planner.go` routes engines to local or upstream based on `LOCAL_PORTED_ENGINES` env var.
- `internal/search` — `Service` orchestrates the pipeline: cache check, planning, parallel engine execution via goroutines/WaitGroup, upstream proxying, response merging. Individual engine failures are reported as `unresponsive_engines` rather than aborting the search. Qwant has fallback logic to upstream on empty results.
-- `internal/autocomplete` — Fetches search suggestions. Proxies to upstream `/autocompleter` if configured, falls back to Wikipedia OpenSearch API otherwise.
+- `internal/autocomplete` — Fetches search suggestions. Proxies to upstream SearXNG `/autocompleter` if configured, falls back to Wikipedia OpenSearch API otherwise.
- `internal/httpapi` — HTTP handlers for `/`, `/search`, `/autocompleter`, `/healthz`, `/opensearch.xml`. Detects HTMX requests via `HX-Request` header to return fragments instead of full pages.
-- `internal/upstream` — Client that proxies requests to an upstream metasearch instance via POST.
+- `internal/upstream` — Client that proxies requests to an upstream SearXNG instance via POST.
- `internal/cache` — Valkey/Redis-backed cache with SHA-256 cache keys. No-op if unconfigured.
- `internal/middleware` — Three rate limiters (per-IP sliding window, burst+sustained, global) and CORS. All disabled by default.
- `internal/views` — HTML templates and static files embedded via `//go:embed`. Renders full pages or HTMX fragments. Templates: `base.html`, `index.html`, `results.html`, `results_inner.html`, `result_item.html`.
-- `cmd/samsa` — Entry point. Loads TOML config, seeds env vars for engine code, wires up middleware chain, starts HTTP server.
+- `cmd/searxng-go` — Entry point. Loads TOML config, seeds env vars for engine code, wires up middleware chain, starts HTTP server.
**Engine interface** (`internal/engines/engine.go`):
```go
@@ -66,7 +66,7 @@ Config is loaded from `config.toml` (see `config.example.toml`). All fields can
## Conventions
-- Module path: `github.com/metamorphosis-dev/samsa`
+- Module path: `github.com/metamorphosis-dev/kafka`
- Tests use shared mock helpers in `internal/engines/http_mock_test.go` (`roundTripperFunc`, `httpResponse`)
- Engine implementations are single files under `internal/engines/` (e.g., `wikipedia.go`, `duckduckgo.go`)
- Response merging de-duplicates by `engine|title|url` key; suggestions/corrections are merged as sets
diff --git a/Dockerfile b/Dockerfile
index 9f3443f..c41b5a1 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -11,7 +11,7 @@ RUN go mod download
# Copy source and build
COPY . .
-RUN CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o /kafka ./cmd/kafka
+RUN CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o /kafka ./cmd/searxng-go
# Runtime stage
FROM alpine:3.21
@@ -21,7 +21,7 @@ RUN apk add --no-cache ca-certificates tzdata
COPY --from=builder /kafka /usr/local/bin/kafka
COPY config.example.toml /etc/kafka/config.example.toml
-EXPOSE 5355
+EXPOSE 8080
ENTRYPOINT ["kafka"]
CMD ["-config", "/etc/kafka/config.toml"]
diff --git a/LICENSE b/LICENSE
index cb069f5..4fdaf2a 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,662 +1,21 @@
- GNU AFFERO GENERAL PUBLIC LICENSE
- Version 3, 19 November 2007
-
- Copyright (C) 2026-present metamorphosis-dev
-
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
- Preamble
-
- The GNU Affero General Public License is a free, copyleft license for
-software and other kinds of works, specifically designed to ensure
-cooperation with the community in the case of network server software.
-
- The licenses for most software and other practical works are designed
-to take away your freedom to share and change the works. By contrast,
-our General Public Licenses are intended to guarantee your freedom to
-share and change all versions of a program--to make sure it remains free
-software for all its users.
-
- When we speak of free software, we are referring to freedom, not
-price. Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-them if you wish), that you receive source code or can get it if you
-want it, that you can change the software or use pieces of it in new
-free programs, and that you know you can do these things.
-
- Developers that use our General Public Licenses protect your rights
-with two steps: (1) assert copyright on the software, and (2) offer
-you this License which gives you legal permission to copy, distribute
-and/or modify the software.
-
- A secondary benefit of defending all users' freedom is that
-improvements made in alternate versions of the program, if they
-receive widespread use, become available for other developers to
-incorporate. Many developers of free software are heartened and
-encouraged by the resulting cooperation. However, in the case of
-software used on network servers, this result may fail to come about.
-The GNU General Public License permits making a modified version and
-letting the public access it on a server without ever releasing its
-source code to the public.
-
- The GNU Affero General Public License is designed specifically to
-ensure that, in such cases, the modified source code becomes available
-to the community. It requires the operator of a network server to
-provide the source code of the modified version running there to the
-users of that server. Therefore, public use of a modified version, on
-a publicly accessible server, gives the public access to the source
-code of the modified version.
-
- An older license, called the Affero General Public License and
-published by Affero, was designed to accomplish similar goals. This is
-a different license, not a version of the Affero GPL, but Affero has
-released a new version of the Affero GPL which permits relicensing under
-this license.
-
- The precise terms and conditions for copying, distribution and
-modification follow.
-
- TERMS AND CONDITIONS
-
- 0. Definitions.
-
- "This License" refers to version 3 of the GNU Affero General Public License.
-
- "Copyright" also means copyright-like laws that apply to other kinds of
-works, such as semiconductor masks.
-
- "The Program" refers to any copyrightable work licensed under this
-License. Each licensee is addressed as "you". "Licensees" and
-"recipients" may be individuals or organizations.
-
- To "modify" a work means to copy from or adapt all or part of the work
-in a fashion requiring copyright permission, other than the making of an
-exact copy. The resulting work is called a "modified version" of the
-earlier work or a work "based on" the earlier work.
-
- A "covered work" means either the unmodified Program or a work based
-on the Program.
-
- To "propagate" a work means to do anything with it that, without
-permission, would make you directly or secondarily liable for
-infringement under applicable copyright law, except executing it on a
-computer or modifying a private copy. Propagation includes copying,
-distribution (with or without modification), making available to the
-public, and in some countries other activities as well.
-
- To "convey" a work means any kind of propagation that enables other
-parties to make or receive copies. Mere interaction with a user through
-a computer network, with no transfer of a copy, is not conveying.
-
- An interactive user interface displays "Appropriate Legal Notices"
-to the extent that it includes a convenient and prominently visible
-feature that (1) displays an appropriate copyright notice, and (2)
-tells the user that there is no warranty for the work (except to the
-extent that warranties are provided), that licensees may convey the
-work under this License, and how to view a copy of this License. If
-the interface presents a list of user commands or options, such as a
-menu, a prominent item in the list meets this criterion.
-
- 1. Source Code.
-
- The "source code" for a work means the preferred form of the work
-for making modifications to it. "Object code" means any non-source
-form of a work.
-
- A "Standard Interface" means an interface that either is an official
-standard defined by a recognized standards body or, in the case of
-interfaces specified for a particular programming language, one that
-is widely used among developers working in that language.
-
- The "System Libraries" of an executable work include anything, other
-than the work as a whole, that (a) is included in the normal form of
-packaging a Major Component, but which is not part of that Major
-Component, and (b) serves only to enable use of the work with that
-Major Component, or to implement a Standard Interface for which an
-implementation is available to the public in source code form. A
-"Major Component", in this context, means a major essential component
-(kernel, window system, and so on) of the specific operating system
-(if any) on which the executable work runs, or a compiler used to
-produce the work, or an object code interpreter used to run it.
-
- The "Corresponding Source" for a work in object code form means all
-the source code needed to generate, install, and (for an executable
-work) run the object code and to modify the work, including scripts to
-control those activities. However, it does not include the work's
-System Libraries, or general-purpose tools or generally available free
-programs which are used unmodified in performing those activities but
-which are not part of the work. For example, Corresponding Source
-includes interface definition files associated with source files for
-the work, and the source code for shared libraries and dynamically
-linked subprograms that the work is specifically designed to require,
-such as by intimate data communication or control flow between those
-subprograms and other parts of the work.
-
- The Corresponding Source need not include anything that users
-can regenerate automatically from other parts of the Corresponding
-Source.
-
- The Corresponding Source for a work in source code form is that
-same work.
-
- 2. Basic Permissions.
-
- All rights granted under this License are granted for the term of
-copyright on the Program, and are irrevocable provided the stated
-conditions are met. This License explicitly affirms your unlimited
-permission to run the unmodified Program. The output from running a
-covered work is covered by this License only if the output, given its
-content, constitutes a covered work. This License acknowledges your
-rights of fair use or other equivalent, as provided by copyright law.
-
- You may make, run and propagate covered works that you do not
-convey, without conditions so long as your license otherwise remains
-in force. You may convey covered works to others for the sole purpose
-of having them make modifications exclusively for you, or provide you
-with facilities for running those works, provided that you comply with
-the terms of this License in conveying all material for which you do
-not control copyright. Those thus making or running the covered works
-for you must do so exclusively on your behalf, under your direction
-and control, on terms that prohibit them from making any copies of
-your copyrighted material outside their relationship with you.
-
- Conveying under any other circumstances is permitted solely under
-the conditions stated below. Sublicensing is not allowed; section 10
-makes it unnecessary.
-
- 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
-
- No covered work shall be deemed part of an effective technological
-measure under any applicable law fulfilling obligations under article
-11 of the WIPO copyright treaty adopted on 20 December 1996, or
-similar laws prohibiting or restricting circumvention of such
-measures.
-
- When you convey a covered work, you waive any legal power to forbid
-circumvention of technological measures to the extent such circumvention
-is effected by exercising rights under this License with respect to
-the covered work, and you disclaim any intention to limit operation or
-modification of the work as a means of enforcing, against the work's
-users, your or third parties' legal rights to forbid circumvention of
-technological measures.
-
- 4. Conveying Verbatim Copies.
-
- You may convey verbatim copies of the Program's source code as you
-receive it, in any medium, provided that you conspicuously and
-appropriately publish on each copy an appropriate copyright notice;
-keep intact all notices stating that this License and any
-non-permissive terms added in accord with section 7 apply to the code;
-keep intact all notices of the absence of any warranty; and give all
-recipients a copy of this License along with the Program.
-
- You may charge any price or no price for each copy that you convey,
-and you may offer support or warranty protection for a fee.
-
- 5. Conveying Modified Source Versions.
-
- You may convey a work based on the Program, or the modifications to
-produce it from the Program, in the form of source code under the
-terms of section 4, provided that you also meet all of these conditions:
-
- a) The work must carry prominent notices stating that you modified
- it, and giving a relevant date.
-
- b) The work must carry prominent notices stating that it is
- released under this License and any conditions added under section
- 7. This requirement modifies the requirement in section 4 to
- "keep intact all notices".
-
- c) You must license the entire work, as a whole, under this
- License to anyone who comes into possession of a copy. This
- License will therefore apply, along with any applicable section 7
- additional terms, to the whole of the work, and all its parts,
- regardless of how they are packaged. This License gives no
- permission to license the work in any other way, but it does not
- invalidate such permission if you have separately received it.
-
- d) If the work has interactive user interfaces, each must display
- Appropriate Legal Notices; however, if the Program has interactive
- interfaces that do not display Appropriate Legal Notices, your
- work need not make them do so.
-
- A compilation of a covered work with other separate and independent
-works, which are not by their nature extensions of the covered work,
-and which are not combined with it such as to form a larger program,
-in or on a volume of a storage or distribution medium, is called an
-"aggregate" if the compilation and its resulting copyright are not
-used to limit the access or legal rights of the compilation's users
-beyond what the individual works permit. Inclusion of a covered work
-in an aggregate does not cause this License to apply to the other
-parts of the aggregate.
-
- 6. Conveying Non-Source Forms.
-
- You may convey a covered work in object code form under the terms
-of sections 4 and 5, provided that you also convey the
-machine-readable Corresponding Source under the terms of this License,
-in one of these ways:
-
- a) Convey the object code in, or embodied in, a physical product
- (including a physical distribution medium), accompanied by the
- Corresponding Source fixed on a durable physical medium
- customarily used for software interchange.
-
- b) Convey the object code in, or embodied in, a physical product
- (including a physical distribution medium), accompanied by a
- written offer, valid for at least three years and valid for as
- long as you offer spare parts or customer support for that product
- model, to give anyone who possesses the object code either (1) a
- copy of the Corresponding Source for all the software in the
- product that is covered by this License, on a durable physical
- medium customarily used for software interchange, for a price no
- more than your reasonable cost of physically performing this
- conveying of source, or (2) access to copy the Corresponding
- Source from a network server at no charge.
-
- c) Convey individual copies of the object code with a copy of the
- written offer to provide the Corresponding Source. This
- alternative is allowed only occasionally and noncommercially, and
- only if you received the object code with such an offer, in accord
- with subsection 6b.
-
- d) Convey the object code by offering access from a designated
- place (gratis or for a charge), and offer equivalent access to the
- Corresponding Source in the same way through the same place at no
- further charge. You need not require recipients to copy the
- Corresponding Source along with the object code. If the place to
- copy the object code is a network server, the Corresponding Source
- may be on a different server (operated by you or a third party)
- that supports equivalent copying facilities, provided you maintain
- clear directions next to the object code saying where to find the
- Corresponding Source. Regardless of what server hosts the
- Corresponding Source, you remain obligated to ensure that it is
- available for as long as needed to satisfy these requirements.
-
- e) Convey the object code using peer-to-peer transmission, provided
- you inform other peers where the object code and Corresponding
- Source of the work are being offered to the general public at no
- charge under subsection 6d.
-
- A separable portion of the object code, whose source code is excluded
-from the Corresponding Source as a System Library, need not be
-included in conveying the object code work.
-
- A "User Product" is either (1) a "consumer product", which means any
-tangible personal property which is normally used for personal, family,
-or household purposes, or (2) anything designed or sold for incorporation
-into a dwelling. In determining whether a product is a consumer product,
-doubtful cases shall be resolved in favor of coverage. For a particular
-product received by a particular user, "normally used" refers to a
-typical or common use of that class of product, regardless of the status
-of the particular user or of the way in which the particular user
-actually uses, or expects or is expected to use, the product. A product
-is a consumer product regardless of whether the product has substantial
-commercial, industrial or non-consumer uses, unless such uses represent
-the only significant mode of use of the product.
-
- "Installation Information" for a User Product means any methods,
-procedures, authorization keys, or other information required to install
-and execute modified versions of a covered work in that User Product from
-a modified version of its Corresponding Source. The information must
-suffice to ensure that the continued functioning of the modified object
-code is in no case prevented or interfered with solely because
-modification has been made.
-
- If you convey an object code work under this section in, or with, or
-specifically for use in, a User Product, and the conveying occurs as
-part of a transaction in which the right of possession and use of the
-User Product is transferred to the recipient in perpetuity or for a
-fixed term (regardless of how the transaction is characterized), the
-Corresponding Source conveyed under this section must be accompanied
-by the Installation Information. But this requirement does not apply
-if neither you nor any third party retains the ability to install
-modified object code on the User Product (for example, the work has
-been installed in ROM).
-
- The requirement to provide Installation Information does not include a
-requirement to continue to provide support service, warranty, or updates
-for a work that has been modified or installed by the recipient, or for
-the User Product in which it has been modified or installed. Access to a
-network may be denied when the modification itself materially and
-adversely affects the operation of the network or violates the rules and
-protocols for communication across the network.
-
- Corresponding Source conveyed, and Installation Information provided,
-in accord with this section must be in a format that is publicly
-documented (and with an implementation available to the public in
-source code form), and must require no special password or key for
-unpacking, reading or copying.
-
- 7. Additional Terms.
-
- "Additional permissions" are terms that supplement the terms of this
-License by making exceptions from one or more of its conditions.
-Additional permissions that are applicable to the entire Program shall
-be treated as though they were included in this License, to the extent
-that they are valid under applicable law. If additional permissions
-apply only to part of the Program, that part may be used separately
-under those permissions, but the entire Program remains governed by
-this License without regard to the additional permissions.
-
- When you convey a copy of a covered work, you may at your option
-remove any additional permissions from that copy, or from any part of
-it. (Additional permissions may be written to require their own
-removal in certain cases when you modify the work.) You may place
-additional permissions on material, added by you to a covered work,
-for which you have or can give appropriate copyright permission.
-
- Notwithstanding any other provision of this License, for material you
-add to a covered work, you may (if authorized by the copyright holders of
-that material) supplement the terms of this License with terms:
-
- a) Disclaiming warranty or limiting liability differently from the
- terms of sections 15 and 16 of this License; or
-
- b) Requiring preservation of specified reasonable legal notices or
- author attributions in that material or in the Appropriate Legal
- Notices displayed by works containing it; or
-
- c) Prohibiting misrepresentation of the origin of that material, or
- requiring that modified versions of such material be marked in
- reasonable ways as different from the original version; or
-
- d) Limiting the use for publicity purposes of names of licensors or
- authors of the material; or
-
- e) Declining to grant rights under trademark law for use of some
- trade names, trademarks, or service marks; or
-
- f) Requiring indemnification of licensors and authors of that
- material by anyone who conveys the material (or modified versions of
- it) with contractual assumptions of liability to the recipient, for
- any liability that these contractual assumptions directly impose on
- those licensors and authors.
-
- All other non-permissive additional terms are considered "further
-restrictions" within the meaning of section 10. If the Program as you
-received it, or any part of it, contains a notice stating that it is
-governed by this License along with a term that is a further
-restriction, you may remove that term. If a license document contains
-a further restriction but permits relicensing or conveying under this
-License, you may add to a covered work material governed by the terms
-of that license document, provided that the further restriction does
-not survive such relicensing or conveying.
-
- If you add terms to a covered work in accord with this section, you
-must place, in the relevant source files, a statement of the
-additional terms that apply to those files, or a notice indicating
-where to find the applicable terms.
-
- Additional terms, permissive or non-permissive, may be stated in the
-form of a separately written license, or stated as exceptions;
-the above requirements apply either way.
-
- 8. Termination.
-
- You may not propagate or modify a covered work except as expressly
-provided under this License. Any attempt otherwise to propagate or
-modify it is void, and will automatically terminate your rights under
-this License (including any patent licenses granted under the third
-paragraph of section 11).
-
- However, if you cease all violation of this License, then your
-license from a particular copyright holder is reinstated (a)
-provisionally, unless and until the copyright holder explicitly and
-finally terminates your license, and (b) permanently, if the copyright
-holder fails to notify you of the violation by some reasonable means
-prior to 60 days after the cessation.
-
- Moreover, your license from a particular copyright holder is
-reinstated permanently if the copyright holder notifies you of the
-violation by some reasonable means, this is the first time you have
-received notice of violation of this License (for any work) from that
-copyright holder, and you cure the violation prior to 30 days after
-your receipt of the notice.
-
- Termination of your rights under this section does not terminate the
-licenses of parties who have received copies or rights from you under
-this License. If your rights have been terminated and not permanently
-reinstated, you do not qualify to receive new licenses for the same
-material under section 10.
-
- 9. Acceptance Not Required for Having Copies.
-
- You are not required to accept this License in order to receive or
-run a copy of the Program. Ancillary propagation of a covered work
-occurring solely as a consequence of using peer-to-peer transmission
-to receive a copy likewise does not require acceptance. However,
-nothing other than this License grants you permission to propagate or
-modify any covered work. These actions infringe copyright if you do
-not accept this License. Therefore, by modifying or propagating a
-covered work, you indicate your acceptance of this License to do so.
-
- 10. Automatic Licensing of Downstream Recipients.
-
- Each time you convey a covered work, the recipient automatically
-receives a license from the original licensors, to run, modify and
-propagate that work, subject to this License. You are not responsible
-for enforcing compliance by third parties with this License.
-
- An "entity transaction" is a transaction transferring control of an
-organization, or substantially all assets of one, or subdividing an
-organization, or merging organizations. If propagation of a covered
-work results from an entity transaction, each party to that
-transaction who receives a copy of the work also receives whatever
-licenses to the work the party's predecessor in interest had or could
-give under the previous paragraph, plus a right to possession of the
-Corresponding Source of the work from the predecessor in interest, if
-the predecessor has it or can get it with reasonable efforts.
-
- You may not impose any further restrictions on the exercise of the
-rights granted or affirmed under this License. For example, you may
-not impose a license fee, royalty, or other charge for exercise of
-rights granted under this License, and you may not initiate litigation
-(including a cross-claim or counterclaim in a lawsuit) alleging that
-any patent claim is infringed by making, using, selling, offering for
-sale, or importing the Program or any portion of it.
-
- 11. Patents.
-
- A "contributor" is a copyright holder who authorizes use under this
-License of the Program or a work on which the Program is based. The
-work thus licensed is called the contributor's "contributor version".
-
- A contributor's "essential patent claims" are all patent claims
-owned or controlled by the contributor, whether already acquired or
-hereafter acquired, that would be infringed by some manner, permitted
-by this License, of making, using, or selling its contributor version,
-but do not include claims that would be infringed only as a
-consequence of further modification of the contributor version. For
-purposes of this definition, "control" includes the right to grant
-patent sublicenses in a manner consistent with the requirements of
-this License.
-
- Each contributor grants you a non-exclusive, worldwide, royalty-free
-patent license under the contributor's essential patent claims, to
-make, use, sell, offer for sale, import and otherwise run, modify and
-propagate the contents of its contributor version.
-
- In the following three paragraphs, a "patent license" is any express
-agreement or commitment, however denominated, not to enforce a patent
-(such as an express permission to practice a patent or covenant not to
-sue for patent infringement). To "grant" such a patent license to a
-party means to make such an agreement or commitment not to enforce a
-patent against the party.
-
- If you convey a covered work, knowingly relying on a patent license,
-and the Corresponding Source of the work is not available for anyone
-to copy, free of charge and under the terms of this License, through a
-publicly available network server or other readily accessible means,
-then you must either (1) cause the Corresponding Source to be so
-available, or (2) arrange to deprive yourself of the benefit of the
-patent license for this particular work, or (3) arrange, in a manner
-consistent with the requirements of this License, to extend the patent
-license to downstream recipients. "Knowingly relying" means you have
-actual knowledge that, but for the patent license, your conveying the
-covered work in a country, or your recipient's use of the covered work
-in a country, would infringe one or more identifiable patents in that
-country that you have reason to believe are valid.
-
- If, pursuant to or in connection with a single transaction or
-arrangement, you convey, or propagate by procuring conveyance of, a
-covered work, and grant a patent license to some of the parties
-receiving the covered work authorizing them to use, propagate, modify
-or convey a specific copy of the covered work, then the patent license
-you grant is automatically extended to all recipients of the covered
-work and works based on it.
-
- A patent license is "discriminatory" if it does not include within
-the scope of its coverage, prohibits the exercise of, or is
-conditioned on the non-exercise of one or more of the rights that are
-specifically granted under this License. You may not convey a covered
-work if you are a party to an arrangement with a third party that is
-in the business of distributing software, under which you make payment
-to the third party based on the extent of your activity of conveying
-the work, and under which the third party grants, to any of the
-parties who would receive the covered work from you, a discriminatory
-patent license (a) in connection with copies of the covered work
-conveyed by you (or copies made from those copies), or (b) primarily
-for and in connection with specific products or compilations that
-contain the covered work, unless you entered into that arrangement,
-or that patent license was granted, prior to 28 March 2007.
-
- Nothing in this License shall be construed as excluding or limiting
-any implied license or other defenses to infringement that may
-otherwise be available to you under applicable patent law.
-
- 12. No Surrender of Others' Freedom.
-
- If conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License. If you cannot convey a
-covered work so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you may
-not convey it at all. For example, if you agree to terms that obligate you
-to collect a royalty for further conveying from those to whom you convey
-the Program, the only way you could satisfy both those terms and this
-License would be to refrain entirely from conveying the Program.
-
- 13. Remote Network Interaction; Use with the GNU General Public License.
-
- Notwithstanding any other provision of this License, if you modify the
-Program, your modified version must prominently offer all users
-interacting with it remotely through a computer network (if your version
-supports such interaction) an opportunity to receive the Corresponding
-Source of your version by providing access to the Corresponding Source
-from a network server at no charge, through some standard or customary
-means of facilitating copying of software. This Corresponding Source
-shall include the Corresponding Source for any work covered by version 3
-of the GNU General Public License that is incorporated pursuant to the
-following paragraph.
-
- Notwithstanding any other provision of this License, you have
-permission to link or combine any covered work with a work licensed
-under version 3 of the GNU General Public License into a single
-combined work, and to convey the resulting work. The terms of this
-License will continue to apply to the part which is the covered work,
-but the work with which it is combined will remain governed by version
-3 of the GNU General Public License.
-
- 14. Revised Versions of this License.
-
- The Free Software Foundation may publish revised and/or new versions of
-the GNU Affero General Public License from time to time. Such new versions
-will be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
- Each version is given a distinguishing version number. If the
-Program specifies that a certain numbered version of the GNU Affero General
-Public License "or any later version" applies to it, you have the
-option of following the terms and conditions either of that numbered
-version or of any later version published by the Free Software
-Foundation. If the Program does not specify a version number of the
-GNU Affero General Public License, you may choose any version ever published
-by the Free Software Foundation.
-
- If the Program specifies that a proxy can decide which future
-versions of the GNU Affero General Public License can be used, that proxy's
-public statement of acceptance of a version permanently authorizes you
-to choose that version for the Program.
-
- Later license versions may give you additional or different
-permissions. However, no additional obligations are imposed on any
-author or copyright holder as a result of your choosing to follow a
-later version.
-
- 15. Disclaimer of Warranty.
-
- THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
-APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
-HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
-OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
-THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
-IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
-ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
-
- 16. Limitation of Liability.
-
- IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
-THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
-GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
-USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
-DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
-PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
-EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGES.
-
- 17. Interpretation of Sections 15 and 16.
-
- If the disclaimer of warranty and limitation of liability provided
-above cannot be given local legal effect according to their terms,
-reviewing courts shall apply local law that most closely approximates
-an absolute waiver of all civil liability in connection with the
-Program, unless a warranty or assumption of liability accompanies a
-copy of the Program in return for a fee.
-
- END OF TERMS AND CONDITIONS
-
- How to Apply These Terms to Your New Programs
-
- If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
- To do so, attach the following notices to the program. It is safest
-to attach them to the start of each source file to most effectively
-state the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
- kafka — a privacy-respecting metasearch engine
- Copyright (C) 2026-present metamorphosis-dev
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <https://www.gnu.org/licenses/>.
-
- Also add information on how to contact you by electronic and paper mail.
-
- If your software can interact with users remotely through a computer
-network, you should also make sure that it provides a way for users to
-get its source. For example, if your program is a web application, its
-interface could display a "Source" link that leads users to an archive
-of the code. There are many ways you could offer source, and different
-solutions will be better for different programs; see section 13 for the
-specific requirements.
-
- You should also get your employer (if you work as a programmer) or school,
-if any, to sign a "copyright disclaimer" for the program, if necessary.
-For more information on this, and how to apply and follow the GNU AGPL, see
-<https://www.gnu.org/licenses/>.
+MIT License
+
+Copyright (c) 2026-present metamorphosis-dev
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
index 7427922..2f0868f 100644
--- a/README.md
+++ b/README.md
@@ -1,23 +1,20 @@
-# samsa
-
-*samsa — named for Gregor Samsa, who woke to find himself transformed. You wanted results; you got a metasearch engine.*
+# kafka
A privacy-respecting, open metasearch engine written in Go. SearXNG-compatible API with an HTML frontend, designed to be fast, lightweight, and deployable anywhere.
-**11 engines. No JavaScript required. No tracking. One binary.**
+**9 engines. No JavaScript. No tracking. One binary.**
## Features
- **SearXNG-compatible API** — drop-in replacement for existing integrations
-- **11 search engines** — Wikipedia, arXiv, Crossref, Brave Search API, Brave (scraping), Qwant, DuckDuckGo, GitHub, Reddit, Bing, Google, YouTube
-- **Stack Overflow** — bonus engine, not enabled by default
-- **HTML frontend** — Go templates + HTMX with instant search, dark mode, responsive design
+- **9 search engines** — Wikipedia, arXiv, Crossref, Brave, Qwant, DuckDuckGo, GitHub, Reddit, Bing
+- **HTML frontend** — HTMX + Go templates with instant search, dark mode, responsive design
- **Valkey cache** — optional Redis-compatible caching with configurable TTL
- **Rate limiting** — three layers: per-IP, burst, and global (all disabled by default)
- **CORS** — configurable origins for browser-based clients
-- **OpenSearch** — browsers can add samsa as a search engine from the address bar
+- **OpenSearch** — browsers can add kafka as a search engine from the address bar
- **Graceful degradation** — individual engine failures don't kill the whole search
-- **Docker** — multi-stage build, static binary, ~20MB runtime image
+- **Docker** — multi-stage build, ~20MB runtime image
- **NixOS** — native NixOS module with systemd service
## Quick Start
@@ -25,17 +22,17 @@ A privacy-respecting, open metasearch engine written in Go. SearXNG-compatible A
### Binary
```bash
-git clone https://git.ashisgreat.xyz/penal-colony/samsa.git
-cd samsa
-go build ./cmd/samsa
-./samsa -config config.toml
+git clone https://git.ashisgreat.xyz/penal-colony/gosearch.git
+cd gosearch
+go build ./cmd/searxng-go
+./searxng-go -config config.toml
```
### Docker Compose
```bash
cp config.example.toml config.toml
-# Edit config.toml — set your Brave API key, YouTube API key, etc.
+# Edit config.toml — set your Brave API key, etc.
docker compose up -d
```
@@ -44,28 +41,28 @@ docker compose up -d
Add to your flake inputs:
```nix
-inputs.samsa.url = "git+https://git.ashisgreat.xyz/penal-colony/samsa.git";
+inputs.kafka.url = "git+https://git.ashisgreat.xyz/penal-colony/gosearch.git";
```
Enable in your configuration:
```nix
-imports = [ inputs.samsa.nixosModules.default ];
+imports = [ inputs.kafka.nixosModules.default ];
-services.samsa = {
+services.kafka = {
enable = true;
openFirewall = true;
baseUrl = "https://search.example.com";
- # config = "/etc/samsa/config.toml"; # default
+ # config = "/etc/kafka/config.toml"; # default
};
```
Write your config:
```bash
-sudo mkdir -p /etc/samsa
-sudo cp config.example.toml /etc/samsa/config.toml
-sudo $EDITOR /etc/samsa/config.toml
+sudo mkdir -p /etc/kafka
+sudo cp config.example.toml /etc/kafka/config.toml
+sudo $EDITOR /etc/kafka/config.toml
```
Deploy:
@@ -79,7 +76,7 @@ sudo nixos-rebuild switch --flake .#
```bash
nix develop
go test ./...
-go run ./cmd/samsa -config config.toml
+go run ./cmd/searxng-go -config config.toml
```
## Endpoints
@@ -110,7 +107,7 @@ go run ./cmd/samsa -config config.toml
### Example
```bash
-curl "http://localhost:5355/search?q=golang&format=json&engines=github,duckduckgo"
+curl "http://localhost:8080/search?q=golang&format=json&engines=github,duckduckgo"
```
### Response (JSON)
@@ -141,10 +138,8 @@ Copy `config.example.toml` to `config.toml` and edit. All settings can also be o
### Key Sections
- **`[server]`** — port, timeout, public base URL for OpenSearch
-- **`[upstream]`** — optional upstream metasearch proxy for unported engines
+- **`[upstream]`** — optional upstream SearXNG proxy for unported engines
- **`[engines]`** — which engines run locally, engine-specific settings
-- **`[engines.brave]`** — Brave Search API key
-- **`[engines.youtube]`** — YouTube Data API v3 key
- **`[cache]`** — Valkey/Redis address, password, TTL
- **`[cors]`** — allowed origins and methods
- **`[rate_limit]`** — per-IP sliding window (30 req/min default)
@@ -155,14 +150,13 @@ Copy `config.example.toml` to `config.toml` and edit. All settings can also be o
| Variable | Description |
|---|---|
-| `PORT` | Listen port (default: 5355) |
+| `PORT` | Listen port (default: 8080) |
| `BASE_URL` | Public URL for OpenSearch XML |
-| `UPSTREAM_SEARXNG_URL` | Upstream instance URL |
+| `UPSTREAM_SEARXNG_URL` | Upstream SearXNG instance URL |
| `LOCAL_PORTED_ENGINES` | Comma-separated local engine list |
| `HTTP_TIMEOUT` | Upstream request timeout |
| `BRAVE_API_KEY` | Brave Search API key |
| `BRAVE_ACCESS_TOKEN` | Gate requests with token |
-| `YOUTUBE_API_KEY` | YouTube Data API v3 key |
| `VALKEY_ADDRESS` | Valkey/Redis address |
| `VALKEY_PASSWORD` | Valkey/Redis password |
| `VALKEY_CACHE_TTL` | Cache TTL |
@@ -176,64 +170,55 @@ See `config.example.toml` for the full list including rate limiting and CORS var
| Wikipedia | MediaWiki API | General knowledge |
| arXiv | arXiv API | Academic papers |
| Crossref | Crossref API | Academic metadata |
-| Brave Search API | Brave API | General web (requires API key) |
-| Brave | Brave Lite HTML | General web (no key needed) |
+| Brave | Brave Search API | General web (requires API key) |
| Qwant | Qwant Lite HTML | General web |
| DuckDuckGo | DDG Lite HTML | General web |
| GitHub | GitHub Search API v3 | Code and repositories |
| Reddit | Reddit JSON API | Discussions |
| Bing | Bing RSS | General web |
-| Google | GSA User-Agent scraping | General web (no API key) |
-| YouTube | YouTube Data API v3 | Videos (requires API key) |
-| Stack Overflow | Stack Exchange API | Q&A (registered, not enabled by default) |
-Engines not listed in `engines.local_ported` are proxied to an upstream metasearch instance if `upstream.url` is configured.
-
-### API Keys
-
-Brave Search API and YouTube Data API require keys. If omitted, those engines are silently skipped. Brave Lite (scraping) and Google (GSA UA scraping) work without keys.
+Engines not listed in `engines.local_ported` are proxied to an upstream SearXNG instance if `upstream.url` is configured.
## Architecture
```
-┌───────────────────────────────────────┐
-│ HTTP Handler │
-│ /search / /opensearch.xml │
-├───────────────────────────────────────┤
-│ Middleware Chain │
-│ Global → Burst → Per-IP → CORS │
-├───────────────────────────────────────┤
-│ Search Service │
-│ Parallel engine execution │
-│ WaitGroup + graceful degradation │
-├───────────────────────────────────────┤
-│ Cache Layer │
-│ Valkey/Redis (optional; no-op if │
-│ unconfigured) │
-├───────────────────────────────────────┤
-│ Engines (×11 default) │
-│ Each runs in its own goroutine │
-│ Failures → unresponsive_engines │
-└───────────────────────────────────────┘
+┌─────────────────────────────────────┐
+│ HTTP Handler │
+│ /search / /opensearch.xml │
+├─────────────────────────────────────┤
+│ Middleware Chain │
+│ Global → Burst → Per-IP → CORS │
+├─────────────────────────────────────┤
+│ Search Service │
+│ Parallel engine execution │
+│ WaitGroup + graceful degradation │
+├─────────────────────────────────────┤
+│ Cache Layer │
+│ Valkey/Redis (optional, no-op if │
+│ unconfigured) │
+├─────────────────────────────────────┤
+│ Engines (×9) │
+│ Each runs in its own goroutine │
+│ Failures → unresponsive_engines │
+└─────────────────────────────────────┘
```
## Docker
-The Dockerfile uses a multi-stage build with a static Go binary on alpine Linux:
+The Dockerfile uses a multi-stage build:
+
+```dockerfile
+# Build stage: golang:1.24-alpine
+# Runtime stage: alpine:3.21 (~20MB)
+# CGO_ENABLED=0 — static binary
+```
```bash
-# Build: golang:1.24-alpine
-# Runtime: alpine:3.21 (~20MB)
-# CGO_ENABLED=0 — fully static
docker compose up -d
```
Includes Valkey 8 with health checks out of the box.
-## Contributing
-
-See [docs/CONTRIBUTING.md](docs/CONTRIBUTING.md) for a walkthrough of adding a new engine. The interface is two methods: `Name()` and `Search(context, request)`.
-
## License
-[AGPLv3](https://www.gnu.org/licenses/agpl-3.0.html)
+MIT
diff --git a/cmd/samsa/main.go b/cmd/searxng-go/main.go
similarity index 63%
rename from cmd/samsa/main.go
rename to cmd/searxng-go/main.go
index 199033b..dac6258 100644
--- a/cmd/samsa/main.go
+++ b/cmd/searxng-go/main.go
@@ -1,19 +1,3 @@
-// samsa — a privacy-respecting metasearch engine
-// Copyright (C) 2026-present metamorphosis-dev
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with this program. If not, see <https://www.gnu.org/licenses/>.
-
package main
import (
@@ -25,13 +9,13 @@ import (
"net/http"
"os"
- "github.com/metamorphosis-dev/samsa/internal/autocomplete"
- "github.com/metamorphosis-dev/samsa/internal/cache"
- "github.com/metamorphosis-dev/samsa/internal/config"
- "github.com/metamorphosis-dev/samsa/internal/httpapi"
- "github.com/metamorphosis-dev/samsa/internal/middleware"
- "github.com/metamorphosis-dev/samsa/internal/search"
- "github.com/metamorphosis-dev/samsa/internal/views"
+ "github.com/metamorphosis-dev/kafka/internal/autocomplete"
+ "github.com/metamorphosis-dev/kafka/internal/cache"
+ "github.com/metamorphosis-dev/kafka/internal/config"
+ "github.com/metamorphosis-dev/kafka/internal/httpapi"
+ "github.com/metamorphosis-dev/kafka/internal/middleware"
+ "github.com/metamorphosis-dev/kafka/internal/search"
+ "github.com/metamorphosis-dev/kafka/internal/views"
)
func main() {
@@ -69,28 +53,21 @@ func main() {
}
svc := search.NewService(search.ServiceConfig{
- UpstreamURL: cfg.Upstream.URL,
- HTTPTimeout: cfg.HTTPTimeout(),
- Cache: searchCache,
- EnginesConfig: cfg,
+ UpstreamURL: cfg.Upstream.URL,
+ HTTPTimeout: cfg.HTTPTimeout(),
+ Cache: searchCache,
})
acSvc := autocomplete.NewService(cfg.Upstream.URL, cfg.HTTPTimeout())
- h := httpapi.NewHandler(svc, acSvc.Suggestions, cfg.Server.SourceURL, searchCache)
+ h := httpapi.NewHandler(svc, acSvc.Suggestions)
mux := http.NewServeMux()
-
- // HTML template routes
mux.HandleFunc("/", h.Index)
- mux.HandleFunc("/search", h.Search)
- mux.HandleFunc("/preferences", h.Preferences)
-
- // API routes
mux.HandleFunc("/healthz", h.Healthz)
+ mux.HandleFunc("/search", h.Search)
mux.HandleFunc("/autocompleter", h.Autocompleter)
mux.HandleFunc("/opensearch.xml", h.OpenSearch(cfg.Server.BaseURL))
- mux.HandleFunc("/favicon/", h.Favicon)
// Serve embedded static files (CSS, JS, images).
staticFS, err := views.StaticFS()
@@ -100,9 +77,8 @@ func main() {
var subFS fs.FS = staticFS
mux.Handle("/static/", http.StripPrefix("/static/", http.FileServer(http.FS(subFS))))
- // Apply middleware: global rate limit → burst rate limit → per-IP rate limit → CORS → security headers → handler.
+ // Apply middleware: global rate limit → burst rate limit → per-IP rate limit → CORS → handler.
var handler http.Handler = mux
- handler = middleware.SecurityHeaders(middleware.SecurityHeadersConfig{})(handler)
handler = middleware.CORS(middleware.CORSConfig{
AllowedOrigins: cfg.CORS.AllowedOrigins,
AllowedMethods: cfg.CORS.AllowedMethods,
@@ -114,7 +90,6 @@ func main() {
Requests: cfg.RateLimit.Requests,
Window: cfg.RateLimitWindow(),
CleanupInterval: cfg.RateLimitCleanupInterval(),
- TrustedProxies: cfg.RateLimit.TrustedProxies,
}, logger)(handler)
handler = middleware.GlobalRateLimit(middleware.GlobalRateLimitConfig{
Requests: cfg.GlobalRateLimit.Requests,
@@ -128,7 +103,7 @@ func main() {
}, logger)(handler)
addr := fmt.Sprintf(":%d", cfg.Server.Port)
- logger.Info("samsa starting",
+ logger.Info("searxng-go starting",
"addr", addr,
"cache", searchCache.Enabled(),
"rate_limit", cfg.RateLimit.Requests > 0,
diff --git a/config.example.toml b/config.example.toml
index 7fed53e..df77184 100644
--- a/config.example.toml
+++ b/config.example.toml
@@ -1,34 +1,28 @@
-# samsa configuration
+# kafka configuration
# Copy to config.toml and adjust as needed.
# Environment variables are used as fallbacks when a config field is empty/unset.
[server]
# Listen port (env: PORT)
-port = 5355
+port = 8080
# HTTP timeout for engine and upstream calls (env: HTTP_TIMEOUT)
http_timeout = "10s"
# Public base URL for OpenSearch XML (env: BASE_URL)
-# Set this so browsers can add samsa as a search engine.
+# Set this so browsers can add kafka as a search engine.
# Example: "https://search.example.com"
base_url = ""
-# Link to the source code (shown in footer as "Source" link)
-# Defaults to the upstream samsa repo if not set.
-# Example: "https://git.example.com/my-samsa-fork"
-source_url = ""
-
[upstream]
-# URL of an upstream metasearch instance for unported engines (env: UPSTREAM_SEARXNG_URL)
+# URL of an upstream SearXNG instance for unported engines (env: UPSTREAM_SEARXNG_URL)
# Leave empty to run without an upstream proxy.
url = ""
[engines]
# Comma-separated list of engines to execute locally in Go (env: LOCAL_PORTED_ENGINES)
-# Engines not listed here will be proxied to the upstream instance.
-# Include bing_images, ddg_images, qwant_images for image search when [upstream].url is empty.
-local_ported = ["wikipedia", "wikidata", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing", "google", "youtube", "bing_images", "ddg_images", "qwant_images"]
+# Engines not listed here will be proxied to upstream SearXNG.
+local_ported = ["wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing"]
[engines.brave]
# Brave Search API key (env: BRAVE_API_KEY)
@@ -41,10 +35,6 @@ access_token = ""
category = "web-lite"
results_per_page = 10
-[engines.youtube]
-# YouTube Data API v3 key (env: YOUTUBE_API_KEY)
-api_key = ""
-
[cache]
# Valkey/Redis cache for search results.
# Leave address empty to disable caching entirely.
@@ -57,12 +47,6 @@ db = 0
# Cache TTL for search results (env: VALKEY_CACHE_TTL)
default_ttl = "5m"
-[cache.ttl_overrides]
-# Per-engine TTL overrides (uncomment to use):
-# wikipedia = "48h"
-# reddit = "15m"
-# braveapi = "2h"
-
[cors]
# CORS configuration for browser-based clients.
# Allowed origins: use "*" for all, or specific domains (env: CORS_ALLOWED_ORIGINS)
diff --git a/docker-compose.yml b/docker-compose.yml
index 538713f..98eae96 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -8,7 +8,7 @@ services:
kafka:
build: .
ports:
- - "5355:5355"
+ - "8080:8080"
volumes:
- ./config.toml:/etc/kafka/config.toml:ro
depends_on:
diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md
deleted file mode 100644
index bada2e2..0000000
--- a/docs/CONTRIBUTING.md
+++ /dev/null
@@ -1,218 +0,0 @@
-# Contributing — Adding a New Engine
-
-This guide walks through adding a new search engine to samsa. The minimal engine needs only an HTTP client, a query, and a result parser.
-
----
-
-## 1. Create the engine file
-
-Place it in `internal/engines/`:
-
-```
-internal/engines/
- myengine.go ← your engine
- myengine_test.go ← tests (required)
-```
-
-Name the struct after the engine, e.g. `WolframEngine` for "wolfram". The `Name()` method returns the engine key used throughout samsa.
-
-## 2. Implement the Engine interface
-
-```go
-package engines
-
-import (
- "context"
- "github.com/metamorphosis-dev/samsa/internal/contracts"
-)
-
-type MyEngine struct {
- client *http.Client
-}
-
-func (e *MyEngine) Name() string { return "myengine" }
-
-func (e *MyEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
- // ...
-}
-```
-
-### The SearchRequest fields you'll use most:
-
-| Field | Type | Description |
-|-------|------|-------------|
-| `Query` | `string` | The search query |
-| `Pageno` | `int` | Current page number (1-based) |
-| `Safesearch` | `int` | 0=off, 1=moderate, 2=strict |
-| `Language` | `string` | ISO language code (e.g. `"en"`) |
-
-### The SearchResponse to return:
-
-```go
-contracts.SearchResponse{
- Query: req.Query,
- NumberOfResults: len(results),
- Results: results, // []MainResult
- Answers: []map[string]any{},
- Corrections: []string{},
- Infoboxes: []map[string]any{},
- Suggestions: []string{},
- UnresponsiveEngines: [][2]string{},
-}
-```
-
-### Empty query — return early:
-
-```go
-if strings.TrimSpace(req.Query) == "" {
- return contracts.SearchResponse{Query: req.Query}, nil
-}
-```
-
-### Engine unavailable / error — graceful degradation:
-
-```go
-// Rate limited or blocked
-return contracts.SearchResponse{
- Query: req.Query,
- UnresponsiveEngines: [][2]string{{"myengine", "reason"}},
- Results: []contracts.MainResult{},
- // ... empty other fields
-}, nil
-
-// Hard error — return it
-return contracts.SearchResponse{}, fmt.Errorf("myengine upstream error: status %d", resp.StatusCode)
-```
-
-## 3. Build the result
-
-```go
-urlPtr := "https://example.com/result"
-result := contracts.MainResult{
- Title: "Result Title",
- Content: "Snippet or description text",
- URL: &urlPtr, // pointer to string, required
- Engine: "myengine",
- Category: "general", // or "it", "science", "videos", "images", "social media"
- Score: 0, // used for relevance ranking during merge
- Engines: []string{"myengine"},
-}
-```
-
-### Template field
-
-The template system checks for `"videos"` and `"images"`. Everything else renders via `result_item.html`. Set `Template` only if you have a custom template; omit it for the default result card.
-
-### Category field
-
-Controls which category tab the result appears under and which engines are triggered:
-
-| Category | Engines used |
-|----------|-------------|
-| `general` | google, bing, ddg, brave, braveapi, qwant, wikipedia |
-| `it` | github, stackoverflow |
-| `science` | arxiv, crossref |
-| `videos` | youtube |
-| `images` | bing_images, ddg_images, qwant_images |
-| `social media` | reddit |
-
-## 4. Wire it into the factory
-
-In `internal/engines/factory.go`, add your engine to the map returned by `NewDefaultPortedEngines`:
-
-```go
-"myengine": &MyEngine{client: client},
-```
-
-If your engine needs an API key, read it from config or the environment (see `braveapi` or `youtube` in factory.go for the pattern).
-
-## 5. Register defaults
-
-In `internal/engines/planner.go`:
-
-**Add to `defaultPortedEngines`:**
-```go
-var defaultPortedEngines = []string{
- // ... existing ...
- "myengine",
-}
-```
-
-**Add to category mapping in `inferFromCategories`** (if applicable):
-```go
-case "general":
- set["myengine"] = true
-```
-
-**Update the sort order map** so results maintain consistent ordering:
-```go
-order := map[string]int{
- // ... existing ...
- "myengine": N, // pick a slot
-}
-```
-
-## 6. Add tests
-
-At minimum, test:
-- `Name()` returns the correct string
-- Nil engine returns an error
-- Empty query returns zero results
-- Successful API response parses correctly
-- Rate limit / error cases return `UnresponsiveEngines` with a reason
-
-Use `httptest.NewServer` to mock the upstream API. See `arxiv_test.go` or `reddit_test.go` for examples.
-
-## 7. Build and test
-
-```bash
-go build ./...
-go test ./internal/engines/ -run MyEngine -v
-go test ./...
-```
-
-## Example: Adding an RSS-based engine
-
-If the engine provides an RSS feed, the parsing is straightforward:
-
-```go
-type rssItem struct {
- Title string `xml:"title"`
- Link string `xml:"link"`
- Description string `xml:"description"`
-}
-
-type rssFeed struct {
- Channel struct {
- Items []rssItem `xml:"item"`
- } `xml:"channel"`
-}
-
-dec := xml.NewDecoder(resp.Body)
-var feed rssFeed
-dec.Decode(&feed)
-
-for _, item := range feed.Channel.Items {
- urlPtr := item.Link
- results = append(results, contracts.MainResult{
- Title: item.Title,
- Content: stripHTML(item.Description),
- URL: &urlPtr,
- Engine: "myengine",
- // ...
- })
-}
-```
-
-## Checklist
-
-- [ ] Engine file created in `internal/engines/`
-- [ ] `Engine` interface implemented (`Name()` + `Search()`)
-- [ ] Empty query handled (return early, no error)
-- [ ] Graceful degradation for errors and rate limits
-- [ ] Results use `Category` to group with related engines
-- [ ] Factory updated with new engine
-- [ ] Planner updated (defaults + category mapping + sort order)
-- [ ] Tests written covering main paths
-- [ ] `go build ./...` succeeds
-- [ ] `go test ./...` passes
diff --git a/docs/superpowers/plans/2026-03-24-per-engine-ttl-cache.md b/docs/superpowers/plans/2026-03-24-per-engine-ttl-cache.md
deleted file mode 100644
index d9ca154..0000000
--- a/docs/superpowers/plans/2026-03-24-per-engine-ttl-cache.md
+++ /dev/null
@@ -1,789 +0,0 @@
-# Per-Engine TTL Cache — Implementation Plan
-
-> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
-
-**Goal:** Replace the merged-response cache with per-engine response caching, enabling tier-based TTLs and stale-while-revalidate semantics.
-
-**Architecture:** Each engine's raw response is cached independently with its tier-based TTL. On stale hits, return cached data immediately and refresh in background. Query hash is computed from shared params (query, pageno, safesearch, language, time_range) and prefixed with engine name for the cache key.
-
-**Tech Stack:** Go 1.24, Valkey/Redis (go-redis/v9), existing samsa contracts
-
----
-
-## File Map
-
-| Action | File | Responsibility |
-|--------|------|----------------|
-| Create | `internal/cache/tiers.go` | Tier definitions, `EngineTier()` function |
-| Create | `internal/cache/tiers_test.go` | Tests for EngineTier |
-| Create | `internal/cache/engine_cache.go` | `EngineCache` struct with tier-aware Get/Set |
-| Create | `internal/cache/engine_cache_test.go` | Tests for EngineCache |
-| Modify | `internal/cache/cache.go` | Add `QueryHash()`, add `CachedEngineResponse` type |
-| Modify | `internal/cache/cache_test.go` | Add tests for `QueryHash()` |
-| Modify | `internal/config/config.go` | Add `TTLOverrides` to `CacheConfig` |
-| Modify | `internal/search/service.go` | Use `EngineCache`, parallel lookups, background refresh |
-
----
-
-## Task 1: Add QueryHash and CachedEngineResponse to cache.go
-
-**Files:**
-- Modify: `internal/cache/cache.go`
-- Modify: `internal/cache/cache_test.go`
-
-- [ ] **Step 1: Write failing test for QueryHash()**
-
-```go
-// In cache_test.go, add:
-
-func TestQueryHash(t *testing.T) {
- // Same params should produce same hash
- hash1 := QueryHash("golang", 1, 0, "en", "")
- hash2 := QueryHash("golang", 1, 0, "en", "")
- if hash1 != hash2 {
- t.Errorf("QueryHash: same params should produce same hash, got %s != %s", hash1, hash2)
- }
-
- // Different query should produce different hash
- hash3 := QueryHash("rust", 1, 0, "en", "")
- if hash1 == hash3 {
- t.Errorf("QueryHash: different queries should produce different hash")
- }
-
- // Different pageno should produce different hash
- hash4 := QueryHash("golang", 2, 0, "en", "")
- if hash1 == hash4 {
- t.Errorf("QueryHash: different pageno should produce different hash")
- }
-
- // time_range should affect hash
- hash5 := QueryHash("golang", 1, 0, "en", "day")
- if hash1 == hash5 {
- t.Errorf("QueryHash: different time_range should produce different hash")
- }
-
- // Hash should be 16 characters (truncated SHA-256)
- if len(hash1) != 16 {
- t.Errorf("QueryHash: expected 16 char hash, got %d", len(hash1))
- }
-}
-```
-
-- [ ] **Step 2: Run test to verify it fails**
-
-Run: `nix develop --command bash -c "go test -run TestQueryHash ./internal/cache/ -v"`
-Expected: FAIL — "QueryHash not defined"
-
-- [ ] **Step 3: Implement QueryHash() and CachedEngineResponse in cache.go**
-
-Add to `cache.go` (the imports `crypto/sha256` and `encoding/hex` are already present in cache.go from the existing `Key()` function):
-
-```go
-// QueryHash computes a deterministic hash from shared request parameters
-// (query, pageno, safesearch, language, time_range) for use as a cache key suffix.
-// The hash is a truncated SHA-256 (16 hex chars).
-func QueryHash(query string, pageno int, safesearch int, language, timeRange string) string {
- h := sha256.New()
- fmt.Fprintf(h, "q=%s|", query)
- fmt.Fprintf(h, "pageno=%d|", pageno)
- fmt.Fprintf(h, "safesearch=%d|", safesearch)
- fmt.Fprintf(h, "lang=%s|", language)
- if timeRange != "" {
- fmt.Fprintf(h, "tr=%s|", timeRange)
- }
- return hex.EncodeToString(h.Sum(nil))[:16]
-}
-
-// CachedEngineResponse wraps an engine's cached response with metadata.
-type CachedEngineResponse struct {
- Engine string
- Response []byte
- StoredAt time.Time
-}
-```
-
-- [ ] **Step 4: Run test to verify it passes**
-
-Run: `nix develop --command bash -c "go test -run TestQueryHash ./internal/cache/ -v"`
-Expected: PASS
-
-- [ ] **Step 5: Commit**
-
-```bash
-git add internal/cache/cache.go internal/cache/cache_test.go
-git commit -m "cache: add QueryHash and CachedEngineResponse type"
-```
-
----
-
-## Task 2: Create tiers.go with tier definitions
-
-**Files:**
-- Create: `internal/cache/tiers.go`
-
-- [ ] **Step 1: Create tiers.go with tier definitions and EngineTier function**
-
-```go
-package cache
-
-import "time"
-
-// TTLTier represents a cache TTL tier with a name and duration.
-type TTLTier struct {
- Name string
- Duration time.Duration
-}
-
-// defaultTiers maps engine names to their default TTL tiers.
-var defaultTiers = map[string]TTLTier{
- // Static knowledge engines — rarely change
- "wikipedia": {Name: "static", Duration: 24 * time.Hour},
- "wikidata": {Name: "static", Duration: 24 * time.Hour},
- "arxiv": {Name: "static", Duration: 24 * time.Hour},
- "crossref": {Name: "static", Duration: 24 * time.Hour},
- "stackoverflow": {Name: "static", Duration: 24 * time.Hour},
- "github": {Name: "static", Duration: 24 * time.Hour},
-
- // API-based general search — fresher data
- "braveapi": {Name: "api_general", Duration: 1 * time.Hour},
- "youtube": {Name: "api_general", Duration: 1 * time.Hour},
-
- // Scraped general search — moderately stable
- "google": {Name: "scraped_general", Duration: 2 * time.Hour},
- "bing": {Name: "scraped_general", Duration: 2 * time.Hour},
- "duckduckgo": {Name: "scraped_general", Duration: 2 * time.Hour},
- "qwant": {Name: "scraped_general", Duration: 2 * time.Hour},
- "brave": {Name: "scraped_general", Duration: 2 * time.Hour},
-
- // News/social — changes frequently
- "reddit": {Name: "news_social", Duration: 30 * time.Minute},
-
- // Image search
- "bing_images": {Name: "images", Duration: 1 * time.Hour},
- "ddg_images": {Name: "images", Duration: 1 * time.Hour},
- "qwant_images": {Name: "images", Duration: 1 * time.Hour},
-}
-
-// EngineTier returns the TTL tier for an engine, applying overrides if provided.
-// If the engine has no defined tier, returns a default of 1 hour.
-func EngineTier(engineName string, overrides map[string]time.Duration) TTLTier {
- // Check override first — override tier name is just the engine name
- if override, ok := overrides[engineName]; ok && override > 0 {
- return TTLTier{Name: engineName, Duration: override}
- }
-
- // Fall back to default tier
- if tier, ok := defaultTiers[engineName]; ok {
- return tier
- }
-
- // Unknown engines get a sensible default
- return TTLTier{Name: "unknown", Duration: 1 * time.Hour}
-}
-```
-
-- [ ] **Step 2: Run go vet to verify it compiles**
-
-Run: `nix develop --command bash -c "go vet ./internal/cache/tiers.go"`
-Expected: no output (success)
-
-- [ ] **Step 3: Write a basic test for EngineTier**
-
-```go
-// In internal/cache/tiers_test.go:
-
-package cache
-
-import "testing"
-
-func TestEngineTier(t *testing.T) {
- // Test default static tier
- tier := EngineTier("wikipedia", nil)
- if tier.Name != "static" || tier.Duration != 24*time.Hour {
- t.Errorf("wikipedia: expected static/24h, got %s/%v", tier.Name, tier.Duration)
- }
-
- // Test default api_general tier
- tier = EngineTier("braveapi", nil)
- if tier.Name != "api_general" || tier.Duration != 1*time.Hour {
- t.Errorf("braveapi: expected api_general/1h, got %s/%v", tier.Name, tier.Duration)
- }
-
- // Test override takes precedence — override tier name is just the engine name
- override := 48 * time.Hour
- tier = EngineTier("wikipedia", map[string]time.Duration{"wikipedia": override})
- if tier.Name != "wikipedia" || tier.Duration != 48*time.Hour {
- t.Errorf("wikipedia override: expected wikipedia/48h, got %s/%v", tier.Name, tier.Duration)
- }
-
- // Test unknown engine gets default
- tier = EngineTier("unknown_engine", nil)
- if tier.Name != "unknown" || tier.Duration != 1*time.Hour {
- t.Errorf("unknown engine: expected unknown/1h, got %s/%v", tier.Name, tier.Duration)
- }
-}
-```
-
-- [ ] **Step 4: Run test to verify it passes**
-
-Run: `nix develop --command bash -c "go test -run TestEngineTier ./internal/cache/ -v"`
-Expected: PASS
-
-- [ ] **Step 5: Commit**
-
-```bash
-git add internal/cache/tiers.go internal/cache/tiers_test.go
-git commit -m "cache: add tier definitions and EngineTier function"
-```
-
----
-
-## Task 3: Create EngineCache in engine_cache.go
-
-**Files:**
-- Create: `internal/cache/engine_cache.go`
-- Create: `internal/cache/engine_cache_test.go`
-
-**Note:** The existing `Key()` function in `cache.go` is still used for favicon caching. The new `QueryHash()` and `EngineCache` are separate and only for per-engine search response caching.
-
-- [ ] **Step 1: Write failing test for EngineCache.Get/Set**
-
-```go
-package cache
-
-import (
- "context"
- "testing"
-
- "github.com/metamorphosis-dev/samsa/internal/contracts"
-)
-
-func TestEngineCacheGetSet(t *testing.T) {
- // Create a disabled cache for unit testing (nil client)
- c := &Cache{logger: slog.Default()}
- ec := NewEngineCache(c, nil)
-
- ctx := context.Background()
- cached, ok := ec.Get(ctx, "wikipedia", "abc123")
- if ok {
- t.Errorf("Get on disabled cache: expected false, got %v", ok)
- }
- _ = cached // unused when ok=false
-}
-
-func TestEngineCacheKeyFormat(t *testing.T) {
- key := engineCacheKey("wikipedia", "abc123")
- if key != "samsa:resp:wikipedia:abc123" {
- t.Errorf("engineCacheKey: expected samsa:resp:wikipedia:abc123, got %s", key)
- }
-}
-
-func TestEngineCacheIsStale(t *testing.T) {
- c := &Cache{logger: slog.Default()}
- ec := NewEngineCache(c, nil)
-
- // Fresh response (stored 1 minute ago, wikipedia has 24h TTL)
- fresh := CachedEngineResponse{
- Engine: "wikipedia",
- Response: []byte(`{}`),
- StoredAt: time.Now().Add(-1 * time.Minute),
- }
- if ec.IsStale(fresh, "wikipedia") {
- t.Errorf("IsStale: 1-minute-old wikipedia should NOT be stale")
- }
-
- // Stale response (stored 25 hours ago)
- stale := CachedEngineResponse{
- Engine: "wikipedia",
- Response: []byte(`{}`),
- StoredAt: time.Now().Add(-25 * time.Hour),
- }
- if !ec.IsStale(stale, "wikipedia") {
- t.Errorf("IsStale: 25-hour-old wikipedia SHOULD be stale (24h TTL)")
- }
-
- // Override: 30 minute TTL for reddit
- overrides := map[string]time.Duration{"reddit": 30 * time.Minute}
- ec2 := NewEngineCache(c, overrides)
-
- // 20 minutes old with 30m override should NOT be stale
- redditFresh := CachedEngineResponse{
- Engine: "reddit",
- Response: []byte(`{}`),
- StoredAt: time.Now().Add(-20 * time.Minute),
- }
- if ec2.IsStale(redditFresh, "reddit") {
- t.Errorf("IsStale: 20-min reddit with 30m override should NOT be stale")
- }
-
- // 45 minutes old with 30m override SHOULD be stale
- redditStale := CachedEngineResponse{
- Engine: "reddit",
- Response: []byte(`{}`),
- StoredAt: time.Now().Add(-45 * time.Minute),
- }
- if !ec2.IsStale(redditStale, "reddit") {
- t.Errorf("IsStale: 45-min reddit with 30m override SHOULD be stale")
- }
-}
-```
-
-- [ ] **Step 2: Run test to verify it fails**
-
-Run: `nix develop --command bash -c "go test -run TestEngineCache ./internal/cache/ -v"`
-Expected: FAIL — "EngineCache not defined" or "CachedEngineResponse not defined"
-
-- [ ] **Step 3: Implement EngineCache using GetBytes/SetBytes**
-
-The `EngineCache` uses the existing `GetBytes`/`SetBytes` public methods on `Cache` (the `client` field is unexported so we must use those methods).
-
-```go
-package cache
-
-import (
- "context"
- "encoding/json"
- "log/slog"
- "time"
-
- "github.com/metamorphosis-dev/samsa/internal/contracts"
-)
-
-// EngineCache wraps Cache with per-engine tier-aware Get/Set operations.
-type EngineCache struct {
- cache *Cache
- overrides map[string]time.Duration
-}
-
-// NewEngineCache creates a new EngineCache with optional TTL overrides.
-// If overrides is nil, default tier durations are used.
-func NewEngineCache(cache *Cache, overrides map[string]time.Duration) *EngineCache {
- return &EngineCache{
- cache: cache,
- overrides: overrides,
- }
-}
-
-// Get retrieves a cached engine response. Returns (zero value, false) if not
-// found or if cache is disabled.
-func (ec *EngineCache) Get(ctx context.Context, engine, queryHash string) (CachedEngineResponse, bool) {
- key := engineCacheKey(engine, queryHash)
-
- data, ok := ec.cache.GetBytes(ctx, key)
- if !ok {
- return CachedEngineResponse{}, false
- }
-
- var cached CachedEngineResponse
- if err := json.Unmarshal(data, &cached); err != nil {
- ec.cache.logger.Warn("engine cache hit but unmarshal failed", "key", key, "error", err)
- return CachedEngineResponse{}, false
- }
-
- ec.cache.logger.Debug("engine cache hit", "key", key, "engine", engine)
- return cached, true
-}
-
-// Set stores an engine response in the cache with the engine's tier TTL.
-func (ec *EngineCache) Set(ctx context.Context, engine, queryHash string, resp contracts.SearchResponse) {
- if !ec.cache.Enabled() {
- return
- }
-
- data, err := json.Marshal(resp)
- if err != nil {
- ec.cache.logger.Warn("engine cache set: marshal failed", "engine", engine, "error", err)
- return
- }
-
- tier := EngineTier(engine, ec.overrides)
- key := engineCacheKey(engine, queryHash)
-
- cached := CachedEngineResponse{
- Engine: engine,
- Response: data,
- StoredAt: time.Now(),
- }
-
- cachedData, err := json.Marshal(cached)
- if err != nil {
- ec.cache.logger.Warn("engine cache set: wrap marshal failed", "key", key, "error", err)
- return
- }
-
- ec.cache.SetBytes(ctx, key, cachedData, tier.Duration)
-}
-
-// IsStale returns true if the cached response is older than the tier's TTL.
-func (ec *EngineCache) IsStale(cached CachedEngineResponse, engine string) bool {
- tier := EngineTier(engine, ec.overrides)
- return time.Since(cached.StoredAt) > tier.Duration
-}
-
-// Logger returns the logger for background refresh logging.
-func (ec *EngineCache) Logger() *slog.Logger {
- return ec.cache.logger
-}
-
-// engineCacheKey builds the cache key for an engine+query combination.
-func engineCacheKey(engine, queryHash string) string {
- return "samsa:resp:" + engine + ":" + queryHash
-}
-```
-
-- [ ] **Step 4: Run tests to verify they pass**
-
-Run: `nix develop --command bash -c "go test -run TestEngineCache ./internal/cache/ -v"`
-Expected: PASS
-
-- [ ] **Step 5: Commit**
-
-```bash
-git add internal/cache/engine_cache.go internal/cache/engine_cache_test.go
-git commit -m "cache: add EngineCache with tier-aware Get/Set"
-```
-
----
-
-## Task 4: Add TTLOverrides to config
-
-**Files:**
-- Modify: `internal/config/config.go`
-
-- [ ] **Step 1: Add TTLOverrides to CacheConfig**
-
-In `CacheConfig` struct, add:
-
-```go
-type CacheConfig struct {
- Address string `toml:"address"`
- Password string `toml:"password"`
- DB int `toml:"db"`
- DefaultTTL string `toml:"default_ttl"`
- TTLOverrides map[string]string `toml:"ttl_overrides"` // engine -> duration string
-}
-```
-
-- [ ] **Step 2: Add TTLOverridesParsed() method to Config**
-
-Add after `CacheTTL()`:
-
-```go
-// CacheTTLOverrides returns parsed TTL overrides from config.
-func (c *Config) CacheTTLOverrides() map[string]time.Duration {
- if len(c.Cache.TTLOverrides) == 0 {
- return nil
- }
- out := make(map[string]time.Duration, len(c.Cache.TTLOverrides))
- for engine, durStr := range c.Cache.TTLOverrides {
- if d, err := time.ParseDuration(durStr); err == nil && d > 0 {
- out[engine] = d
- }
- }
- return out
-}
-```
-
-- [ ] **Step 3: Run tests to verify nothing breaks**
-
-Run: `nix develop --command bash -c "go test ./internal/config/ -v"`
-Expected: PASS
-
-- [ ] **Step 4: Commit**
-
-```bash
-git add internal/config/config.go
-git commit -m "config: add TTLOverrides to CacheConfig"
-```
-
----
-
-## Task 5: Wire EngineCache into search service
-
-**Files:**
-- Modify: `internal/search/service.go`
-
-- [ ] **Step 1: Read the current service.go to understand wiring**
-
-The service currently takes `*Cache` in `ServiceConfig`. We need to change it to take `*EngineCache` or change the field type.
-
-- [ ] **Step 2: Modify Service struct and NewService to use EngineCache**
-
-Change `Service`:
-
-```go
-type Service struct {
- upstreamClient *upstream.Client
- planner *engines.Planner
- localEngines map[string]engines.Engine
- engineCache *cache.EngineCache
-}
-```
-
-Change `NewService`:
-
-```go
-func NewService(cfg ServiceConfig) *Service {
- timeout := cfg.HTTPTimeout
- if timeout <= 0 {
- timeout = 10 * time.Second
- }
-
- httpClient := httpclient.NewClient(timeout)
-
- var up *upstream.Client
- if cfg.UpstreamURL != "" {
- c, err := upstream.NewClient(cfg.UpstreamURL, timeout)
- if err == nil {
- up = c
- }
- }
-
- var engineCache *cache.EngineCache
- if cfg.Cache != nil {
- engineCache = cache.NewEngineCache(cfg.Cache, cfg.CacheTTLOverrides)
- }
-
- return &Service{
- upstreamClient: up,
- planner: engines.NewPlannerFromEnv(),
- localEngines: engines.NewDefaultPortedEngines(httpClient, cfg.EnginesConfig),
- engineCache: engineCache,
- }
-}
-```
-
-Add `CacheTTLOverrides` to `ServiceConfig`:
-
-```go
-type ServiceConfig struct {
- UpstreamURL string
- HTTPTimeout time.Duration
- Cache *cache.Cache
- CacheTTLOverrides map[string]time.Duration
- EnginesConfig *config.Config
-}
-```
-
-- [ ] **Step 3: Rewrite Search() with correct stale-while-revalidate logic**
-
-The stale-while-revalidate flow:
-
-1. **Cache lookup (Phase 1)**: Check cache for each engine in parallel. Classify each as:
- - Fresh hit: cache has data AND not stale → deserialize, mark as `fresh`
- - Stale hit: cache has data AND stale → keep in `cached`, no `fresh` yet
- - Miss: cache has no data → `hit=false`, no `cached` or `fresh`
-
-2. **Fetch (Phase 2)**: For each engine:
- - Fresh hit: return immediately, no fetch needed
- - Stale hit: return stale data immediately, fetch fresh in background
- - Miss: fetch fresh synchronously, cache result
-
-3. **Collect (Phase 3)**: Collect all responses for merge.
-
-```go
-// Search executes the request against local engines (in parallel) and
-// optionally the upstream instance for unported engines.
-func (s *Service) Search(ctx context.Context, req SearchRequest) (SearchResponse, error) {
- queryHash := cache.QueryHash(
- req.Query,
- int(req.Pageno),
- int(req.Safesearch),
- req.Language,
- derefString(req.TimeRange),
- )
-
- localEngineNames, upstreamEngineNames, _ := s.planner.Plan(req)
-
- // Phase 1: Parallel cache lookups — classify each engine as fresh/stale/miss
- type cacheResult struct {
- engine string
- cached cache.CachedEngineResponse
- hit bool
- fresh contracts.SearchResponse
- fetchErr error
- unmarshalErr bool // true if hit but unmarshal failed (treat as miss)
- }
-
- cacheResults := make([]cacheResult, len(localEngineNames))
-
- var lookupWg sync.WaitGroup
- for i, name := range localEngineNames {
- lookupWg.Add(1)
- go func(i int, name string) {
- defer lookupWg.Done()
-
- result := cacheResult{engine: name}
-
- if s.engineCache != nil {
- cached, ok := s.engineCache.Get(ctx, name, queryHash)
- if ok {
- result.hit = true
- result.cached = cached
- if !s.engineCache.IsStale(cached, name) {
- // Fresh cache hit — deserialize and use directly
- var resp contracts.SearchResponse
- if err := json.Unmarshal(cached.Response, &resp); err == nil {
- result.fresh = resp
- } else {
- // Unmarshal failed — treat as cache miss (will fetch fresh synchronously)
- result.unmarshalErr = true
- result.hit = false // treat as miss
- }
- }
- // If stale: result.fresh stays zero, result.cached has stale data
- }
- }
-
- cacheResults[i] = result
- }(i, name)
- }
- lookupWg.Wait()
-
- // Phase 2: Fetch fresh for misses and stale entries
- var fetchWg sync.WaitGroup
- for i, name := range localEngineNames {
- cr := cacheResults[i]
-
- // Fresh hit — nothing to do in phase 2
- if cr.hit && cr.fresh.Response != nil {
- continue
- }
-
- // Stale hit — return stale immediately, refresh in background
- if cr.hit && cr.cached.Response != nil && s.engineCache != nil && s.engineCache.IsStale(cr.cached, name) {
- fetchWg.Add(1)
- go func(name string) {
- defer fetchWg.Done()
- eng, ok := s.localEngines[name]
- if !ok {
- return
- }
- freshResp, err := eng.Search(ctx, req)
- if err != nil {
- s.engineCache.Logger().Debug("background refresh failed", "engine", name, "error", err)
- return
- }
- s.engineCache.Set(ctx, name, queryHash, freshResp)
- }(name)
- continue
- }
-
- // Cache miss — fetch fresh synchronously
- if !cr.hit {
- fetchWg.Add(1)
- go func(i int, name string) {
- defer fetchWg.Done()
-
- eng, ok := s.localEngines[name]
- if !ok {
- cacheResults[i] = cacheResult{
- engine: name,
- fetchErr: fmt.Errorf("engine not registered: %s", name),
- }
- return
- }
-
- freshResp, err := eng.Search(ctx, req)
- if err != nil {
- cacheResults[i] = cacheResult{
- engine: name,
- fetchErr: err,
- }
- return
- }
-
- // Cache the fresh response
- if s.engineCache != nil {
- s.engineCache.Set(ctx, name, queryHash, freshResp)
- }
-
- cacheResults[i] = cacheResult{
- engine: name,
- fresh: freshResp,
- hit: false,
- }
- }(i, name)
- }
- }
- fetchWg.Wait()
-
- // Phase 3: Collect responses for merge
- responses := make([]contracts.SearchResponse, 0, len(cacheResults))
-
- for _, cr := range cacheResults {
- if cr.fetchErr != nil {
- responses = append(responses, unresponsiveResponse(req.Query, cr.engine, cr.fetchErr.Error()))
- continue
- }
- // Use fresh data if available (fresh hit or freshly fetched), otherwise use stale cached
- if cr.fresh.Response != nil {
- responses = append(responses, cr.fresh)
- } else if cr.hit && cr.cached.Response != nil {
- var resp contracts.SearchResponse
- if err := json.Unmarshal(cr.cached.Response, &resp); err == nil {
- responses = append(responses, resp)
- }
- }
- }
-
- // ... rest of upstream proxy and merge logic (unchanged) ...
-}
-```
-
-Note: The imports need `encoding/json` and `fmt` added. The existing imports in service.go already include `sync` and `time`.
-
-- [ ] **Step 4: Run tests to verify compilation**
-
-Run: `nix develop --command bash -c "go build ./internal/search/"`
-Expected: no output (success)
-
-- [ ] **Step 5: Run full test suite**
-
-Run: `nix develop --command bash -c "go test ./..."`
-Expected: All pass
-
-- [ ] **Step 6: Commit**
-
-```bash
-git add internal/search/service.go
-git commit -m "search: wire per-engine cache with tier-aware TTLs"
-```
-
----
-
-## Task 6: Update config.example.toml
-
-**Files:**
-- Modify: `config.example.toml`
-
-- [ ] **Step 1: Add TTL overrides section to config.example.toml**
-
-Add after the `[cache]` section:
-
-```toml
-[cache.ttl_overrides]
-# Per-engine TTL overrides (uncomment to use):
-# wikipedia = "48h"
-# reddit = "15m"
-# braveapi = "2h"
-```
-
-- [ ] **Step 2: Commit**
-
-```bash
-git add config.example.toml
-git commit -m "config: add cache.ttl_overrides example"
-```
-
----
-
-## Verification
-
-After all tasks complete, run:
-
-```bash
-nix develop --command bash -c "go test ./... -v 2>&1 | tail -50"
-```
-
-All tests should pass. The search service should now cache each engine's response independently with tier-based TTLs.
diff --git a/docs/superpowers/specs/2026-03-24-per-engine-ttl-cache-design.md b/docs/superpowers/specs/2026-03-24-per-engine-ttl-cache-design.md
deleted file mode 100644
index 5890190..0000000
--- a/docs/superpowers/specs/2026-03-24-per-engine-ttl-cache-design.md
+++ /dev/null
@@ -1,219 +0,0 @@
-# Per-Engine TTL Cache — Design
-
-## Overview
-
-Replace the current merged-response cache with a per-engine response cache. Each engine's raw response is cached independently with a tier-based TTL, enabling stale-while-revalidate semantics and more granular freshness control.
-
-## Cache Key Structure
-
-```
-samsa:resp:{engine}:{query_hash}
-```
-
-Where `query_hash` = SHA-256 of shared request params (query, pageno, safesearch, language, time_range), truncated to 16 hex chars.
-
-Example:
-- `samsa:resp:wikipedia:a3f1b2c3d4e5f678`
-- `samsa:resp:duckduckgo:a3f1b2c3d4e5f678`
-
-The same query to Wikipedia and DuckDuckGo produce different cache keys, enabling independent TTLs per engine.
-
-## Query Hash
-
-Compute from shared request parameters:
-
-```go
-func QueryHash(query string, pageno int, safesearch int, language, timeRange string) string {
- h := sha256.New()
- fmt.Fprintf(h, "q=%s|", query)
- fmt.Fprintf(h, "pageno=%d|", pageno)
- fmt.Fprintf(h, "safesearch=%d|", safesearch)
- fmt.Fprintf(h, "lang=%s|", language)
- if timeRange != "" {
- fmt.Fprintf(h, "tr=%s|", timeRange)
- }
- return hex.EncodeToString(h.Sum(nil))[:16]
-}
-```
-
-Note: `engines` is NOT included because each engine has its own cache key prefix.
-
-## Cached Data Format
-
-Each cache entry stores:
-
-```go
-type CachedEngineResponse struct {
- Engine string // engine name
- Response []byte // JSON-marshaled contracts.SearchResponse
- StoredAt time.Time // when cached (for staleness check)
-}
-```
-
-## TTL Tiers
-
-### Default Tier Assignments
-
-| Tier | Engines | Default TTL |
-|------|---------|-------------|
-| `static` | wikipedia, wikidata, arxiv, crossref, stackoverflow, github | 24h |
-| `api_general` | braveapi, youtube | 1h |
-| `scraped_general` | google, bing, duckduckgo, qwant, brave | 2h |
-| `news_social` | reddit | 30m |
-| `images` | bing_images, ddg_images, qwant_images | 1h |
-
-### TOML Override Format
-
-```toml
-[cache.ttl_overrides]
-wikipedia = "48h" # override default 24h
-reddit = "15m" # override default 30m
-```
-
-## Search Flow
-
-### 1. Parse Request
-Extract engine list from planner, compute shared `queryHash`.
-
-### 2. Parallel Cache Lookups
-For each engine, spawn a goroutine to check cache:
-
-```go
-type engineCacheResult struct {
- engine string
- resp contracts.SearchResponse
- fromCache bool
- err error
-}
-
-// For each engine, concurrently:
-cached, hit := engineCache.Get(ctx, engine, queryHash)
-if hit && !isStale(cached) {
- return cached.Response, nil // fresh cache hit
-}
-if hit && isStale(cached) {
- go refreshInBackground(engine, queryHash) // stale-while-revalidate
- return cached.Response, nil // return stale immediately
-}
-// cache miss
-fresh, err := engine.Search(ctx, req)
-engineCache.Set(ctx, engine, queryHash, fresh)
-return fresh, err
-```
-
-### 3. Classify Each Engine
-- **Cache miss** → fetch fresh immediately
-- **Cache hit, fresh** → use cached
-- **Cache hit, stale** → use cached, fetch fresh in background (stale-while-revalidate)
-
-### 4. Background Refresh
-When a stale cache hit occurs:
-1. Return stale data immediately
-2. Spawn goroutine to fetch fresh data
-3. On success, overwrite cache with fresh data
-4. On failure, log and discard (stale data already returned)
-
-### 5. Merge
-Collect all engine responses (cached + fresh), merge via existing `MergeResponses`.
-
-### 6. Write Fresh to Cache
-For engines that were fetched fresh, write to cache with their tier TTL.
-
-## Staleness Check
-
-```go
-func isStale(cached CachedEngineResponse, tier TTLTier) bool {
- return time.Since(cached.StoredAt) > tier.Duration
-}
-```
-
-## Tier Resolution
-
-```go
-type TTLTier struct {
- Name string
- Duration time.Duration
-}
-
-func EngineTier(engineName string) TTLTier {
- if override := ttlOverrides[engineName]; override > 0 {
- return TTLTier{Name: engineName, Duration: override}
- }
- return defaultTiers[engineName] // from hardcoded map above
-}
-```
-
-## New Files
-
-### `internal/cache/engine_cache.go`
-`EngineCache` struct wrapping `*Cache` with tier-aware `Get/Set` methods:
-
-```go
-type EngineCache struct {
- cache *Cache
- overrides map[string]time.Duration
- tiers map[string]TTLTier
-}
-
-func (ec *EngineCache) Get(ctx context.Context, engine, queryHash string) (CachedEngineResponse, bool)
-func (ec *EngineCache) Set(ctx context.Context, engine, queryHash string, resp contracts.SearchResponse)
-```
-
-### `internal/cache/tiers.go`
-Tier definitions and `EngineTier(engineName string)` function.
-
-## Modified Files
-
-### `internal/cache/cache.go`
-- Rename `Key()` to `QueryHash()` and add `Engine` prefix externally
-- `Get/Set` remain for favicon caching (unchanged)
-
-### `internal/search/service.go`
-- Replace `*Cache` with `*EngineCache`
-- Parallel cache lookups with goroutines
-- Stale-while-revalidate background refresh
-- Merge collected responses
-
-### `internal/config/config.go`
-Add `TTLOverrides` field:
-
-```go
-type CacheConfig struct {
- // ... existing fields ...
- TTLOverrides map[string]time.Duration
-}
-```
-
-## Config Example
-
-```toml
-[cache]
-enabled = true
-url = "valkey://localhost:6379/0"
-default_ttl = "5m"
-
-[cache.ttl_overrides]
-wikipedia = "48h"
-reddit = "15m"
-braveapi = "2h"
-```
-
-## Error Handling
-
-- **Cache read failure**: Treat as cache miss, fetch fresh
-- **Cache write failure**: Log warning, continue without caching for that engine
-- **Background refresh failure**: Log error, discard (stale data already returned)
-- **Engine failure**: Continue with other engines, report in `unresponsive_engines`
-
-## Testing
-
-1. **Unit tests** for `QueryHash()` consistency
-2. **Unit tests** for `EngineTier()` with overrides
-3. **Unit tests** for `isStale()` boundary conditions
-4. **Integration tests** for cache hit/miss/stale scenarios using mock Valkey
-
-## Out of Scope
-
-- Cache invalidation API (future work)
-- Dog-pile prevention (future work)
-- Per-engine cache size limits (future work)
diff --git a/flake.nix b/flake.nix
index 9ff4c83..d143495 100644
--- a/flake.nix
+++ b/flake.nix
@@ -21,16 +21,13 @@
version = "0.1.0";
src = ./.;
- vendorHash = "sha256-8wlKD+33s97oorCJTfHKAgE2Xp1HKXV+bSr6z29KrKM=";
+ vendorHash = "sha256-NbAa4QM/TI3BTuZs4glx9k3ZjSl2/2LQfKlQ7izR8Ho=";
# Run: nix build .#packages.x86_64-linux.default
- # It will fail with the correct hash. Replace vendorHash with it.
+ # It will fail with the correct hash. Replace it here.
# Embed the templates and static files at build time.
ldflags = [ "-s" "-w" ];
- # Remove stale vendor directory before buildGoModule deletes it.
- preConfigure = "rm -rf vendor || true";
-
nativeCheckInputs = with pkgs; [ ];
# Tests require network; they run in CI instead.
@@ -61,7 +58,7 @@
port = lib.mkOption {
type = lib.types.port;
- default = 5355;
+ default = 8080;
description = "Port to listen on.";
};
diff --git a/go.mod b/go.mod
index 6dff3c8..f153b2d 100644
--- a/go.mod
+++ b/go.mod
@@ -1,10 +1,10 @@
-module github.com/metamorphosis-dev/samsa
+module github.com/metamorphosis-dev/kafka
-go 1.24
+go 1.25.0
require (
github.com/BurntSushi/toml v1.5.0
- github.com/PuerkitoBio/goquery v1.9.0
+ github.com/PuerkitoBio/goquery v1.12.0
github.com/redis/go-redis/v9 v9.18.0
)
@@ -13,7 +13,5 @@ require (
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
go.uber.org/atomic v1.11.0 // indirect
- golang.org/x/net v0.33.0 // indirect
+ golang.org/x/net v0.52.0 // indirect
)
-
-replace golang.org/x/net => golang.org/x/net v0.38.0
diff --git a/go.sum b/go.sum
index 65bdc02..0aad3f0 100644
--- a/go.sum
+++ b/go.sum
@@ -1,7 +1,7 @@
github.com/BurntSushi/toml v1.5.0 h1:W5quZX/G/csjUnuI8SUYlsHs9M38FC7znL0lIO+DvMg=
github.com/BurntSushi/toml v1.5.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho=
-github.com/PuerkitoBio/goquery v1.9.0 h1:zgjKkdpRY9T97Q5DCtcXwfqkcylSFIVCocZmn2huTp8=
-github.com/PuerkitoBio/goquery v1.9.0/go.mod h1:cW1n6TmIMDoORQU5IU/P1T3tGFunOeXEpGP2WHRwkbY=
+github.com/PuerkitoBio/goquery v1.12.0 h1:pAcL4g3WRXekcB9AU/y1mbKez2dbY2AajVhtkO8RIBo=
+github.com/PuerkitoBio/goquery v1.12.0/go.mod h1:802ej+gV2y7bbIhOIoPY5sT183ZW0YFofScC4q/hIpQ=
github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM=
github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA=
github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs=
@@ -28,36 +28,68 @@ github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0=
github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA=
go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE=
go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
-golang.org/x/crypto v0.36.0/go.mod h1:Y4J0ReaxCR1IMaabaSMugxJES1EpwhBHhv2bDHklZvc=
+golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc=
+golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
+golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
+golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
+golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
-golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8=
-golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
+golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
+golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
+golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
+golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk=
+golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
+golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
+golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
+golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0=
+golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
-golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
+golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
-golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
+golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
-golang.org/x/term v0.30.0/go.mod h1:NYYFdzHoI5wRh/h5tDMdMqCqPJZEuNqVR5xJLd/n67g=
+golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
+golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
+golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
+golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU=
+golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk=
+golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY=
+golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
-golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4=
+golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
+golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
+golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
+golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
+golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
+golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
+golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58=
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
diff --git a/img/screenshot1.png b/img/screenshot1.png
deleted file mode 100644
index dfcec4f..0000000
Binary files a/img/screenshot1.png and /dev/null differ
diff --git a/internal/autocomplete/service.go b/internal/autocomplete/service.go
index fd3d8ea..3892d63 100644
--- a/internal/autocomplete/service.go
+++ b/internal/autocomplete/service.go
@@ -1,19 +1,3 @@
-// samsa — a privacy-respecting metasearch engine
-// Copyright (C) 2026-present metamorphosis-dev
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with this program. If not, see .
-
package autocomplete
import (
@@ -25,11 +9,10 @@ import (
"net/url"
"strings"
"time"
-
- "github.com/metamorphosis-dev/samsa/internal/httpclient"
)
-// Service fetches search suggestions from upstream or Wikipedia OpenSearch.
+// Service fetches search suggestions from an upstream SearXNG instance
+// or falls back to Wikipedia's OpenSearch API.
type Service struct {
upstreamURL string
http *http.Client
@@ -41,10 +24,11 @@ func NewService(upstreamURL string, timeout time.Duration) *Service {
}
return &Service{
upstreamURL: strings.TrimRight(upstreamURL, "/"),
- http: httpclient.NewClient(timeout),
+ http: &http.Client{Timeout: timeout},
}
}
+// Suggestions returns search suggestions for the given query.
func (s *Service) Suggestions(ctx context.Context, query string) ([]string, error) {
if strings.TrimSpace(query) == "" {
return nil, nil
@@ -56,6 +40,7 @@ func (s *Service) Suggestions(ctx context.Context, query string) ([]string, erro
return s.wikipediaSuggestions(ctx, query)
}
+// upstreamSuggestions proxies to an upstream SearXNG /autocompleter endpoint.
func (s *Service) upstreamSuggestions(ctx context.Context, query string) ([]string, error) {
u := s.upstreamURL + "/autocompleter?" + url.Values{"q": {query}}.Encode()
req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil)
@@ -79,7 +64,7 @@ func (s *Service) upstreamSuggestions(ctx context.Context, query string) ([]stri
return nil, err
}
- // The /autocompleter endpoint returns a plain JSON array of strings.
+ // SearXNG /autocompleter returns a plain JSON array of strings.
var out []string
if err := json.Unmarshal(body, &out); err != nil {
return nil, err
@@ -104,7 +89,7 @@ func (s *Service) wikipediaSuggestions(ctx context.Context, query string) ([]str
}
req.Header.Set(
"User-Agent",
- "gosearch-go/0.1 (compatible; +https://github.com/metamorphosis-dev/samsa)",
+ "gosearch-go/0.1 (compatible; +https://github.com/metamorphosis-dev/kafka)",
)
resp, err := s.http.Do(req)
diff --git a/internal/cache/cache.go b/internal/cache/cache.go
index 79d3c8b..b0099f9 100644
--- a/internal/cache/cache.go
+++ b/internal/cache/cache.go
@@ -1,19 +1,3 @@
-// samsa — a privacy-respecting metasearch engine
-// Copyright (C) 2026-present metamorphosis-dev
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with this program. If not, see .
-
package cache
import (
@@ -25,7 +9,7 @@ import (
"log/slog"
"time"
- "github.com/metamorphosis-dev/samsa/internal/contracts"
+ "github.com/metamorphosis-dev/kafka/internal/contracts"
"github.com/redis/go-redis/v9"
)
@@ -97,7 +81,7 @@ func (c *Cache) Get(ctx context.Context, key string) (contracts.SearchResponse,
return contracts.SearchResponse{}, false
}
- fullKey := "samsa:" + key
+ fullKey := "kafka:" + key
data, err := c.client.Get(ctx, fullKey).Bytes()
if err != nil {
@@ -129,7 +113,7 @@ func (c *Cache) Set(ctx context.Context, key string, resp contracts.SearchRespon
return
}
- fullKey := "samsa:" + key
+ fullKey := "kafka:" + key
if err := c.client.Set(ctx, fullKey, data, c.ttl).Err(); err != nil {
c.logger.Warn("cache set failed", "key", fullKey, "error", err)
}
@@ -140,42 +124,10 @@ func (c *Cache) Invalidate(ctx context.Context, key string) {
if !c.Enabled() {
return
}
- fullKey := "samsa:" + key
+ fullKey := "kafka:" + key
c.client.Del(ctx, fullKey)
}
-// GetBytes retrieves a raw byte slice from the cache. Returns (data, true) on hit,
-// (nil, false) on miss or error.
-func (c *Cache) GetBytes(ctx context.Context, key string) ([]byte, bool) {
- if !c.Enabled() {
- return nil, false
- }
- fullKey := "samsa:" + key
- data, err := c.client.Get(ctx, fullKey).Bytes()
- if err != nil {
- if err != redis.Nil {
- c.logger.Debug("cache bytes miss (error)", "key", fullKey, "error", err)
- }
- return nil, false
- }
- return data, true
-}
-
-// SetBytes stores a raw byte slice with a custom TTL.
-// If ttl <= 0, the cache's default TTL is used.
-func (c *Cache) SetBytes(ctx context.Context, key string, data []byte, ttl time.Duration) {
- if !c.Enabled() {
- return
- }
- if ttl <= 0 {
- ttl = c.ttl
- }
- fullKey := "samsa:" + key
- if err := c.client.Set(ctx, fullKey, data, ttl).Err(); err != nil {
- c.logger.Warn("cache set bytes failed", "key", fullKey, "error", err)
- }
-}
-
// Close closes the Valkey connection.
func (c *Cache) Close() error {
if c.client == nil {
@@ -208,25 +160,3 @@ func Key(req contracts.SearchRequest) string {
return hex.EncodeToString(h.Sum(nil))[:32]
}
-
-// QueryHash computes a deterministic hash from shared request parameters
-// (query, pageno, safesearch, language, time_range) for use as a cache key suffix.
-// The hash is a truncated SHA-256 (16 hex chars).
-func QueryHash(query string, pageno int, safesearch int, language, timeRange string) string {
- h := sha256.New()
- fmt.Fprintf(h, "q=%s|", query)
- fmt.Fprintf(h, "pageno=%d|", pageno)
- fmt.Fprintf(h, "safesearch=%d|", safesearch)
- fmt.Fprintf(h, "lang=%s|", language)
- if timeRange != "" {
- fmt.Fprintf(h, "tr=%s|", timeRange)
- }
- return hex.EncodeToString(h.Sum(nil))[:16]
-}
-
-// CachedEngineResponse wraps an engine's cached response with metadata.
-type CachedEngineResponse struct {
- Engine string
- Response []byte
- StoredAt time.Time
-}
diff --git a/internal/cache/cache_test.go b/internal/cache/cache_test.go
index 6857f05..3cbb9eb 100644
--- a/internal/cache/cache_test.go
+++ b/internal/cache/cache_test.go
@@ -3,13 +3,13 @@ package cache
import (
"testing"
- "github.com/metamorphosis-dev/samsa/internal/contracts"
+ "github.com/metamorphosis-dev/kafka/internal/contracts"
)
func TestKey_Deterministic(t *testing.T) {
req := contracts.SearchRequest{
Format: contracts.FormatJSON,
- Query: "samsa metamorphosis",
+ Query: "kafka metamorphosis",
Pageno: 1,
Safesearch: 0,
Language: "auto",
@@ -29,7 +29,7 @@ func TestKey_Deterministic(t *testing.T) {
}
func TestKey_DifferentQueries(t *testing.T) {
- reqA := contracts.SearchRequest{Query: "samsa", Format: contracts.FormatJSON}
+ reqA := contracts.SearchRequest{Query: "kafka", Format: contracts.FormatJSON}
reqB := contracts.SearchRequest{Query: "orwell", Format: contracts.FormatJSON}
if Key(reqA) == Key(reqB) {
@@ -75,35 +75,3 @@ func TestNew_NopWithoutAddress(t *testing.T) {
}
func strPtr(s string) *string { return &s }
-
-func TestQueryHash(t *testing.T) {
- // Same params should produce same hash
- hash1 := QueryHash("golang", 1, 0, "en", "")
- hash2 := QueryHash("golang", 1, 0, "en", "")
- if hash1 != hash2 {
- t.Errorf("QueryHash: same params should produce same hash, got %s != %s", hash1, hash2)
- }
-
- // Different query should produce different hash
- hash3 := QueryHash("rust", 1, 0, "en", "")
- if hash1 == hash3 {
- t.Errorf("QueryHash: different queries should produce different hash")
- }
-
- // Different pageno should produce different hash
- hash4 := QueryHash("golang", 2, 0, "en", "")
- if hash1 == hash4 {
- t.Errorf("QueryHash: different pageno should produce different hash")
- }
-
- // time_range should affect hash
- hash5 := QueryHash("golang", 1, 0, "en", "day")
- if hash1 == hash5 {
- t.Errorf("QueryHash: different time_range should produce different hash")
- }
-
- // Hash should be 16 characters (truncated SHA-256)
- if len(hash1) != 16 {
- t.Errorf("QueryHash: expected 16 char hash, got %d", len(hash1))
- }
-}
diff --git a/internal/cache/engine_cache.go b/internal/cache/engine_cache.go
deleted file mode 100644
index edbebdd..0000000
--- a/internal/cache/engine_cache.go
+++ /dev/null
@@ -1,91 +0,0 @@
-package cache
-
-import (
- "context"
- "encoding/json"
- "log/slog"
- "time"
-
- "github.com/metamorphosis-dev/samsa/internal/contracts"
-)
-
-// EngineCache wraps Cache with per-engine tier-aware Get/Set operations.
-type EngineCache struct {
- cache *Cache
- overrides map[string]time.Duration
-}
-
-// NewEngineCache creates a new EngineCache with optional TTL overrides.
-// If overrides is nil, default tier durations are used.
-func NewEngineCache(cache *Cache, overrides map[string]time.Duration) *EngineCache {
- return &EngineCache{
- cache: cache,
- overrides: overrides,
- }
-}
-
-// Get retrieves a cached engine response. Returns (zero value, false) if not
-// found or if cache is disabled.
-func (ec *EngineCache) Get(ctx context.Context, engine, queryHash string) (CachedEngineResponse, bool) {
- key := engineCacheKey(engine, queryHash)
-
- data, ok := ec.cache.GetBytes(ctx, key)
- if !ok {
- return CachedEngineResponse{}, false
- }
-
- var cached CachedEngineResponse
- if err := json.Unmarshal(data, &cached); err != nil {
- ec.cache.logger.Warn("engine cache hit but unmarshal failed", "key", key, "error", err)
- return CachedEngineResponse{}, false
- }
-
- ec.cache.logger.Debug("engine cache hit", "key", key, "engine", engine)
- return cached, true
-}
-
-// Set stores an engine response in the cache with the engine's tier TTL.
-func (ec *EngineCache) Set(ctx context.Context, engine, queryHash string, resp contracts.SearchResponse) {
- if !ec.cache.Enabled() {
- return
- }
-
- data, err := json.Marshal(resp)
- if err != nil {
- ec.cache.logger.Warn("engine cache set: marshal failed", "engine", engine, "error", err)
- return
- }
-
- tier := EngineTier(engine, ec.overrides)
- key := engineCacheKey(engine, queryHash)
-
- cached := CachedEngineResponse{
- Engine: engine,
- Response: data,
- StoredAt: time.Now(),
- }
-
- cachedData, err := json.Marshal(cached)
- if err != nil {
- ec.cache.logger.Warn("engine cache set: wrap marshal failed", "key", key, "error", err)
- return
- }
-
- ec.cache.SetBytes(ctx, key, cachedData, tier.Duration)
-}
-
-// IsStale returns true if the cached response is older than the tier's TTL.
-func (ec *EngineCache) IsStale(cached CachedEngineResponse, engine string) bool {
- tier := EngineTier(engine, ec.overrides)
- return time.Since(cached.StoredAt) > tier.Duration
-}
-
-// Logger returns the logger for background refresh logging.
-func (ec *EngineCache) Logger() *slog.Logger {
- return ec.cache.logger
-}
-
-// engineCacheKey builds the cache key for an engine+query combination.
-func engineCacheKey(engine, queryHash string) string {
- return "samsa:resp:" + engine + ":" + queryHash
-}
\ No newline at end of file
diff --git a/internal/cache/engine_cache_test.go b/internal/cache/engine_cache_test.go
deleted file mode 100644
index 721e1eb..0000000
--- a/internal/cache/engine_cache_test.go
+++ /dev/null
@@ -1,95 +0,0 @@
-package cache
-
-import (
- "context"
- "log/slog"
- "testing"
- "time"
-
- "github.com/metamorphosis-dev/samsa/internal/contracts"
-)
-
-func TestEngineCacheGetSet(t *testing.T) {
- // Create a disabled cache for unit testing (nil client)
- c := &Cache{logger: slog.Default()}
- ec := NewEngineCache(c, nil)
-
- ctx := context.Background()
- cached, ok := ec.Get(ctx, "wikipedia", "abc123")
- if ok {
- t.Errorf("Get on disabled cache: expected false, got %v", ok)
- }
- _ = cached // unused when ok=false
-}
-
-func TestEngineCacheKeyFormat(t *testing.T) {
- key := engineCacheKey("wikipedia", "abc123")
- if key != "samsa:resp:wikipedia:abc123" {
- t.Errorf("engineCacheKey: expected samsa:resp:wikipedia:abc123, got %s", key)
- }
-}
-
-func TestEngineCacheIsStale(t *testing.T) {
- c := &Cache{logger: slog.Default()}
- ec := NewEngineCache(c, nil)
-
- // Fresh response (stored 1 minute ago, wikipedia has 24h TTL)
- fresh := CachedEngineResponse{
- Engine: "wikipedia",
- Response: []byte(`{}`),
- StoredAt: time.Now().Add(-1 * time.Minute),
- }
- if ec.IsStale(fresh, "wikipedia") {
- t.Errorf("IsStale: 1-minute-old wikipedia should NOT be stale")
- }
-
- // Stale response (stored 25 hours ago)
- stale := CachedEngineResponse{
- Engine: "wikipedia",
- Response: []byte(`{}`),
- StoredAt: time.Now().Add(-25 * time.Hour),
- }
- if !ec.IsStale(stale, "wikipedia") {
- t.Errorf("IsStale: 25-hour-old wikipedia SHOULD be stale (24h TTL)")
- }
-
- // Override: 30 minute TTL for reddit
- overrides := map[string]time.Duration{"reddit": 30 * time.Minute}
- ec2 := NewEngineCache(c, overrides)
-
- // 20 minutes old with 30m override should NOT be stale
- redditFresh := CachedEngineResponse{
- Engine: "reddit",
- Response: []byte(`{}`),
- StoredAt: time.Now().Add(-20 * time.Minute),
- }
- if ec2.IsStale(redditFresh, "reddit") {
- t.Errorf("IsStale: 20-min reddit with 30m override should NOT be stale")
- }
-
- // 45 minutes old with 30m override SHOULD be stale
- redditStale := CachedEngineResponse{
- Engine: "reddit",
- Response: []byte(`{}`),
- StoredAt: time.Now().Add(-45 * time.Minute),
- }
- if !ec2.IsStale(redditStale, "reddit") {
- t.Errorf("IsStale: 45-min reddit with 30m override SHOULD be stale")
- }
-}
-
-func TestEngineCacheSetResponseType(t *testing.T) {
- c := &Cache{logger: slog.Default()}
- ec := NewEngineCache(c, nil)
-
- ctx := context.Background()
- urlStr := "https://example.com"
- resp := contracts.SearchResponse{
- Results: []contracts.MainResult{
- {Title: "Test", URL: &urlStr},
- },
- }
-
- // Should not panic on disabled cache
- ec.Set(ctx, "wikipedia", "abc123", resp)
-}
\ No newline at end of file
diff --git a/internal/cache/tiers.go b/internal/cache/tiers.go
deleted file mode 100644
index 6df07ab..0000000
--- a/internal/cache/tiers.go
+++ /dev/null
@@ -1,56 +0,0 @@
-package cache
-
-import "time"
-
-// TTLTier represents a cache TTL tier with a name and duration.
-type TTLTier struct {
- Name string
- Duration time.Duration
-}
-
-// defaultTiers maps engine names to their default TTL tiers.
-var defaultTiers = map[string]TTLTier{
- // Static knowledge engines — rarely change
- "wikipedia": {Name: "static", Duration: 24 * time.Hour},
- "wikidata": {Name: "static", Duration: 24 * time.Hour},
- "arxiv": {Name: "static", Duration: 24 * time.Hour},
- "crossref": {Name: "static", Duration: 24 * time.Hour},
- "stackoverflow": {Name: "static", Duration: 24 * time.Hour},
- "github": {Name: "static", Duration: 24 * time.Hour},
-
- // API-based general search — fresher data
- "braveapi": {Name: "api_general", Duration: 1 * time.Hour},
- "youtube": {Name: "api_general", Duration: 1 * time.Hour},
-
- // Scraped general search — moderately stable
- "google": {Name: "scraped_general", Duration: 2 * time.Hour},
- "bing": {Name: "scraped_general", Duration: 2 * time.Hour},
- "duckduckgo": {Name: "scraped_general", Duration: 2 * time.Hour},
- "qwant": {Name: "scraped_general", Duration: 2 * time.Hour},
- "brave": {Name: "scraped_general", Duration: 2 * time.Hour},
-
- // News/social — changes frequently
- "reddit": {Name: "news_social", Duration: 30 * time.Minute},
-
- // Image search
- "bing_images": {Name: "images", Duration: 1 * time.Hour},
- "ddg_images": {Name: "images", Duration: 1 * time.Hour},
- "qwant_images": {Name: "images", Duration: 1 * time.Hour},
-}
-
-// EngineTier returns the TTL tier for an engine, applying overrides if provided.
-// If the engine has no defined tier, returns a default of 1 hour.
-func EngineTier(engineName string, overrides map[string]time.Duration) TTLTier {
- // Check override first — override tier name is just the engine name
- if override, ok := overrides[engineName]; ok && override > 0 {
- return TTLTier{Name: engineName, Duration: override}
- }
-
- // Fall back to default tier
- if tier, ok := defaultTiers[engineName]; ok {
- return tier
- }
-
- // Unknown engines get a sensible default
- return TTLTier{Name: "unknown", Duration: 1 * time.Hour}
-}
\ No newline at end of file
diff --git a/internal/cache/tiers_test.go b/internal/cache/tiers_test.go
deleted file mode 100644
index b9419a9..0000000
--- a/internal/cache/tiers_test.go
+++ /dev/null
@@ -1,33 +0,0 @@
-package cache
-
-import (
- "testing"
- "time"
-)
-
-func TestEngineTier(t *testing.T) {
- // Test default static tier
- tier := EngineTier("wikipedia", nil)
- if tier.Name != "static" || tier.Duration != 24*time.Hour {
- t.Errorf("wikipedia: expected static/24h, got %s/%v", tier.Name, tier.Duration)
- }
-
- // Test default api_general tier
- tier = EngineTier("braveapi", nil)
- if tier.Name != "api_general" || tier.Duration != 1*time.Hour {
- t.Errorf("braveapi: expected api_general/1h, got %s/%v", tier.Name, tier.Duration)
- }
-
- // Test override takes precedence — override tier name is just the engine name
- override := 48 * time.Hour
- tier = EngineTier("wikipedia", map[string]time.Duration{"wikipedia": override})
- if tier.Name != "wikipedia" || tier.Duration != 48*time.Hour {
- t.Errorf("wikipedia override: expected wikipedia/48h, got %s/%v", tier.Name, tier.Duration)
- }
-
- // Test unknown engine gets default
- tier = EngineTier("unknown_engine", nil)
- if tier.Name != "unknown" || tier.Duration != 1*time.Hour {
- t.Errorf("unknown engine: expected unknown/1h, got %s/%v", tier.Name, tier.Duration)
- }
-}
\ No newline at end of file
diff --git a/internal/config/config.go b/internal/config/config.go
index c834016..93b8d86 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -1,19 +1,3 @@
-// samsa — a privacy-respecting metasearch engine
-// Copyright (C) 2026-present metamorphosis-dev
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with this program. If not, see .
-
package config
import (
@@ -23,10 +7,9 @@ import (
"time"
"github.com/BurntSushi/toml"
- "github.com/metamorphosis-dev/samsa/internal/util"
)
-// Config is the top-level configuration for the samsa service.
+// Config is the top-level configuration for the kafka service.
type Config struct {
Server ServerConfig `toml:"server"`
Upstream UpstreamConfig `toml:"upstream"`
@@ -41,8 +24,7 @@ type Config struct {
type ServerConfig struct {
Port int `toml:"port"`
HTTPTimeout string `toml:"http_timeout"`
- BaseURL string `toml:"base_url"` // Public URL for OpenSearch XML (e.g. "https://search.example.com")
- SourceURL string `toml:"source_url"` // Link to the source code (e.g. "https://git.example.com/fork/kafka")
+ BaseURL string `toml:"base_url"` // Public URL for OpenSearch XML (e.g. "https://search.example.com")
}
type UpstreamConfig struct {
@@ -50,24 +32,17 @@ type UpstreamConfig struct {
}
type EnginesConfig struct {
- LocalPorted []string `toml:"local_ported"`
- Brave BraveConfig `toml:"brave"`
- Qwant QwantConfig `toml:"qwant"`
- YouTube YouTubeConfig `toml:"youtube"`
- StackOverflow *StackOverflowConfig `toml:"stackoverflow"`
-}
-
-type StackOverflowConfig struct {
- APIKey string `toml:"api_key"`
+ LocalPorted []string `toml:"local_ported"`
+ Brave BraveConfig `toml:"brave"`
+ Qwant QwantConfig `toml:"qwant"`
}
// CacheConfig holds Valkey/Redis cache settings.
type CacheConfig struct {
- Address string `toml:"address"` // Valkey server address (e.g. "localhost:6379")
- Password string `toml:"password"` // Auth password (empty = none)
- DB int `toml:"db"` // Database index (default 0)
- DefaultTTL string `toml:"default_ttl"` // Cache TTL (e.g. "5m", default "5m")
- TTLOverrides map[string]string `toml:"ttl_overrides"` // engine -> duration string
+ Address string `toml:"address"` // Valkey server address (e.g. "localhost:6379")
+ Password string `toml:"password"` // Auth password (empty = none)
+ DB int `toml:"db"` // Database index (default 0)
+ DefaultTTL string `toml:"default_ttl"` // Cache TTL (e.g. "5m", default "5m")
}
// CORSConfig holds CORS middleware settings.
@@ -84,7 +59,6 @@ type RateLimitConfig struct {
Requests int `toml:"requests"` // Max requests per window (default: 30)
Window string `toml:"window"` // Time window (e.g. "1m", default: "1m")
CleanupInterval string `toml:"cleanup_interval"` // Stale entry cleanup interval (default: "5m")
- TrustedProxies []string `toml:"trusted_proxies"` // CIDRs allowed to set X-Forwarded-For
}
// GlobalRateLimitConfig holds server-wide rate limiting settings.
@@ -111,10 +85,6 @@ type QwantConfig struct {
ResultsPerPage int `toml:"results_per_page"`
}
-type YouTubeConfig struct {
- APIKey string `toml:"api_key"`
-}
-
// Load reads configuration from the given TOML file path.
// If the file does not exist, it returns defaults (empty values where applicable).
// Environment variables are used as fallbacks for any zero-value fields.
@@ -128,45 +98,18 @@ func Load(path string) (*Config, error) {
}
applyEnvOverrides(cfg)
-
- if err := validateConfig(cfg); err != nil {
- return nil, fmt.Errorf("invalid configuration: %w", err)
- }
-
return cfg, nil
}
-// validateConfig checks security-critical config values at startup.
-func validateConfig(cfg *Config) error {
- if cfg.Server.BaseURL != "" {
- if err := util.ValidatePublicURL(cfg.Server.BaseURL); err != nil {
- return fmt.Errorf("server.base_url: %w", err)
- }
- }
- if cfg.Server.SourceURL != "" {
- if err := util.ValidatePublicURL(cfg.Server.SourceURL); err != nil {
- return fmt.Errorf("server.source_url: %w", err)
- }
- }
- if cfg.Upstream.URL != "" {
- // Validate scheme and well-formedness, but allow private IPs
- // since self-hosted deployments commonly use localhost/internal addresses.
- if _, err := util.SafeURLScheme(cfg.Upstream.URL); err != nil {
- return fmt.Errorf("upstream.url: %w", err)
- }
- }
- return nil
-}
-
func defaultConfig() *Config {
return &Config{
Server: ServerConfig{
- Port: 5355,
+ Port: 8080,
HTTPTimeout: "10s",
},
Upstream: UpstreamConfig{},
Engines: EnginesConfig{
- LocalPorted: []string{"wikipedia", "wikidata", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing", "google", "youtube", "bing_images", "ddg_images", "qwant_images"},
+ LocalPorted: []string{"wikipedia", "arxiv", "crossref", "braveapi", "qwant", "duckduckgo", "github", "reddit", "bing"},
Qwant: QwantConfig{
Category: "web-lite",
ResultsPerPage: 10,
@@ -208,15 +151,6 @@ func applyEnvOverrides(cfg *Config) {
if v := os.Getenv("BRAVE_ACCESS_TOKEN"); v != "" {
cfg.Engines.Brave.AccessToken = v
}
- if v := os.Getenv("YOUTUBE_API_KEY"); v != "" {
- cfg.Engines.YouTube.APIKey = v
- }
- if v := os.Getenv("STACKOVERFLOW_KEY"); v != "" {
- if cfg.Engines.StackOverflow == nil {
- cfg.Engines.StackOverflow = &StackOverflowConfig{}
- }
- cfg.Engines.StackOverflow.APIKey = v
- }
if v := os.Getenv("VALKEY_ADDRESS"); v != "" {
cfg.Cache.Address = v
}
@@ -285,20 +219,6 @@ func (c *Config) CacheTTL() time.Duration {
return 5 * time.Minute
}
-// CacheTTLOverrides returns parsed TTL overrides from config.
-func (c *Config) CacheTTLOverrides() map[string]time.Duration {
- if len(c.Cache.TTLOverrides) == 0 {
- return nil
- }
- out := make(map[string]time.Duration, len(c.Cache.TTLOverrides))
- for engine, durStr := range c.Cache.TTLOverrides {
- if d, err := time.ParseDuration(durStr); err == nil && d > 0 {
- out[engine] = d
- }
- }
- return out
-}
-
// RateLimitWindow parses the rate limit window into a time.Duration.
func (c *Config) RateLimitWindow() time.Duration {
if d, err := time.ParseDuration(c.RateLimit.Window); err == nil && d > 0 {
diff --git a/internal/config/config_test.go b/internal/config/config_test.go
index 993f466..4a09848 100644
--- a/internal/config/config_test.go
+++ b/internal/config/config_test.go
@@ -11,11 +11,11 @@ func TestLoadDefaults(t *testing.T) {
if err != nil {
t.Fatalf("Load with missing file should return defaults: %v", err)
}
- if cfg.Server.Port != 5355 {
- t.Errorf("expected default port 5355, got %d", cfg.Server.Port)
+ if cfg.Server.Port != 8080 {
+ t.Errorf("expected default port 8080, got %d", cfg.Server.Port)
}
- if len(cfg.Engines.LocalPorted) != 15 {
- t.Errorf("expected 15 default engines, got %d", len(cfg.Engines.LocalPorted))
+ if len(cfg.Engines.LocalPorted) != 9 {
+ t.Errorf("expected 9 default engines, got %d", len(cfg.Engines.LocalPorted))
}
}
diff --git a/internal/contracts/main_result.go b/internal/contracts/main_result.go
index 12d2d22..48005f8 100644
--- a/internal/contracts/main_result.go
+++ b/internal/contracts/main_result.go
@@ -1,19 +1,3 @@
-// samsa — a privacy-respecting metasearch engine
-// Copyright (C) 2026-present metamorphosis-dev
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with this program. If not, see .
-
package contracts
import (
@@ -21,33 +5,40 @@ import (
"encoding/json"
)
-// MainResult represents one element of the `results` array.
-// Unknown keys are preserved in `raw` and re-emitted via MarshalJSON.
+// MainResult represents one element of SearXNG's `results` array.
+//
+// SearXNG returns many additional keys beyond what templates use. To keep the
+// contract stable for proxying/merging, we preserve all unknown keys in
+// `raw` and re-emit them via MarshalJSON.
type MainResult struct {
raw map[string]any
- Template string `json:"template"`
- Title string `json:"title"`
- Content string `json:"content"`
- URL *string `json:"url"`
- Pubdate *string `json:"pubdate"`
- Thumbnail string `json:"thumbnail"`
+ // Common fields used by SearXNG templates (RSS uses: title, url, content, pubdate).
+ Template string `json:"template"`
+ Title string `json:"title"`
+ Content string `json:"content"`
+ URL *string `json:"url"`
+ Pubdate *string `json:"pubdate"`
- Engine string `json:"engine"`
- Score float64 `json:"score"`
- Category string `json:"category"`
- Priority string `json:"priority"`
+ Engine string `json:"engine"`
+ Score float64 `json:"score"`
+ Category string `json:"category"`
+ Priority string `json:"priority"`
Positions []int `json:"positions"`
Engines []string `json:"engines"`
+ // These fields exist in SearXNG's MainResult base; keep them so downstream
+ // callers can generate richer output later.
OpenGroup bool `json:"open_group"`
CloseGroup bool `json:"close_group"`
+ // parsed_url in SearXNG is emitted as a tuple; we preserve it as-is.
ParsedURL any `json:"parsed_url"`
}
func (mr *MainResult) UnmarshalJSON(data []byte) error {
+ // Preserve the full object.
dec := json.NewDecoder(bytes.NewReader(data))
dec.UseNumber()
@@ -58,11 +49,11 @@ func (mr *MainResult) UnmarshalJSON(data []byte) error {
mr.raw = m
+ // Fill the typed/common fields (best-effort; don't fail if types differ).
mr.Template = stringOrEmpty(m["template"])
mr.Title = stringOrEmpty(m["title"])
mr.Content = stringOrEmpty(m["content"])
mr.Engine = stringOrEmpty(m["engine"])
- mr.Thumbnail = stringOrEmpty(m["thumbnail"])
mr.Category = stringOrEmpty(m["category"])
mr.Priority = stringOrEmpty(m["priority"])
@@ -95,26 +86,27 @@ func (mr *MainResult) UnmarshalJSON(data []byte) error {
}
func (mr MainResult) MarshalJSON() ([]byte, error) {
+ // If we came from upstream JSON, preserve all keys exactly.
if mr.raw != nil {
return json.Marshal(mr.raw)
}
+ // Otherwise, marshal the known fields.
m := map[string]any{
- "template": mr.Template,
- "title": mr.Title,
- "content": mr.Content,
- "url": mr.URL,
- "pubdate": mr.Pubdate,
- "thumbnail": mr.Thumbnail,
- "engine": mr.Engine,
- "score": mr.Score,
- "category": mr.Category,
- "priority": mr.Priority,
+ "template": mr.Template,
+ "title": mr.Title,
+ "content": mr.Content,
+ "url": mr.URL,
+ "pubdate": mr.Pubdate,
+ "engine": mr.Engine,
+ "score": mr.Score,
+ "category": mr.Category,
+ "priority": mr.Priority,
"positions": mr.Positions,
"engines": mr.Engines,
- "open_group": mr.OpenGroup,
+ "open_group": mr.OpenGroup,
"close_group": mr.CloseGroup,
- "parsed_url": mr.ParsedURL,
+ "parsed_url": mr.ParsedURL,
}
return json.Marshal(m)
}
diff --git a/internal/contracts/types.go b/internal/contracts/types.go
index d77a6a5..a68f77a 100644
--- a/internal/contracts/types.go
+++ b/internal/contracts/types.go
@@ -1,34 +1,21 @@
-// samsa — a privacy-respecting metasearch engine
-// Copyright (C) 2026-present metamorphosis-dev
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with this program. If not, see .
-
package contracts
-// OutputFormat matches the `/search?format=...` values.
+// OutputFormat matches SearXNG's `/search?format=...` values.
type OutputFormat string
const (
- FormatHTML OutputFormat = "html" // accepted for compatibility
+ FormatHTML OutputFormat = "html" // accepted for compatibility (not yet implemented)
FormatJSON OutputFormat = "json"
FormatCSV OutputFormat = "csv"
FormatRSS OutputFormat = "rss"
)
type SearchRequest struct {
- Format OutputFormat
- Query string
+ // Format is what the client requested via `format=...`.
+ Format OutputFormat
+
+ Query string
+
Pageno int
Safesearch int
TimeRange *string
@@ -36,18 +23,20 @@ type SearchRequest struct {
TimeoutLimit *float64
Language string
- // Engines and categories decide which engines run locally vs proxy to upstream.
+ // Engines and categories are used for deciding which engines run locally vs are proxied.
+ // For now, engines can be supplied directly via the `engines` form parameter.
Engines []string
Categories []string
- // EngineData matches the `engine_data--=` parameters.
+ // EngineData matches SearXNG's `engine_data--=` parameters.
EngineData map[string]map[string]string
- // AccessToken gates paid/limited engines. Not part of upstream JSON schema.
+ // AccessToken is an optional request token used to gate paid/limited engines.
+ // It is not part of the upstream JSON schema; it only influences local engines.
AccessToken string
}
-// SearchResponse matches the JSON schema used by `webutils.get_json_response()`.
+// SearchResponse matches the JSON schema returned by SearXNG's `webutils.get_json_response()`.
type SearchResponse struct {
Query string `json:"query"`
NumberOfResults int `json:"number_of_results"`
diff --git a/internal/engines/arxiv.go b/internal/engines/arxiv.go
index d82605e..5565f7f 100644
--- a/internal/engines/arxiv.go
+++ b/internal/engines/arxiv.go
@@ -1,19 +1,3 @@
-// samsa — a privacy-respecting metasearch engine
-// Copyright (C) 2026-present metamorphosis-dev
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with this program. If not, see .
-
package engines
import (
@@ -28,7 +12,7 @@ import (
"strings"
"time"
- "github.com/metamorphosis-dev/samsa/internal/contracts"
+ "github.com/metamorphosis-dev/kafka/internal/contracts"
)
const (
@@ -75,8 +59,8 @@ func (e *ArxivEngine) Search(ctx context.Context, req contracts.SearchRequest) (
defer resp.Body.Close()
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
- io.Copy(io.Discard, io.LimitReader(resp.Body, 16*1024))
- return contracts.SearchResponse{}, fmt.Errorf("arxiv upstream error: status %d", resp.StatusCode)
+ body, _ := io.ReadAll(io.LimitReader(resp.Body, 16*1024))
+ return contracts.SearchResponse{}, fmt.Errorf("arxiv upstream error: status=%d body=%q", resp.StatusCode, string(body))
}
raw, err := io.ReadAll(resp.Body)
diff --git a/internal/engines/arxiv_test.go b/internal/engines/arxiv_test.go
index e0eaa9a..a59fe3c 100644
--- a/internal/engines/arxiv_test.go
+++ b/internal/engines/arxiv_test.go
@@ -6,7 +6,7 @@ import (
"strings"
"testing"
- "github.com/metamorphosis-dev/samsa/internal/contracts"
+ "github.com/metamorphosis-dev/kafka/internal/contracts"
)
func TestArxivEngine_Search(t *testing.T) {
diff --git a/internal/engines/bing.go b/internal/engines/bing.go
index 085e41d..ff1a5da 100644
--- a/internal/engines/bing.go
+++ b/internal/engines/bing.go
@@ -1,19 +1,3 @@
-// samsa — a privacy-respecting metasearch engine
-// Copyright (C) 2026-present metamorphosis-dev
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with this program. If not, see .
-
package engines
import (
@@ -28,7 +12,7 @@ import (
"strconv"
"strings"
- "github.com/metamorphosis-dev/samsa/internal/contracts"
+ "github.com/metamorphosis-dev/kafka/internal/contracts"
)
// BingEngine searches Bing via the public Bing API.
@@ -68,8 +52,8 @@ func (e *BingEngine) Search(ctx context.Context, req contracts.SearchRequest) (c
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
- io.Copy(io.Discard, io.LimitReader(resp.Body, 4096))
- return contracts.SearchResponse{}, fmt.Errorf("bing upstream error: status %d", resp.StatusCode)
+ body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
+ return contracts.SearchResponse{}, fmt.Errorf("bing upstream error: status=%d body=%q", resp.StatusCode, string(body))
}
contentType := resp.Header.Get("Content-Type")
diff --git a/internal/engines/bing_images.go b/internal/engines/bing_images.go
deleted file mode 100644
index 6115c7b..0000000
--- a/internal/engines/bing_images.go
+++ /dev/null
@@ -1,123 +0,0 @@
-// samsa — a privacy-respecting metasearch engine
-// Copyright (C) 2026-present metamorphosis-dev
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-
-package engines
-
-import (
- "context"
- "encoding/xml"
- "errors"
- "fmt"
- "io"
- "net/http"
- "net/url"
- "strings"
-
- "github.com/metamorphosis-dev/samsa/internal/contracts"
-)
-
-// BingImagesEngine searches Bing Images via their public RSS endpoint.
-type BingImagesEngine struct {
- client *http.Client
-}
-
-func (e *BingImagesEngine) Name() string { return "bing_images" }
-
-func (e *BingImagesEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
- if e == nil || e.client == nil {
- return contracts.SearchResponse{}, errors.New("bing_images engine not initialized")
- }
- q := strings.TrimSpace(req.Query)
- if q == "" {
- return contracts.SearchResponse{Query: req.Query}, nil
- }
-
- offset := (req.Pageno - 1) * 10
- endpoint := fmt.Sprintf(
- "https://www.bing.com/images/search?q=%s&count=10&offset=%d&format=rss",
- url.QueryEscape(q),
- offset,
- )
-
- httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
- if err != nil {
- return contracts.SearchResponse{}, err
- }
- httpReq.Header.Set("User-Agent", "kafka/0.1 (compatible; +https://git.ashisgreat.xyz/penal-colony/kafka)")
-
- resp, err := e.client.Do(httpReq)
- if err != nil {
- return contracts.SearchResponse{}, err
- }
- defer resp.Body.Close()
-
- if resp.StatusCode != http.StatusOK {
- io.Copy(io.Discard, io.LimitReader(resp.Body, 4096))
- return contracts.SearchResponse{}, fmt.Errorf("bing_images upstream error: status %d", resp.StatusCode)
- }
-
- return parseBingImagesRSS(resp.Body, req.Query)
-}
-
-// parseBingImagesRSS parses Bing's RSS image search results.
-// The description field contains HTML with an tag whose src is the
-// thumbnail and whose enclosing tag links to the source page.
-func parseBingImagesRSS(r io.Reader, query string) (contracts.SearchResponse, error) {
- type bingImageItem struct {
- Title string `xml:"title"`
- Link string `xml:"link"`
- Descrip string `xml:"description"`
- }
-
- type rssFeed struct {
- XMLName xml.Name `xml:"rss"`
- Channel struct {
- Items []bingImageItem `xml:"item"`
- } `xml:"channel"`
- }
-
- var rss rssFeed
- if err := xml.NewDecoder(r).Decode(&rss); err != nil {
- return contracts.SearchResponse{}, fmt.Errorf("bing_images RSS parse error: %w", err)
- }
-
- results := make([]contracts.MainResult, 0, len(rss.Channel.Items))
- for _, item := range rss.Channel.Items {
- if item.Link == "" {
- continue
- }
-
- // Extract thumbnail URL from the description HTML.
- thumbnail := extractImgSrc(item.Descrip)
- content := stripHTML(item.Descrip)
-
- linkPtr := item.Link
- results = append(results, contracts.MainResult{
- Template: "images",
- Title: item.Title,
- Content: content,
- URL: &linkPtr,
- Thumbnail: thumbnail,
- Engine: "bing_images",
- Score: 0,
- Category: "images",
- Engines: []string{"bing_images"},
- })
- }
-
- return contracts.SearchResponse{
- Query: query,
- NumberOfResults: len(results),
- Results: results,
- Answers: []map[string]any{},
- Corrections: []string{},
- Infoboxes: []map[string]any{},
- Suggestions: []string{},
- UnresponsiveEngines: [][2]string{},
- }, nil
-}
diff --git a/internal/engines/bing_test.go b/internal/engines/bing_test.go
index b72cc71..bb09c64 100644
--- a/internal/engines/bing_test.go
+++ b/internal/engines/bing_test.go
@@ -7,7 +7,7 @@ import (
"testing"
"time"
- "github.com/metamorphosis-dev/samsa/internal/contracts"
+ "github.com/metamorphosis-dev/kafka/internal/contracts"
)
func TestBingEngine_EmptyQuery(t *testing.T) {
diff --git a/internal/engines/brave.go b/internal/engines/brave.go
deleted file mode 100644
index 6b06f7d..0000000
--- a/internal/engines/brave.go
+++ /dev/null
@@ -1,172 +0,0 @@
-package engines
-
-import (
- "context"
- "fmt"
- "io"
- "net/http"
- "net/url"
- "regexp"
- "strings"
-
- "github.com/metamorphosis-dev/samsa/internal/contracts"
-)
-
-type BraveEngine struct {
- client *http.Client
-}
-
-func (e *BraveEngine) Name() string { return "brave" }
-
-func (e *BraveEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
- if strings.TrimSpace(req.Query) == "" {
- return contracts.SearchResponse{Query: req.Query}, nil
- }
-
- start := (req.Pageno - 1) * 20
- u := fmt.Sprintf(
- "https://search.brave.com/search?q=%s&offset=%d&source=web",
- url.QueryEscape(req.Query),
- start,
- )
-
- httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil)
- if err != nil {
- return contracts.SearchResponse{}, err
- }
- httpReq.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36")
- httpReq.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
- httpReq.Header.Set("Accept-Language", "en-US,en;q=0.9")
-
- resp, err := e.client.Do(httpReq)
- if err != nil {
- return contracts.SearchResponse{}, err
- }
- defer resp.Body.Close()
-
- if resp.StatusCode != http.StatusOK {
- io.Copy(io.Discard, io.LimitReader(resp.Body, 4096))
- return contracts.SearchResponse{}, fmt.Errorf("brave error: status %d", resp.StatusCode)
- }
-
- body, err := io.ReadAll(io.LimitReader(resp.Body, 128*1024))
- if err != nil {
- return contracts.SearchResponse{}, err
- }
-
- results := parseBraveResults(string(body))
- return contracts.SearchResponse{
- Query: req.Query,
- NumberOfResults: len(results),
- Results: results,
- Answers: []map[string]any{},
- Corrections: []string{},
- Infoboxes: []map[string]any{},
- Suggestions: extractBraveSuggestions(string(body)),
- UnresponsiveEngines: [][2]string{},
- }, nil
-}
-
-func parseBraveResults(body string) []contracts.MainResult {
- var results []contracts.MainResult
-
- // Brave wraps each result in divs with data-type="web" or data-type="news".
- // Pattern:
`,
- `]+class="[^"]*snippet[^"]*"[^>]*>(.*?)`,
- }
-
- for _, pat := range patterns {
- re := regexp.MustCompile(`(?s)` + pat)
- m := re.FindStringSubmatch(block)
- if len(m) >= 2 {
- text := stripTags(m[1])
- if text != "" {
- return strings.TrimSpace(text)
- }
- }
- }
- return ""
-}
-
-func extractBraveFavicon(block string) string {
- imgPattern := regexp.MustCompile(`]+class="[^"]*favicon[^"]*"[^>]+src="([^"]+)"`)
- m := imgPattern.FindStringSubmatch(block)
- if len(m) >= 2 {
- return m[1]
- }
- return ""
-}
-
-func extractBraveSuggestions(body string) []string {
- var suggestions []string
- // Brave suggestions appear in a dropdown or related searches section.
- suggestPattern := regexp.MustCompile(`(?s)
]+class="[^"]*suggestion[^"]*"[^>]*>.*?]*>([^<]+)`)
- matches := suggestPattern.FindAllStringSubmatch(body, -1)
- seen := map[string]bool{}
- for _, m := range matches {
- if len(m) < 2 {
- continue
- }
- s := strings.TrimSpace(stripTags(m[1]))
- if s != "" && !seen[s] {
- seen[s] = true
- suggestions = append(suggestions, s)
- }
- }
- return suggestions
-}
diff --git a/internal/engines/braveapi.go b/internal/engines/braveapi.go
index 8386547..2cb20ff 100644
--- a/internal/engines/braveapi.go
+++ b/internal/engines/braveapi.go
@@ -1,19 +1,3 @@
-// samsa — a privacy-respecting metasearch engine
-// Copyright (C) 2026-present metamorphosis-dev
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with this program. If not, see .
-
package engines
import (
@@ -27,26 +11,32 @@ import (
"strings"
"time"
- "github.com/metamorphosis-dev/samsa/internal/contracts"
+ "github.com/metamorphosis-dev/kafka/internal/contracts"
)
-// BraveEngine implements the Brave Web Search API.
-// Required: BRAVE_API_KEY env var or config.
-// Optional: BRAVE_ACCESS_TOKEN to gate requests.
-type BraveAPIEngine struct {
+// BraveEngine implements the SearXNG `braveapi` engine (Brave Web Search API).
+//
+// Config / gating:
+// - BRAVE_API_KEY: required to call Brave
+// - BRAVE_ACCESS_TOKEN (optional): if set, the request must include a token
+// that matches the env var (via Authorization Bearer, X-Search-Token,
+// X-Brave-Access-Token, or form field `token`).
+type BraveEngine struct {
client *http.Client
apiKey string
accessGateToken string
resultsPerPage int
}
-func (e *BraveAPIEngine) Name() string { return "braveapi" }
+func (e *BraveEngine) Name() string { return "braveapi" }
-func (e *BraveAPIEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
+func (e *BraveEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
if e == nil || e.client == nil {
return contracts.SearchResponse{}, errors.New("brave engine not initialized")
}
+ // Gate / config checks should not be treated as fatal errors; SearXNG
+ // treats misconfigured engines as unresponsive.
if strings.TrimSpace(e.apiKey) == "" {
return contracts.SearchResponse{
Query: req.Query,
@@ -80,15 +70,10 @@ func (e *BraveAPIEngine) Search(ctx context.Context, req contracts.SearchRequest
return contracts.SearchResponse{Query: req.Query}, nil
}
- // Brave API only supports offset values 0-9 (first page of results).
- // Paginating beyond the first page is not supported by Brave.
offset := 0
if req.Pageno > 1 {
offset = (req.Pageno - 1) * e.resultsPerPage
}
- if offset > 9 {
- offset = 9
- }
args := url.Values{}
args.Set("q", q)
@@ -108,6 +93,8 @@ func (e *BraveAPIEngine) Search(ctx context.Context, req contracts.SearchRequest
}
}
+ // SearXNG's python checks `if params["safesearch"]:` which treats any
+ // non-zero (moderate/strict) as strict.
if req.Safesearch > 0 {
args.Set("safesearch", "strict")
}
@@ -127,8 +114,8 @@ func (e *BraveAPIEngine) Search(ctx context.Context, req contracts.SearchRequest
defer resp.Body.Close()
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
- io.Copy(io.Discard, io.LimitReader(resp.Body, 16*1024))
- return contracts.SearchResponse{}, fmt.Errorf("brave upstream error: status %d", resp.StatusCode)
+ body, _ := io.ReadAll(io.LimitReader(resp.Body, 16*1024))
+ return contracts.SearchResponse{}, fmt.Errorf("brave upstream error: status=%d body=%q", resp.StatusCode, string(body))
}
var api struct {
diff --git a/internal/engines/braveapi_test.go b/internal/engines/braveapi_test.go
index 8b645ed..13c7420 100644
--- a/internal/engines/braveapi_test.go
+++ b/internal/engines/braveapi_test.go
@@ -5,7 +5,7 @@ import (
"net/http"
"testing"
- "github.com/metamorphosis-dev/samsa/internal/contracts"
+ "github.com/metamorphosis-dev/kafka/internal/contracts"
)
func TestBraveEngine_GatingAndHeader(t *testing.T) {
@@ -39,7 +39,7 @@ func TestBraveEngine_GatingAndHeader(t *testing.T) {
})
client := &http.Client{Transport: transport}
- engine := &BraveAPIEngine{
+ engine := &BraveEngine{
client: client,
apiKey: wantAPIKey,
accessGateToken: wantToken,
diff --git a/internal/engines/crossref.go b/internal/engines/crossref.go
index 625c328..cdcc0b4 100644
--- a/internal/engines/crossref.go
+++ b/internal/engines/crossref.go
@@ -1,19 +1,3 @@
-// samsa — a privacy-respecting metasearch engine
-// Copyright (C) 2026-present metamorphosis-dev
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with this program. If not, see .
-
package engines
import (
@@ -27,7 +11,7 @@ import (
"strings"
"time"
- "github.com/metamorphosis-dev/samsa/internal/contracts"
+ "github.com/metamorphosis-dev/kafka/internal/contracts"
)
type CrossrefEngine struct {
@@ -63,8 +47,8 @@ func (e *CrossrefEngine) Search(ctx context.Context, req contracts.SearchRequest
defer resp.Body.Close()
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
- io.Copy(io.Discard, io.LimitReader(resp.Body, 16*1024))
- return contracts.SearchResponse{}, fmt.Errorf("crossref upstream error: status %d", resp.StatusCode)
+ body, _ := io.ReadAll(io.LimitReader(resp.Body, 16*1024))
+ return contracts.SearchResponse{}, fmt.Errorf("crossref upstream error: status=%d body=%q", resp.StatusCode, string(body))
}
var api struct {
diff --git a/internal/engines/crossref_test.go b/internal/engines/crossref_test.go
index a2f950b..070e85b 100644
--- a/internal/engines/crossref_test.go
+++ b/internal/engines/crossref_test.go
@@ -5,7 +5,7 @@ import (
"net/http"
"testing"
- "github.com/metamorphosis-dev/samsa/internal/contracts"
+ "github.com/metamorphosis-dev/kafka/internal/contracts"
)
func TestCrossrefEngine_Search(t *testing.T) {
diff --git a/internal/engines/ddg_images.go b/internal/engines/ddg_images.go
deleted file mode 100644
index 0feedc7..0000000
--- a/internal/engines/ddg_images.go
+++ /dev/null
@@ -1,207 +0,0 @@
-// samsa — a privacy-respecting metasearch engine
-// Copyright (C) 2026-present metamorphosis-dev
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-
-package engines
-
-import (
- "context"
- "encoding/json"
- "errors"
- "fmt"
- "io"
- "net/http"
- "net/url"
- "strconv"
- "strings"
-
- "github.com/metamorphosis-dev/samsa/internal/contracts"
-)
-
-// DuckDuckGoImagesEngine searches DuckDuckGo Images via their vql API.
-type DuckDuckGoImagesEngine struct {
- client *http.Client
-}
-
-func (e *DuckDuckGoImagesEngine) Name() string { return "ddg_images" }
-
-func (e *DuckDuckGoImagesEngine) Search(ctx context.Context, req contracts.SearchRequest) (contracts.SearchResponse, error) {
- if e == nil || e.client == nil {
- return contracts.SearchResponse{}, errors.New("ddg_images engine not initialized")
- }
- q := strings.TrimSpace(req.Query)
- if q == "" {
- return contracts.SearchResponse{Query: req.Query}, nil
- }
-
- // Step 1: Get a VQD token from the initial search page.
- vqd, err := e.getVQD(ctx, q)
- if err != nil {
- return contracts.SearchResponse{
- Query: req.Query,
- UnresponsiveEngines: [][2]string{{"ddg_images", "vqd_fetch_failed"}},
- Results: []contracts.MainResult{},
- Answers: []map[string]any{},
- Corrections: []string{},
- Infoboxes: []map[string]any{},
- Suggestions: []string{},
- }, nil
- }
-
- // Step 2: Fetch image results using the VQD token.
- endpoint := fmt.Sprintf(
- "https://duckduckgo.com/i.js?q=%s&kl=wt-wt&l=wt-wt&p=1&s=%d&vqd=%s",
- url.QueryEscape(q),
- (req.Pageno-1)*50,
- url.QueryEscape(vqd),
- )
-
- httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
- if err != nil {
- return contracts.SearchResponse{}, err
- }
- httpReq.Header.Set("User-Agent", "kafka/0.1 (compatible; +https://git.ashisgreat.xyz/penal-colony/kafka)")
- httpReq.Header.Set("Referer", "https://duckduckgo.com/")
-
- resp, err := e.client.Do(httpReq)
- if err != nil {
- return contracts.SearchResponse{}, err
- }
- defer resp.Body.Close()
-
- if resp.StatusCode != http.StatusOK {
- io.Copy(io.Discard, io.LimitReader(resp.Body, 16*1024))
- return contracts.SearchResponse{}, fmt.Errorf("ddg_images upstream error: status %d", resp.StatusCode)
- }
-
- body, err := io.ReadAll(io.LimitReader(resp.Body, 2*1024*1024))
- if err != nil {
- return contracts.SearchResponse{}, err
- }
-
- return parseDDGImages(body, req.Query)
-}
-
-// getVQD fetches a VQD token from DuckDuckGo's search page.
-func (e *DuckDuckGoImagesEngine) getVQD(ctx context.Context, query string) (string, error) {
- endpoint := "https://duckduckgo.com/?q=" + url.QueryEscape(query)
-
- httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
- if err != nil {
- return "", err
- }
- httpReq.Header.Set("User-Agent", "kafka/0.1 (compatible; +https://git.ashisgreat.xyz/penal-colony/kafka)")
-
- resp, err := e.client.Do(httpReq)
- if err != nil {
- return "", err
- }
- defer resp.Body.Close()
-
- body, err := io.ReadAll(io.LimitReader(resp.Body, 512*1024))
- if err != nil {
- return "", err
- }
-
- // Extract VQD from the HTML: vqd='...'
- vqd := extractVQD(string(body))
- if vqd == "" {
- return "", fmt.Errorf("vqd token not found in response")
- }
- return vqd, nil
-}
-
-// extractVQD extracts the VQD token from DuckDuckGo's HTML response.
-func extractVQD(html string) string {
- // Look for: vqd='...' or vqd="..."
- for _, prefix := range []string{"vqd='", `vqd="`} {
- idx := strings.Index(html, prefix)
- if idx == -1 {
- continue
- }
- start := idx + len(prefix)
- end := start
- for end < len(html) && html[end] != '\'' && html[end] != '"' {
- end++
- }
- if end > start {
- return html[start:end]
- }
- }
- return ""
-}
-
-// ddgImageResult represents a single image result from DDG's JSON API.
-type ddgImageResult struct {
- Title string `json:"title"`
- URL string `json:"url"`
- Thumbnail string `json:"thumbnail"`
- Image string `json:"image"`
- Width int `json:"width"`
- Height int `json:"height"`
- Source string `json:"source"`
-}
-
-func parseDDGImages(body []byte, query string) (contracts.SearchResponse, error) {
- var results struct {
- Results []ddgImageResult `json:"results"`
- }
-
- if err := json.Unmarshal(body, &results); err != nil {
- return contracts.SearchResponse{}, fmt.Errorf("ddg_images JSON parse error: %w", err)
- }
-
- out := make([]contracts.MainResult, 0, len(results.Results))
- for _, img := range results.Results {
- if img.URL == "" {
- continue
- }
-
- // Prefer the full image URL as thumbnail, fall back to the thumbnail field.
- thumb := img.Image
- if thumb == "" {
- thumb = img.Thumbnail
- }
-
- // Build a simple content string showing dimensions.
- content := ""
- if img.Width > 0 && img.Height > 0 {
- content = strconv.Itoa(img.Width) + " × " + strconv.Itoa(img.Height)
- }
- if img.Source != "" {
- if content != "" {
- content += " — " + img.Source
- } else {
- content = img.Source
- }
- }
-
- urlPtr := img.URL
- out = append(out, contracts.MainResult{
- Template: "images",
- Title: img.Title,
- Content: content,
- URL: &urlPtr,
- Thumbnail: thumb,
- Engine: "ddg_images",
- Score: 0,
- Category: "images",
- Engines: []string{"ddg_images"},
- })
- }
-
- return contracts.SearchResponse{
- Query: query,
- NumberOfResults: len(out),
- Results: out,
- Answers: []map[string]any{},
- Corrections: []string{},
- Infoboxes: []map[string]any{},
- Suggestions: []string{},
- UnresponsiveEngines: [][2]string{},
- }, nil
-}
diff --git a/internal/engines/duckduckgo.go b/internal/engines/duckduckgo.go
index b260f1e..28b4972 100644
--- a/internal/engines/duckduckgo.go
+++ b/internal/engines/duckduckgo.go
@@ -1,19 +1,3 @@
-// samsa — a privacy-respecting metasearch engine
-// Copyright (C) 2026-present metamorphosis-dev
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with this program. If not, see .
-
package engines
import (
@@ -25,7 +9,7 @@ import (
"net/url"
"strings"
- "github.com/metamorphosis-dev/samsa/internal/contracts"
+ "github.com/metamorphosis-dev/kafka/internal/contracts"
)
// DuckDuckGoEngine searches DuckDuckGo's Lite/HTML endpoint.
@@ -63,8 +47,8 @@ func (e *DuckDuckGoEngine) Search(ctx context.Context, req contracts.SearchReque
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
- io.Copy(io.Discard, io.LimitReader(resp.Body, 4096))
- return contracts.SearchResponse{}, fmt.Errorf("duckduckgo upstream error: status %d", resp.StatusCode)
+ body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
+ return contracts.SearchResponse{}, fmt.Errorf("duckduckgo upstream error: status=%d body=%q", resp.StatusCode, string(body))
}
results, err := parseDuckDuckGoHTML(resp.Body)
diff --git a/internal/engines/duckduckgo_parse.go b/internal/engines/duckduckgo_parse.go
index 223ef09..b3935e4 100644
--- a/internal/engines/duckduckgo_parse.go
+++ b/internal/engines/duckduckgo_parse.go
@@ -1,19 +1,3 @@
-// samsa — a privacy-respecting metasearch engine
-// Copyright (C) 2026-present metamorphosis-dev
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with this program. If not, see <https://www.gnu.org/licenses/>.
-
package engines
import (
@@ -21,7 +5,7 @@ import (
"net/url"
"strings"
- "github.com/metamorphosis-dev/samsa/internal/contracts"
+ "github.com/metamorphosis-dev/kafka/internal/contracts"
)
// parseDuckDuckGoHTML parses DuckDuckGo Lite's HTML response for search results.
diff --git a/internal/engines/duckduckgo_test.go b/internal/engines/duckduckgo_test.go
index 1162a8d..5379bf2 100644
--- a/internal/engines/duckduckgo_test.go
+++ b/internal/engines/duckduckgo_test.go
@@ -7,7 +7,7 @@ import (
"testing"
"time"
- "github.com/metamorphosis-dev/samsa/internal/contracts"
+ "github.com/metamorphosis-dev/kafka/internal/contracts"
)
func TestDuckDuckGoEngine_EmptyQuery(t *testing.T) {
diff --git a/internal/engines/engine.go b/internal/engines/engine.go
index 3ae2e38..d07aec9 100644
--- a/internal/engines/engine.go
+++ b/internal/engines/engine.go
@@ -1,28 +1,12 @@
-// samsa — a privacy-respecting metasearch engine
-// Copyright (C) 2026-present metamorphosis-dev
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with this program. If not, see <https://www.gnu.org/licenses/>.
-
package engines
import (
"context"
- "github.com/metamorphosis-dev/samsa/internal/contracts"
+ "github.com/metamorphosis-dev/kafka/internal/contracts"
)
-// Engine is a Go-native implementation of a search engine.
+// Engine is a Go-native implementation of a SearXNG engine.
//
// Implementations should return a SearchResponse containing only the results
// for that engine subset; the caller will merge multiple engine responses.
diff --git a/internal/engines/factory.go b/internal/engines/factory.go
index 38d14d1..937225f 100644
--- a/internal/engines/factory.go
+++ b/internal/engines/factory.go
@@ -1,65 +1,28 @@
-// samsa — a privacy-respecting metasearch engine
-// Copyright (C) 2026-present metamorphosis-dev
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with this program. If not, see <https://www.gnu.org/licenses/>.
-
package engines
import (
"net/http"
"os"
"time"
-
- "github.com/metamorphosis-dev/samsa/internal/config"
- "github.com/metamorphosis-dev/samsa/internal/httpclient"
)
-// NewDefaultPortedEngines returns the Go-native engine registry.
-// If cfg is nil, API keys fall back to environment variables.
-func NewDefaultPortedEngines(client *http.Client, cfg *config.Config) map[string]Engine {
+// NewDefaultPortedEngines returns the starter set of Go-native engines.
+// The service can swap/extend this registry later as more engines are ported.
+func NewDefaultPortedEngines(client *http.Client) map[string]Engine {
if client == nil {
- client = httpclient.NewClient(10 * time.Second)
- }
-
- var braveAPIKey, braveAccessToken, youtubeAPIKey string
- if cfg != nil {
- braveAPIKey = cfg.Engines.Brave.APIKey
- braveAccessToken = cfg.Engines.Brave.AccessToken
- youtubeAPIKey = cfg.Engines.YouTube.APIKey
- }
- if braveAPIKey == "" {
- braveAPIKey = os.Getenv("BRAVE_API_KEY")
- }
- if braveAccessToken == "" {
- braveAccessToken = os.Getenv("BRAVE_ACCESS_TOKEN")
- }
- if youtubeAPIKey == "" {
- youtubeAPIKey = os.Getenv("YOUTUBE_API_KEY")
+ client = &http.Client{Timeout: 10 * time.Second}
}
return map[string]Engine{
"wikipedia": &WikipediaEngine{client: client},
- "wikidata": &WikidataEngine{client: client},
"arxiv": &ArxivEngine{client: client},
"crossref": &CrossrefEngine{client: client},
- "braveapi": &BraveAPIEngine{
+ "braveapi": &BraveEngine{
client: client,
- apiKey: braveAPIKey,
- accessGateToken: braveAccessToken,
+ apiKey: os.Getenv("BRAVE_API_KEY"),
+ accessGateToken: os.Getenv("BRAVE_ACCESS_TOKEN"),
resultsPerPage: 20,
},
- "brave": &BraveEngine{client: client},
"qwant": &QwantEngine{
client: client,
category: "web-lite",
@@ -69,24 +32,6 @@ func NewDefaultPortedEngines(client *http.Client, cfg *config.Config) map[string
"github": &GitHubEngine{client: client},
"reddit": &RedditEngine{client: client},
"bing": &BingEngine{client: client},
- "google": &GoogleEngine{client: client},
- "youtube": &YouTubeEngine{
- client: client,
- apiKey: youtubeAPIKey,
- baseURL: "https://www.googleapis.com",
- },
- "stackoverflow": &StackOverflowEngine{client: client, apiKey: stackoverflowAPIKey(cfg)},
- // Image engines
- "bing_images": &BingImagesEngine{client: client},
- "ddg_images": &DuckDuckGoImagesEngine{client: client},
- "qwant_images": &QwantImagesEngine{client: client},
+ "google": &GoogleEngine{client: client},
}
}
-
-// stackoverflowAPIKey returns the Stack Overflow API key from config or env var.
-func stackoverflowAPIKey(cfg *config.Config) string {
- if cfg != nil && cfg.Engines.StackOverflow != nil && cfg.Engines.StackOverflow.APIKey != "" {
- return cfg.Engines.StackOverflow.APIKey
- }
- return os.Getenv("STACKOVERFLOW_KEY")
-}
diff --git a/internal/engines/github.go b/internal/engines/github.go
index 3bfd27e..44102f0 100644
--- a/internal/engines/github.go
+++ b/internal/engines/github.go
@@ -1,19 +1,3 @@
-// samsa — a privacy-respecting metasearch engine
-// Copyright (C) 2026-present metamorphosis-dev
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with this program. If not, see <https://www.gnu.org/licenses/>.
-
package engines
import (
@@ -27,7 +11,7 @@ import (
"strings"
"time"
- "github.com/metamorphosis-dev/samsa/internal/contracts"
+ "github.com/metamorphosis-dev/kafka/internal/contracts"
)
// GitHubEngine searches GitHub repositories and code via the public search API.
@@ -66,8 +50,8 @@ func (e *GitHubEngine) Search(ctx context.Context, req contracts.SearchRequest)
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
- io.Copy(io.Discard, io.LimitReader(resp.Body, 4096))
- return contracts.SearchResponse{}, fmt.Errorf("github api error: status %d", resp.StatusCode)
+ body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
+ return contracts.SearchResponse{}, fmt.Errorf("github api error: status=%d body=%q", resp.StatusCode, string(body))
}
var data struct {
diff --git a/internal/engines/github_test.go b/internal/engines/github_test.go
index 15b2d3b..9b569eb 100644
--- a/internal/engines/github_test.go
+++ b/internal/engines/github_test.go
@@ -6,7 +6,7 @@ import (
"testing"
"time"
- "github.com/metamorphosis-dev/samsa/internal/contracts"
+ "github.com/metamorphosis-dev/kafka/internal/contracts"
)
func TestGitHubEngine_EmptyQuery(t *testing.T) {
diff --git a/internal/engines/google.go b/internal/engines/google.go
index 4358761..0371283 100644
--- a/internal/engines/google.go
+++ b/internal/engines/google.go
@@ -1,19 +1,3 @@
-// samsa — a privacy-respecting metasearch engine
-// Copyright (C) 2026-present metamorphosis-dev
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with this program. If not, see <https://www.gnu.org/licenses/>.
-
package engines
import (
@@ -25,13 +9,23 @@ import (
"regexp"
"strings"
- "github.com/metamorphosis-dev/samsa/internal/contracts"
+ "github.com/metamorphosis-dev/kafka/internal/contracts"
)
-// googleUserAgent is an honest User-Agent identifying the metasearch engine.
-// Using a spoofed GSA User-Agent violates Google's Terms of Service and
-// risks permanent IP blocking.
-var googleUserAgent = "Kafka/0.1 (compatible; +https://github.com/metamorphosis-dev/samsa)"
+// gsaUserAgents is a pool of Google Search App ("GSA") iOS User-Agent strings,
+// as used by SearXNG's google engine to reduce block/CAPTCHA rates.
+// NOTE(review): these spoof mobile-app traffic — the comment being removed above
+// warned this may violate Google's ToS and risk IP blocking; confirm this
+// trade-off is intended before merging.
+var gsaUserAgents = []string{
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 17_5_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/399.2.845414227 Mobile/15E148 Safari/604.1",
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 17_6_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/406.0.862495628 Mobile/15E148 Safari/604.1",
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 17_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/406.0.862495628 Mobile/15E148 Safari/604.1",
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 18_0_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/406.0.862495628 Mobile/15E148 Safari/604.1",
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 18_1_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/399.2.845414227 Mobile/15E148 Safari/604.1",
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 18_5_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/406.0.862495628 Mobile/15E148 Safari/604.1",
+}
+
+func gsaUA() string {
+ return gsaUserAgents[0] // deterministic for now; could rotate
+}
type GoogleEngine struct {
client *http.Client
@@ -47,6 +41,7 @@ func (e *GoogleEngine) Search(ctx context.Context, req contracts.SearchRequest)
start := (req.Pageno - 1) * 10
query := url.QueryEscape(req.Query)
+ // Build URL like SearXNG does.
u := fmt.Sprintf(
"https://www.google.com/search?q=%s&filter=0&start=%d&hl=%s&lr=%s&safe=%s",
query,
@@ -60,7 +55,7 @@ func (e *GoogleEngine) Search(ctx context.Context, req contracts.SearchRequest)
if err != nil {
return contracts.SearchResponse{}, err
}
- httpReq.Header.Set("User-Agent", googleUserAgent)
+ httpReq.Header.Set("User-Agent", gsaUA())
httpReq.Header.Set("Accept", "*/*")
httpReq.AddCookie(&http.Cookie{Name: "CONSENT", Value: "YES+"})
@@ -85,8 +80,8 @@ func (e *GoogleEngine) Search(ctx context.Context, req contracts.SearchRequest)
}
if resp.StatusCode != http.StatusOK {
- io.Copy(io.Discard, io.LimitReader(resp.Body, 4096))
- return contracts.SearchResponse{}, fmt.Errorf("google error: status %d", resp.StatusCode)
+ body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
+ return contracts.SearchResponse{}, fmt.Errorf("google error: status=%d body=%q", resp.StatusCode, string(body))
}
body, err := io.ReadAll(io.LimitReader(resp.Body, 128*1024))
@@ -107,6 +102,7 @@ func (e *GoogleEngine) Search(ctx context.Context, req contracts.SearchRequest)
}, nil
}
+// detectGoogleSorry returns true if the response is a Google block/CAPTCHA page.
func detectGoogleSorry(resp *http.Response) bool {
if resp.Request != nil {
if resp.Request.URL.Host == "sorry.google.com" || strings.HasPrefix(resp.Request.URL.Path, "/sorry") {
@@ -116,10 +112,17 @@ func detectGoogleSorry(resp *http.Response) bool {
return false
}
+// parseGoogleResults extracts search results from Google's HTML.
+// Uses the same selectors as SearXNG: div.MjjYud for result containers.
func parseGoogleResults(body, query string) []contracts.MainResult {
var results []contracts.MainResult
-	mjjPattern := regexp.MustCompile(`<div[^>]*class="[^"]*MjjYud[^"]*"[^>]*>(.*?)</div>`)
+ // SearXNG selector: .//div[contains(@class, "MjjYud")]
+ // Each result block contains a title link and snippet.
+ // We simulate the XPath matching with regex-based extraction.
+
+ // Find all MjjYud div blocks.
+ mjjPattern := regexp.MustCompile(`