From 6ca9d3784c576dee44edc510a945e7889399766f Mon Sep 17 00:00:00 2001
From: Bnyro
Date: Mon, 8 Jun 2026 07:09:06 +0200
Subject: [PATCH] [feat] engines: add seek-ninja general engine (#6217)

Add support for https://seek.ninja (general)

It's very slow because the engine uses server-sent events, which incrementally
send data in their HTTP response [1].

I.e. we wait for the end of the response (7+ seconds), even though the results
data arrives within a few seconds

-> it's very slow, because SearXNG wants to get the full response body before
   it calls the `response(resp)` method

We could use httpx-sse [2], but I'm not sure how to integrate this into
SearXNG and if it's worth it

[1] https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/
[2] https://github.com/florimondmanca/httpx-sse
---
 searx/engines/seekninja.py | 125 +++++++++++++++++++++++++++++++++++++
 searx/settings.yml         |   8 +++
 2 files changed, 133 insertions(+)
 create mode 100644 searx/engines/seekninja.py

diff --git a/searx/engines/seekninja.py b/searx/engines/seekninja.py
new file mode 100644
index 000000000..882274712
--- /dev/null
+++ b/searx/engines/seekninja.py
@@ -0,0 +1,125 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Seek ninja (general)"""
+
+from json import loads
+from hashlib import sha256
+from urllib.parse import urlencode, quote_plus
+
+import typing as t
+
+from searx.extended_types import SXNG_Response
+from searx.network import get
+from searx.result_types import EngineResults
+from searx.utils import extr, html_to_text
+
+if t.TYPE_CHECKING:
+    from searx.search.processors import OnlineParams
+
+about = {
+    "website": "https://seek.ninja",
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": "JSON",
+}
+
+safesearch = True
+
+base_url = "https://seek.ninja"
+categories = ["general"]
+
+safe_search_map = {0: "off", 1: "moderate", 2: "strict"}
+
+PowChallenge = dict[str, t.Any]
+
+
+def _get_challenge(query: str) -> PowChallenge:
+    """Extract the challenge parameters (i.e. nonce, difficulty, ...) from the
+    search website.
+
+    The parameters are embedded in the body of the search page as a
+    ``pow: {...},`` object, which is cut out and parsed as JSON.
+    """
+
+    resp = get(f"{base_url}/s?q={quote_plus(query)}")
+    challenge_raw_json = "{" + extr(resp.text, "pow: {", "},") + "}"
+    return loads(challenge_raw_json)
+
+
+def _solve_pow(challenge: PowChallenge) -> list[int]:
+    """Solve a SHA256 proof-of-work challenge.  This is a 1:1 port of the
+    site's JS code.
+
+    On a high level, it searches for ``k`` solutions whose sha256 hash begins
+    with ``leading`` 0s, i.e.
+
+    .. code:: js
+
+       sha256(nonce || solution).startswith("0" * leading)
+
+    A fractional difficulty additionally caps the hex digit that follows the
+    leading 0s.
+    """
+    nonce = challenge["nonce"]
+    k = int(challenge["k"])
+    indifficulty = float(challenge["indifficulty"])
+
+    leading = int(indifficulty)
+    frac = indifficulty - leading
+    prefix = "0" * leading
+
+    max_nibble = 15 - int(frac * 16) if frac else 15
+
+    solutions: list[int] = []
+    ans = 0
+    # NOTE: the search is unbounded, it only ends once ``k`` solutions are found
+    while len(solutions) < k:
+        h = sha256(f"{nonce}{ans}".encode()).hexdigest()
+        if h.startswith(prefix) and (not frac or int(h[leading], base=16) <= max_nibble):
+            solutions.append(ans)
+        ans += 1
+    return solutions
+
+
+def request(query: str, params: 'OnlineParams') -> None:
+    """Fetch and solve a proof-of-work challenge for ``query`` and set the URL
+    of the search request, which carries the query, the solutions, the
+    challenge ID and the safesearch level."""
+    challenge = _get_challenge(query)
+    solution = _solve_pow(challenge)
+    args = {
+        "q": query,
+        "panswers": ",".join(str(s) for s in solution),
+        "pid": challenge["challengeId"],
+        "adult": safe_search_map[params["safesearch"]],
+    }
+    params["url"] = f"{base_url}/search-sse?{urlencode(args)}"
+
+
+def response(resp: 'SXNG_Response') -> EngineResults:
+    """Collect the results of all ``resultsUpdate`` events in the response."""
+    res = EngineResults()
+    # The response is a stream of server-sent events, each one consisting of
+    # an `event: ...` line followed by a `data: {"results": ...}` line, see
+    # https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/
+    events = resp.text.split("\n\n")
+    for event in events:
+        event_parts = event.split("\n", maxsplit=2)
+        if len(event_parts) != 2:
+            continue
+
+        event_name, data = event_parts
+        if not event_name.endswith("resultsUpdate"):
+            continue
+
+        json_data = loads(data.removeprefix("data: "))
+        for result in json_data["results"]:
+            res.add(
+                res.types.MainResult(
+                    url=result["url"],
+                    title=result["title"],
+                    content=html_to_text(result["blurb"]),
+                )
+            )
+
+    return res
diff --git a/searx/settings.yml b/searx/settings.yml
index da5d73a37..db0aebc19 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -1976,6 +1976,14 @@ engines:
   #       - ...
   #   disabled: true
 
+  - name: seekninja
+    engine: seekninja
+    shortcut: sen
+    # very slow due to its server-sent events architecture
+    timeout: 10
+    disabled: true
+    inactive: true
+
   - name: semantic scholar
     engine: semantic_scholar
     shortcut: se