[feat] engines: add seek-ninja general engine (#6217)

Add support for https://seek.ninja (general) It's very slow because the engine uses Server-side events, that incrementally send data in their HTTP response [1]. I.e. we wait for the end of the response (7+ seconds), even though the results data arrives within a few seconds -> it's very slow, because SearXNG wants to get the full response body before it calls the `response(resp)` method We could use httpx-sse [2], but I'm not sure how to integrate this into SearXNG and if it's worth it [1] https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/ [2] https://github.com/florimondmanca/httpx-sse
2026-06-08 19:07:50 +02:00 · 2026-06-08 07:09:06 +02:00
parent 63f264220b
commit 6ca9d3784c
2 changed files with 121 additions and 0 deletions
@@ -0,0 +1,113 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Seek ninja (general)"""
 from json import loads
 from hashlib import sha256
 from urllib.parse import urlencode, quote_plus
 import typing as t
 from searx.extended_types import SXNG_Response
 from searx.network import get
 from searx.result_types import EngineResults
 from searx.utils import extr, html_to_text
 if t.TYPE_CHECKING:
    from searx.search.processors import OnlineParams
 about = {
    "website": "https://seek.ninja",
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": "JSON",
 }
 safesearch = True
 base_url = "https://seek.ninja"
 categories = ["general"]
 safe_search_map = {0: "off", 1: "moderate", 2: "strict"}
 PowChallenge = dict[str, t.Any]
 def _get_challenge(query: str) -> PowChallenge:
    """Extract the challenge parameters (i.e. nonce, difficulty, ...) from the
    search website."""
    resp = get(f"{base_url}/s?q={quote_plus(query)}")
    challenge_raw_json = "{" + extr(resp.text, "pow: {", "},") + "}"
    return loads(challenge_raw_json)
 def _solve_pow(challenge: PowChallenge) -> list[int]:
    """Solves a Proof of Work SHA256 challenges. This is a 1:1 port of the
    site's JS code.
    On a high-level, it tries to ``k`` amount of solutions, where its sha256
    hash begins with: ``leading`` 0s, i.e.
    .. code: js
       sha256(nonce || solution).startswith("0" * leading)
    """
    nonce = challenge["nonce"]
    k = int(challenge["k"])
    indifficulty = float(challenge["indifficulty"])
    leading = int(indifficulty)
    frac = indifficulty - leading
    prefix = "".join("0" for _ in range(0, leading))
    maxNib = 15 - int(frac * 16) if frac else 15
    solutions: list[int] = []
    ans = 0
    while len(solutions) < k:
        h = sha256(f"{nonce}{ans}".encode()).hexdigest()
        if h.startswith(prefix) and (not frac or int(h[leading], base=16) <= maxNib):
            solutions.append(ans)
        ans += 1
    return solutions
 def request(query: str, params: 'OnlineParams') -> None:
    challenge = _get_challenge(query)
    solution = _solve_pow(challenge)
    args = {
        "q": query,
        "panswers": ",".join(str(s) for s in solution),
        "pid": challenge["challengeId"],
        "adult": safe_search_map[params["safesearch"]],
    }
    params["url"] = f"{base_url}/search-sse?{urlencode(args)}"
 def response(resp: 'SXNG_Response') -> EngineResults:
    res = EngineResults()
    # The response is a stream of server-side events,
    # so it is split into `event: <type>` and `data: {"results": ...}`
    # see https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/
    events = resp.text.split("\n\n")
    for event in events:
        event_parts = event.split("\n", maxsplit=2)
        if len(event_parts) != 2:
            continue
        event_name, data = event_parts
        if not event_name.endswith("resultsUpdate"):
            continue
        json_data = loads(data.removeprefix("data: "))
        for result in json_data["results"]:
            res.add(
                res.types.MainResult(
                    url=result["url"],
                    title=result["title"],
                    content=html_to_text(result["blurb"]),
                )
            )
    return res
@@ -1976,6 +1976,14 @@ engines:
  #       - ...
  #   disabled: true
  - name: seekninja
    engine: seekninja
    shortcut: sen
    # very slow due to its server-side events architecture
    timeout: 10
    disabled: true
    inactive: true
  - name: semantic scholar
    engine: semantic_scholar
    shortcut: se