mirror of
https://github.com/searxng/searxng.git
synced 2026-06-08 19:07:50 +02:00
[feat] engines: add seek-ninja general engine (#6217)
Add support for https://seek.ninja (general). It's very slow because the engine uses server-sent events, which incrementally send data in their HTTP response [1]. I.e. we wait for the end of the response (7+ seconds) even though the results data arrives within a few seconds -> it's very slow, because SearXNG wants to get the full response body before it calls the `response(resp)` method. We could use httpx-sse [2], but I'm not sure how to integrate this into SearXNG and whether it's worth it. [1] https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/ [2] https://github.com/florimondmanca/httpx-sse
This commit is contained in:
@@ -0,0 +1,113 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Seek ninja (general)"""
|
||||
|
||||
from json import loads
|
||||
from hashlib import sha256
|
||||
from urllib.parse import urlencode, quote_plus
|
||||
|
||||
import typing as t
|
||||
|
||||
from searx.extended_types import SXNG_Response
|
||||
from searx.network import get
|
||||
from searx.result_types import EngineResults
|
||||
from searx.utils import extr, html_to_text
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
from searx.search.processors import OnlineParams
|
||||
|
||||
# Engine metadata: the site is scraped (no official API, no API key) and
# delivers its results as JSON.
about = {
    "website": "https://seek.ninja",
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": "JSON",
}

# Engine traits
categories = ["general"]
safesearch = True

# Root URL that all requests of this engine are built on.
base_url = "https://seek.ninja"

# Maps the numeric safe-search level taken from the request params to the
# value sent in the ``adult`` URL argument.
safe_search_map = {
    0: "off",
    1: "moderate",
    2: "strict",
}

# Proof-of-work challenge as parsed from the search page's inline JSON.
PowChallenge = dict[str, t.Any]
|
||||
|
||||
|
||||
def _get_challenge(query: str) -> PowChallenge:
    """Fetch the search page for ``query`` and return the proof-of-work
    challenge (i.e. nonce, difficulty, ...) that is embedded in it."""

    page = get(f"{base_url}/s?q={quote_plus(query)}").text
    # The page carries the challenge as ``pow: {...},``; only the text
    # between the braces is extracted, so the braces are put back before the
    # fragment is parsed as JSON.
    pow_fields = extr(page, "pow: {", "},")
    return loads("{" + pow_fields + "}")
|
||||
|
||||
|
||||
def _solve_pow(challenge: PowChallenge) -> list[int]:
|
||||
"""Solves a Proof of Work SHA256 challenges. This is a 1:1 port of the
|
||||
site's JS code.
|
||||
|
||||
On a high-level, it tries to ``k`` amount of solutions, where its sha256
|
||||
hash begins with: ``leading`` 0s, i.e.
|
||||
|
||||
.. code: js
|
||||
|
||||
sha256(nonce || solution).startswith("0" * leading)
|
||||
"""
|
||||
nonce = challenge["nonce"]
|
||||
k = int(challenge["k"])
|
||||
indifficulty = float(challenge["indifficulty"])
|
||||
|
||||
leading = int(indifficulty)
|
||||
frac = indifficulty - leading
|
||||
prefix = "".join("0" for _ in range(0, leading))
|
||||
|
||||
maxNib = 15 - int(frac * 16) if frac else 15
|
||||
|
||||
solutions: list[int] = []
|
||||
ans = 0
|
||||
while len(solutions) < k:
|
||||
h = sha256(f"{nonce}{ans}".encode()).hexdigest()
|
||||
if h.startswith(prefix) and (not frac or int(h[leading], base=16) <= maxNib):
|
||||
solutions.append(ans)
|
||||
ans += 1
|
||||
return solutions
|
||||
|
||||
|
||||
def request(query: str, params: 'OnlineParams') -> None:
    """Assemble the URL of the search request and store it in
    ``params["url"]``.

    A proof-of-work challenge is fetched and solved first; the challenge ID
    and the comma separated answers are sent along with the query and the
    safe-search level.
    """
    pow_challenge = _get_challenge(query)
    answers = map(str, _solve_pow(pow_challenge))

    query_string = urlencode(
        {
            "q": query,
            "panswers": ",".join(answers),
            "pid": pow_challenge["challengeId"],
            "adult": safe_search_map[params["safesearch"]],
        }
    )
    params["url"] = f"{base_url}/search-sse?{query_string}"
|
||||
|
||||
|
||||
def response(resp: 'SXNG_Response') -> EngineResults:
    """Collect the search results from the ``resultsUpdate`` events of the
    response body."""
    res = EngineResults()

    # The body is a stream of server-sent events: blocks separated by an
    # empty line, each made of an `event: <type>` line followed by a
    # `data: {"results": ...}` line, see
    # https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/
    for block in resp.text.split("\n\n"):
        # only blocks made of exactly two lines are taken into account
        if block.count("\n") != 1:
            continue

        event_line, _, data_line = block.partition("\n")
        if not event_line.endswith("resultsUpdate"):
            continue

        payload = loads(data_line.removeprefix("data: "))
        for item in payload["results"]:
            main_result = res.types.MainResult(
                url=item["url"],
                title=item["title"],
                content=html_to_text(item["blurb"]),
            )
            res.add(main_result)

    return res
|
||||
@@ -1976,6 +1976,14 @@ engines:
|
||||
# - ...
|
||||
# disabled: true
|
||||
|
||||
- name: seekninja
|
||||
engine: seekninja
|
||||
shortcut: sen
|
||||
# very slow due to its server-sent events architecture
|
||||
timeout: 10
|
||||
disabled: true
|
||||
inactive: true
|
||||
|
||||
- name: semantic scholar
|
||||
engine: semantic_scholar
|
||||
shortcut: se
|
||||
|
||||
Reference in New Issue
Block a user