mirror of
https://github.com/searxng/searxng.git
synced 2026-06-08 19:07:50 +02:00
[feat] engines: add seek-ninja general engine (#6217)
Add support for https://seek.ninja (general). It's very slow because the engine uses server-sent events, which incrementally send data in the HTTP response [1]. That is, we wait for the end of the response (7+ seconds) even though the results data arrives within a few seconds. It's slow because SearXNG wants to get the full response body before it calls the `response(resp)` method. We could use httpx-sse [2], but I'm not sure how to integrate this into SearXNG and whether it's worth it. [1] https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/ [2] https://github.com/florimondmanca/httpx-sse
This commit is contained in:
@@ -0,0 +1,113 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
"""Seek ninja (general)"""
|
||||||
|
|
||||||
|
from json import loads
|
||||||
|
from hashlib import sha256
|
||||||
|
from urllib.parse import urlencode, quote_plus
|
||||||
|
|
||||||
|
import typing as t
|
||||||
|
|
||||||
|
from searx.extended_types import SXNG_Response
|
||||||
|
from searx.network import get
|
||||||
|
from searx.result_types import EngineResults
|
||||||
|
from searx.utils import extr, html_to_text
|
||||||
|
|
||||||
|
if t.TYPE_CHECKING:
|
||||||
|
from searx.search.processors import OnlineParams
|
||||||
|
|
||||||
|
# Metadata about the upstream service: its website, whether an official API
# (and an API key) is involved, and the format the results are delivered in.
about = {
    "website": "https://seek.ninja",
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": "JSON",
}
|
||||||
|
|
||||||
|
# Root URL of the search website; every request of this engine is built on it.
base_url = "https://seek.ninja"
categories = ["general"]

# The engine forwards the safe-search level, translated by ``safe_search_map``
# into the value of the ``adult`` URL argument.
safesearch = True
safe_search_map = {0: "off", 1: "moderate", 2: "strict"}

# Type of a proof-of-work challenge: the JSON object parsed from the search
# page, keyed by field name.
PowChallenge = dict[str, t.Any]
|
||||||
|
|
||||||
|
|
||||||
|
def _get_challenge(query: str) -> PowChallenge:
    """Load the search page for ``query`` and return the proof-of-work
    challenge parameters (i.e. nonce, difficulty, ...) found in it."""

    page = get(f"{base_url}/s?q={quote_plus(query)}")

    # The challenge is looked up between the markers ``pow: {`` and ``},``.
    # Both markers include the braces of the object, so the braces are put
    # back around the extracted text before it is parsed as JSON.
    pow_fields = extr(page.text, "pow: {", "},")
    return loads("{" + pow_fields + "}")
|
||||||
|
|
||||||
|
|
||||||
|
def _solve_pow(challenge: 'PowChallenge') -> list[int]:
    """Solve a SHA-256 proof-of-work challenge.  This is a port of the site's
    JS code.

    On a high level, it searches for the first ``k`` integers ``solution``
    (counting up from 0) whose SHA-256 hex digest, computed over the nonce
    followed by the decimal solution, begins with ``leading`` zeros, i.e.

    .. code:: python

       sha256(f"{nonce}{solution}".encode()).hexdigest().startswith("0" * leading)

    ``leading`` is the integer part of the challenge's ``indifficulty``.  A
    non-zero fractional part tightens the check: the hex digit that follows
    the zeros must not be greater than ``15 - int(fraction * 16)``.

    :param challenge: mapping with the keys ``nonce``, ``k`` and
        ``indifficulty``.
    :return: the ``k`` smallest solutions, in ascending order.
    :raises ValueError: if solutions are requested for a difficulty that asks
        for at least as many leading zeros as a SHA-256 hex digest has digits
        (such a challenge can not be solved).
    """
    nonce = challenge["nonce"]
    k = int(challenge["k"])
    indifficulty = float(challenge["indifficulty"])

    leading = int(indifficulty)
    frac = indifficulty - leading

    # A hex digest has two digits per byte.  Without this check an impossible
    # difficulty would keep the loop below spinning forever.
    hex_digits = sha256().digest_size * 2
    if k > 0 and leading >= hex_digits:
        raise ValueError(f"unsolvable proof-of-work difficulty: {indifficulty}")

    prefix = "0" * leading
    max_nibble = 15 - int(frac * 16) if frac else 15

    # NOTE(review): ``k`` and the difficulty are taken from the remote site and
    # the number of hashes to try is not capped here -- the expected work grows
    # with every additional leading zero.
    solutions: list[int] = []
    ans = 0
    while len(solutions) < k:
        digest = sha256(f"{nonce}{ans}".encode()).hexdigest()
        if digest.startswith(prefix) and (not frac or int(digest[leading], base=16) <= max_nibble):
            solutions.append(ans)
        ans += 1
    return solutions
|
||||||
|
|
||||||
|
|
||||||
|
def request(query: str, params: 'OnlineParams') -> None:
    """Build the search URL for ``query`` and store it in ``params["url"]``.

    A proof-of-work challenge is fetched and solved first.  Its ID and the
    comma-separated solutions are sent along with the query and the
    safe-search level."""

    pow_challenge = _get_challenge(query)
    answers = _solve_pow(pow_challenge)

    query_string = urlencode(
        {
            "q": query,
            "panswers": ",".join(map(str, answers)),
            "pid": pow_challenge["challengeId"],
            "adult": safe_search_map[params["safesearch"]],
        }
    )
    params["url"] = f"{base_url}/search-sse?{query_string}"
|
||||||
|
|
||||||
|
|
||||||
|
def response(resp: 'SXNG_Response') -> EngineResults:
    """Collect the search results from the event stream in ``resp.text``.

    The body is a stream of server-sent events, see
    https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/

    Events are separated by an empty line.  Only events that consist of
    exactly two lines -- an event line ending in ``resultsUpdate`` followed by
    a ``data: {"results": ...}`` line -- contribute results; all other events
    are skipped."""

    results = EngineResults()

    for block in resp.text.split("\n\n"):
        event_line, newline, data_line = block.partition("\n")

        # skip blocks that are not made of exactly two lines
        if not newline or "\n" in data_line:
            continue

        if event_line.endswith("resultsUpdate"):
            payload = loads(data_line.removeprefix("data: "))
            for item in payload["results"]:
                entry = results.types.MainResult(
                    url=item["url"],
                    title=item["title"],
                    content=html_to_text(item["blurb"]),
                )
                results.add(entry)

    return results
|
||||||
@@ -1976,6 +1976,14 @@ engines:
|
|||||||
# - ...
|
# - ...
|
||||||
# disabled: true
|
# disabled: true
|
||||||
|
|
||||||
|
- name: seekninja
|
||||||
|
engine: seekninja
|
||||||
|
shortcut: sen
|
||||||
|
# very slow due to its server-sent events (SSE) architecture
|
||||||
|
timeout: 10
|
||||||
|
disabled: true
|
||||||
|
inactive: true
|
||||||
|
|
||||||
- name: semantic scholar
|
- name: semantic scholar
|
||||||
engine: semantic_scholar
|
engine: semantic_scholar
|
||||||
shortcut: se
|
shortcut: se
|
||||||
|
|||||||
Reference in New Issue
Block a user