Compare commits

..

4 Commits

Author SHA1 Message Date
Bnyro 72a827ae93 [fix] yep: send Sec-Fetch headers to bypass "access denied" (#6223)
Avoids yep's botblocking by sending Sec-Fetch-* headers (as the browser does).
2026-06-08 10:55:17 +02:00
Bnyro 6ca9d3784c [feat] engines: add seek-ninja general engine (#6217)
Add support for https://seek.ninja (general)

It's very slow because the engine uses server-sent events (SSE), which
incrementally send data in the HTTP response [1].

I.e. we wait for the end of the response (7+ seconds), even though the result
data arrives within a few seconds. This is slow because SearXNG needs the full
response body before it calls the `response(resp)` method.

We could use httpx-sse [2], but I'm not sure how to integrate it into SearXNG
or whether it's worth it.

[1] https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/
[2] https://github.com/florimondmanca/httpx-sse
2026-06-08 07:09:06 +02:00
Bnyro 63f264220b [feat] engines: add heexy engine (general, images) (#6218) 2026-06-08 05:54:35 +02:00
Austin-Olacsi 41fcf0be4b [fix] aol engine uses wikidata id for C++ (#6221) 2026-06-08 05:32:26 +02:00
5 changed files with 255 additions and 21 deletions
+1 -1
View File
@@ -40,7 +40,7 @@ if t.TYPE_CHECKING:
about = {
"website": "https://www.aol.com",
"wikidata_id": "Q2407",
"wikidata_id": "Q27585",
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
+90
View File
@@ -0,0 +1,90 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Heexy_ is a minimalist search engine that focuses on privacy.
Although it also supports news and videos, these are not implemented here
because they usually return either no results or only a few irrelevant ones.
It seems to use Bing internally, as the image thumbnails are loaded from Bing.
.. _Heexy: https://docs.heexy.org/introduction
"""
from urllib.parse import urlencode
import typing as t
from searx.exceptions import SearxEngineAccessDeniedException
from searx.result_types import EngineResults
if t.TYPE_CHECKING:
from searx.extended_types import SXNG_Response
from searx.search.processors import OnlineParams
# Engine metadata: describes the upstream service and the format of its
# responses (the response handler decodes the body as JSON).
about = {
"website": "https://heexy.org",
"wikidata_id": None,
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": "JSON",
}
# Engine capability flags and default category.
# NOTE(review): presumably read by the SearXNG engine loader -- confirm
# against the engine documentation.
paging = True
safesearch = True
categories = ["general"]
heexy_categ = "web"
"""Category to search in. Can be either "web" or "image"."""
# Host of the search API; used to build the request URL and sent as the
# ``Origin`` request header.
base_url = "https://seapi.heexy.org"
# Maps the ``safesearch`` request parameter (0, 1, 2) to the value of the
# ``safe`` query argument; levels 1 and 2 both map to "on".
safe_search_map = {0: "off", 1: "on", 2: "on"}
def init(_) -> None:
    """Validate the engine configuration.

    The single positional argument is ignored.

    :raises ValueError: if ``heexy_categ`` is neither ``"web"`` nor
        ``"image"``.
    """
    if heexy_categ not in ("web", "image"):
        # An f-string is used instead of %-formatting: with ``%`` a tuple value
        # would be unpacked as format arguments and raise TypeError instead of
        # the intended ValueError.
        raise ValueError(f"invalid search category: {heexy_categ}")
def request(query: str, params: "OnlineParams") -> None:
    """Fill ``params`` with the URL and headers of a Heexy search request.

    The query string carries the search terms, the page number and the mapped
    safe-search value.  A ``lang`` argument (the part of the locale before the
    first ``-``) is appended unless the locale is ``"all"``.
    """
    query_args = {
        "q": query,
        "page": params["pageno"],
        "safe": safe_search_map[params["safesearch"]],
    }

    locale = params["searxng_locale"]
    if locale != "all":
        # Only the language part is sent, e.g. "en" for "en-US".
        query_args["lang"] = locale.partition("-")[0]

    endpoint = f"{base_url}/search/{heexy_categ}"
    params["url"] = endpoint + "?" + urlencode(query_args)
    params["headers"]["Origin"] = base_url
def response(resp: "SXNG_Response"):
    """Convert Heexy's JSON response into engine results.

    Depending on ``heexy_categ``, each entry of ``results`` becomes either a
    main (web) result or an image result.

    :raises SearxEngineAccessDeniedException: if the ``success`` field of the
        response is falsy.
    """
    res = EngineResults()
    payload = resp.json()

    if not payload["success"]:
        raise SearxEngineAccessDeniedException()

    item: dict[str, str]
    for item in payload["results"]:
        if heexy_categ == "web":
            entry = res.types.MainResult(
                url=item["url"],
                title=item["title"],
                content=item["description"],
            )
            res.add(entry)
            continue

        if heexy_categ == "image":
            # Image entries use their description as the title.
            entry = res.types.Image(
                title=item["description"],
                url=item["url"],
                thumbnail_src=item["image"],
                img_src=item["rawImage"],
            )
            res.add(entry)

    return res
+113
View File
@@ -0,0 +1,113 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Seek ninja (general)"""
from json import loads
from hashlib import sha256
from urllib.parse import urlencode, quote_plus
import typing as t
from searx.extended_types import SXNG_Response
from searx.network import get
from searx.result_types import EngineResults
from searx.utils import extr, html_to_text
if t.TYPE_CHECKING:
from searx.search.processors import OnlineParams
# Engine metadata describing the upstream service.
about = {
"website": "https://seek.ninja",
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": "JSON",
}
# NOTE(review): engine capability flag, presumably read by the SearXNG engine
# loader -- confirm against the engine documentation.
safesearch = True
# Base of both the challenge page (``/s``) and the search endpoint
# (``/search-sse``).
base_url = "https://seek.ninja"
categories = ["general"]
# Maps the ``safesearch`` request parameter (0, 1, 2) to the value of the
# ``adult`` query argument.
safe_search_map = {0: "off", 1: "moderate", 2: "strict"}
# Decoded proof-of-work challenge; the engine reads the keys ``nonce``, ``k``,
# ``indifficulty`` and ``challengeId`` from it.
PowChallenge = dict[str, t.Any]
def _get_challenge(query: str) -> PowChallenge:
    """Fetch the search page for ``query`` and return the proof-of-work
    challenge parameters (i.e. nonce, difficulty, ...) embedded in it."""
    page = get(f"{base_url}/s?q={quote_plus(query)}")
    # The parameters are inlined in the page as ``pow: {...},``; only the text
    # between the braces is extracted, so the braces are added back before
    # decoding it as JSON.
    pow_fields = extr(page.text, "pow: {", "},")
    return loads("{" + pow_fields + "}")
def _solve_pow(challenge: PowChallenge) -> list[int]:
    """Solve a SHA-256 proof-of-work challenge.  This is a 1:1 port of the
    site's JS code.

    Candidate integers are tried in ascending order, starting at ``0``, until
    ``k`` solutions have been found.  A candidate is a solution when the hex
    digest of its SHA-256 hash begins with ``leading`` zeros, i.e.

    .. code:: js

       sha256(nonce || solution).startswith("0" * leading)

    ``leading`` is the integer part of ``indifficulty``.  If ``indifficulty``
    has a fractional part ``frac``, the hex digit that follows the zero prefix
    must additionally be at most ``15 - int(frac * 16)``.

    :param challenge: mapping with the keys ``nonce``, ``k`` and
        ``indifficulty``.
    :return: the ``k`` smallest solutions in ascending order.
    """
    nonce = challenge["nonce"]
    k = int(challenge["k"])
    indifficulty = float(challenge["indifficulty"])

    leading = int(indifficulty)
    frac = indifficulty - leading
    prefix = "0" * leading
    # Largest value allowed for the first hex digit after the zero prefix.
    max_nibble = 15 - int(frac * 16) if frac else 15

    solutions: list[int] = []
    candidate = 0
    while len(solutions) < k:
        digest = sha256(f"{nonce}{candidate}".encode()).hexdigest()
        if digest.startswith(prefix) and (not frac or int(digest[leading], base=16) <= max_nibble):
            solutions.append(candidate)
        candidate += 1
    return solutions
def request(query: str, params: 'OnlineParams') -> None:
    """Fill ``params`` with the URL of a Seek ninja search request.

    A proof-of-work challenge is fetched and solved first; its solutions and
    its ID are sent along with the query and the mapped safe-search level.
    """
    pow_challenge = _get_challenge(query)
    answers = _solve_pow(pow_challenge)

    query_args = {
        "q": query,
        # The solutions are sent as a comma separated list.
        "panswers": ",".join(map(str, answers)),
        "pid": pow_challenge["challengeId"],
        "adult": safe_search_map[params["safesearch"]],
    }
    params["url"] = f"{base_url}/search-sse?{urlencode(query_args)}"
def response(resp: 'SXNG_Response') -> EngineResults:
    """Parse the event stream returned by the search endpoint.

    Only events that consist of exactly two lines and whose first line ends
    with ``resultsUpdate`` are evaluated; each entry in their ``results`` list
    becomes a main result.
    """
    res = EngineResults()

    # The response body is a stream of server-sent events.  Events are
    # separated by a blank line and are made up of an ``event: <type>`` line
    # followed by a ``data: {"results": ...}`` line, see
    # https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/
    for chunk in resp.text.split("\n\n"):
        # Skip everything that is not exactly "<event line>\n<data line>".
        if chunk.count("\n") != 1:
            continue

        header, _, payload = chunk.partition("\n")
        if not header.endswith("resultsUpdate"):
            continue

        for item in loads(payload.removeprefix("data: "))["results"]:
            entry = res.types.MainResult(
                url=item["url"],
                title=item["title"],
                content=html_to_text(item["blurb"]),
            )
            res.add(entry)

    return res
+27 -20
View File
@@ -16,17 +16,18 @@ if t.TYPE_CHECKING:
from searx.search.processors import OnlineParams
about = {
'website': 'https://yep.com/',
'official_api_documentation': 'https://docs.developer.yelp.com',
'use_official_api': False,
'require_api_key': False,
'results': 'JSON',
"website": "https://yep.com/",
"official_api_documentation": "https://docs.developer.yelp.com",
"use_official_api": False,
"require_api_key": False,
"results": "JSON",
}
base_url = "https://api.yep.com"
web_base_url = "https://yep.com"
safesearch = True
safesearch_map = {0: 'off', 1: 'moderate', 2: 'strict'}
safesearch_map = {0: "off", 1: "moderate", 2: "strict"}
enable_http2 = False
@@ -36,34 +37,42 @@ _IMPORT_RE = re.compile(r"import\"(.*?)\";")
_LANGUAGE_RE = re.compile(r"\{english:\".*?\",code_string:\"(.*?)\",code:\".*?\"\}")
def request(query: str, params: 'OnlineParams') -> None:
args = {'query': query, 'safeSearch': safesearch_map[params['safesearch']], 'limit': results_per_page}
def request(query: str, params: "OnlineParams") -> None:
args = {"query": query, "safeSearch": safesearch_map[params["safesearch"]], "limit": results_per_page}
engine_language: str = traits.get_language(params["searxng_locale"])
engine_language: str | None = traits.get_language(params["searxng_locale"])
if engine_language:
args["hl"] = engine_language
params['url'] = f"{base_url}/search?{urlencode(args)}"
params['headers']['Referer'] = 'https://yep.com/'
params['headers']['Origin'] = 'https://yep.com'
params["url"] = f"{base_url}/search?{urlencode(args)}"
params["headers"].update(
{
"Referer": f"{web_base_url}/",
"Origin": web_base_url,
"Sec-Fetch-Dest": "empty",
"Sec-Fetch-Mode": "cors",
"Sec-Fetch-Site": "same-site",
}
)
def response(resp: 'SXNG_Response') -> EngineResults:
def response(resp: "SXNG_Response") -> EngineResults:
res = EngineResults()
for result in resp.json()[1]['results']:
result: dict[str, str]
for result in resp.json()[1]["results"]:
res.add(
res.types.MainResult(
url=result['url'],
title=result['title'],
content=html_to_text(result['snippet']),
url=result["url"],
title=result["title"],
content=html_to_text(result["snippet"]),
)
)
return res
def fetch_traits(engine_traits: 'EngineTraits'):
def fetch_traits(engine_traits: "EngineTraits"):
"""Fetch :ref:`languages <yep languages>` and :ref:`regions <yep
regions>` from Yep.
@@ -83,8 +92,6 @@ def fetch_traits(engine_traits: 'EngineTraits'):
from searx.utils import gen_useragent
web_base_url = "https://yep.com"
headers = {
"User-Agent": gen_useragent(),
"Referer": f"{web_base_url}/",
+24
View File
@@ -1131,6 +1131,22 @@ engines:
shortcut: hn
disabled: true
- name: heexy
engine: heexy
categories: general
heexy_categ: web
shortcut: he
disabled: true
inactive: true
- name: heexy images
engine: heexy
categories: images
heexy_categ: image
shortcut: hei
disabled: true
inactive: true
- name: hex
engine: hex
shortcut: hex
@@ -1960,6 +1976,14 @@ engines:
# - ...
# disabled: true
- name: seekninja
engine: seekninja
shortcut: sen
# very slow due to its server-sent events (SSE) architecture
timeout: 10
disabled: true
inactive: true
- name: semantic scholar
engine: semantic_scholar
shortcut: se