Compare commits

...

8 Commits

Author SHA1 Message Date
Markus Heiser 28ef4f7447 [mod] hardening of the Result.filter_urls() method (#6117)
Exceptions raised while executing the callback must be caught, ignored, and
logged to the ERROR log.

To test, apply this patch to provoke a ValueError exception::

    diff --git a/searx/data/tracker_patterns.py b/searx/data/tracker_patterns.py
    index ed4415bce..695ed05d2 100644
    --- a/searx/data/tracker_patterns.py
    +++ b/searx/data/tracker_patterns.py
    @@ -114,6 +114,7 @@ class TrackerPatternsDB:
             Returns bool ``True`` to use URL unchanged (``False`` to ignore URL).
             If URL should be modified, the returned string is the new URL to use.
             """
    +        raise ValueError("test callback exceptions")

             new_url = url
             parsed_new_url = urlparse(url=new_url)

Start a `make run` instance, query for example `amazon`, and have a look at the
ERROR log:

    ERROR   searx.result_types: filter_urls (field 'url'): ignore ValueError('test callback exceptions') from callback searx/data/tracker_patterns.py:117

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2026-05-25 18:12:40 +02:00
Bnyro cb4b70ac50 [fix] qwant news: results don't have any descriptions (#6135)
BTW: fix some typecast issues
2026-05-25 18:04:14 +02:00
Markus Heiser e29e861e2c [fix] bing engines - geoblocking in China (#6134)
In regions like China, the domain must be adjusted to avoid a redirect.

- https://github.com/searxng/searxng/issues/5243
- https://github.com/searxng/searxng/pull/5324
- https://github.com/searxng/searxng/pull/6133

Suggested / tested by @hubutui in https://github.com/searxng/searxng/pull/6133#issuecomment-4534637069

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2026-05-25 17:05:08 +02:00
Markus Heiser 89b89a88fe [mod] engine: MyMemory Translated - typification and html to text (#6132)
The implementation is normalized, type annotations are applied, and the results
are stripped of the HTML markup that is present in some of them.

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2026-05-25 16:38:06 +02:00
Bnyro 46071a011a [mod] qwant: remove web lite and improve request spoofing (#6127)
- https://lite.qwant.com seems to be dead.
- The request parameters were changed to match the ones from the Qwant website.
- Qwant is now set to inactive by default due to its strict rate-limits
2026-05-25 15:46:40 +02:00
Bnyro b0d8af96bf [feat] engines: add flaticon icons engine (#6122) 2026-05-25 13:41:44 +02:00
Markus Heiser dd27fce3b7 [unbloat] drop meaningless field `number_of_results_xpath` from results (#6130)
In the result list, ``number_of_results`` indicates the number of hits in the
index; it does not indicate how many results are in the answer.

In the past, search engines such as Google or DDG indicated on the first
result page for a search term how many hits there were in total for this term
in their index.

This info was added up in SearXNG and delivered under ``number_of_results``.
Nowadays the search engines no longer indicate how many hits there are in the
index and so this field in SearXNG is also superfluous.

- https://github.com/searxng/searxng/issues/2457#issuecomment-2566181574
- https://github.com/searxng/searxng/issues/2987
- https://github.com/searxng/searxng/issues/5034

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2026-05-25 12:43:02 +02:00
Markus Heiser efc305b7f9 [mod] normalize variable name for the max number of results per request (#6131)
[mod] normalize variable name for the max number of results per request

In the past, we have used different names for the variable that specifies the
maximum number of hits in the outgoing request.

- ``page_size``
- ``number_of_results``
- ``nb_per_page``

Since *page_size* is the most accurate term and is also used in the XPath
engines, all other engines are adjusted accordingly within this
patch; the documentation is adjusted as well.

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2026-05-25 12:41:31 +02:00
40 changed files with 298 additions and 309 deletions
+1 -1
View File
@@ -107,7 +107,7 @@ module:
======================= =========== ===========================================
base_url string base-url, can be overwritten to use same
engine on other URL
number_of_results int maximum number of results per request
page_size int maximum number of results per request
language string ISO code of language and country like en_US
api_key string api-key if required by engine
======================= =========== ===========================================
-9
View File
@@ -39,7 +39,6 @@ url_xpath = './h4/a/@href'
title_xpath = './h4/a[1]'
content_xpath = './/p[1]'
correction_xpath = '//*[@id="didYouMean"]//a'
number_of_results_xpath = '//*[@id="totalResults"]'
name_token_xpath = '//form[@id="searchForm"]/input[@type="hidden"]/@name'
value_token_xpath = '//form[@id="searchForm"]/input[@type="hidden"]/@value'
@@ -107,14 +106,6 @@ def response(resp):
for correction in eval_xpath_list(dom, correction_xpath):
results.append({'correction': extract_text(correction)})
# get number of results
number_of_results = eval_xpath(dom, number_of_results_xpath)
if number_of_results:
try:
results.append({'number_of_results': int(extract_text(number_of_results))})
except: # pylint: disable=bare-except
pass
# Update the tokens to the newest ones
token_str = _get_tokens(dom)
CACHE.set('ahmia-tokens', token_str, expire=60 * 60)
+2 -2
View File
@@ -21,7 +21,7 @@ about = {
categories = ['images']
paging = True
nb_per_page = 20
page_size = 20
search_api = 'https://api.artic.edu/api/v1/artworks/search?'
image_api = 'https://www.artic.edu/iiif/2/'
@@ -34,7 +34,7 @@ def request(query, params):
'q': query,
'page': params['pageno'],
'fields': 'id,title,artist_display,medium_display,image_id,date_display,dimensions,artist_titles',
'limit': nb_per_page,
'limit': page_size,
}
)
params['url'] = search_api + args
+3 -3
View File
@@ -26,7 +26,7 @@ base_url = (
# engine dependent config
paging = True
number_of_results = 10
page_size = 10
# shortcuts for advanced search
shortcut_dict = {
@@ -57,12 +57,12 @@ def request(query, params):
query = re.sub(key, val, query)
# basic search
offset = (params['pageno'] - 1) * number_of_results
offset = (params['pageno'] - 1) * page_size
string_args = {
'query': urlencode({'query': query}),
'offset': offset,
'hits': number_of_results,
'hits': page_size,
}
params['url'] = base_url.format(**string_args)
+3 -16
View File
@@ -13,7 +13,6 @@ implementations are shared by other engines:
"""
import base64
import re
import typing as t
from urllib.parse import parse_qs, urlencode, urlparse
@@ -48,7 +47,7 @@ _safesearch_map: dict[int, str] = {
}
"""Filter results. 0: None, 1: Moderate, 2: Strict"""
base_url = "https://www.bing.com/search"
base_url = "https://www.bing.com"
"""Bing-Web search URL"""
@@ -94,7 +93,7 @@ def override_accept_language(params: "OnlineParams", engine_region: str | None)
params["headers"]["Accept-Language"] = f"{engine_region},{lang};q=0.9"
def request(query: str, params: "OnlineParams") -> "OnlineParams":
def request(query: str, params: "OnlineParams"):
"""Assemble a Bing-Web request."""
engine_region = traits.get_region(params["searxng_locale"], traits.all_locale)
@@ -110,13 +109,7 @@ def request(query: str, params: "OnlineParams") -> "OnlineParams":
if locale_params:
query_params.update(locale_params)
params["url"] = f"{base_url}?{urlencode(query_params)}"
# in some regions where geoblocking is employed (e.g. China),
# www.bing.com redirects to the regional version of Bing
params["allow_redirects"] = True
return params
params["url"] = f"{base_url}/search?{urlencode(query_params)}"
def response(resp: "SXNG_Response") -> list[dict[str, t.Any]]:
@@ -159,12 +152,6 @@ def response(resp: "SXNG_Response") -> list[dict[str, t.Any]]:
results.append({"url": href, "title": title, "content": content})
if results:
result_len_container = "".join(eval_xpath(dom, '//span[@class="sb_count"]//text()'))
result_len_container = re.sub(r"[^0-9]", "", result_len_container)
if result_len_container:
results.append({"number_of_results": int(result_len_container)})
return results
+2 -4
View File
@@ -34,7 +34,7 @@ time_map = {
"year": 60 * 24 * 365,
}
base_url = "https://www.bing.com/images/async"
base_url = "https://www.bing.com"
"""Bing-Image search URL"""
@@ -64,9 +64,7 @@ def request(query, params):
if params["time_range"]:
query_params["qft"] = "filterui:age-lt%s" % time_map[params["time_range"]]
params["url"] = base_url + "?" + urlencode(query_params)
return params
params["url"] = base_url + "/images/async?" + urlencode(query_params)
def response(resp):
+2 -4
View File
@@ -44,7 +44,7 @@ time_map = {
difference of *last day* and *last week* in the result list is just marginally.
Bing does not have news range ``year`` / we use ``month`` instead."""
base_url = "https://www.bing.com/news/infinitescrollajax"
base_url = "https://www.bing.com"
"""Bing (News) search URL"""
@@ -74,9 +74,7 @@ def request(query, params):
if params["time_range"]:
query_params["qft"] = time_map.get(params["time_range"], 'interval="9"')
params["url"] = base_url + "?" + urlencode(query_params)
return params
params["url"] = base_url + "/news/infinitescrollajax?" + urlencode(query_params)
def response(resp):
+2 -2
View File
@@ -29,7 +29,7 @@ paging = True
safesearch = True
time_range_support = True
base_url = "https://www.bing.com/videos/asyncv2"
base_url = "https://www.bing.com"
"""Bing-Video search URL"""
@@ -60,7 +60,7 @@ def request(query, params):
query_params["form"] = "VRFLTR"
query_params["qft"] = " filterui:videoage-lt%s" % time_map[params["time_range"]]
params["url"] = base_url + "?" + urlencode(query_params)
params["url"] = base_url + "/videos/asyncv2?" + urlencode(query_params)
return params
+2 -2
View File
@@ -16,7 +16,7 @@ about = {
paging = True
categories = []
number_of_results = 20
page_size = 20
skip_premium = True
@@ -25,7 +25,7 @@ thumbnail_format = "crop-240x300"
def request(query, params):
args = {'query': query, 'limit': number_of_results, 'offset': (params['pageno'] - 1) * number_of_results}
args = {'query': query, 'limit': page_size, 'offset': (params['pageno'] - 1) * page_size}
params['url'] = f"{base_url}/v2/search-gateway/recipes?{urlencode(args)}"
return params
+3 -3
View File
@@ -56,7 +56,7 @@ the API key in the engine :ref:`core engine config`."""
categories = ["science", "scientific publications"]
paging = True
nb_per_page = 10
page_size = 10
base_url = "https://api.core.ac.uk/v3/search/works/"
@@ -77,8 +77,8 @@ def request(query: str, params: "OnlineParams") -> None:
# API v3 uses different parameters
search_params = {
"q": query,
"offset": (params["pageno"] - 1) * nb_per_page,
"limit": nb_per_page,
"offset": (params["pageno"] - 1) * page_size,
"limit": page_size,
"sort": "relevance",
}
+2 -2
View File
@@ -38,7 +38,7 @@ about = {
# engine dependent config
categories = ["videos"]
paging = True
number_of_results = 10
page_size = 10
time_range_support = True
time_delta_dict = {
@@ -113,7 +113,7 @@ def request(query, params):
"password_protected": "false",
"private": "false",
"sort": "relevance",
"limit": number_of_results,
"limit": page_size,
"fields": ",".join(result_fields),
}
-1
View File
@@ -109,7 +109,6 @@ def search(query: str, params: "RequestParams") -> EngineResults:
kvmap=kvmap,
)
)
res.add(res.types.LegacyResult(number_of_results=count))
# cache counter value for 20sec
CACHE.set("count", count, expire=20)
-2
View File
@@ -176,6 +176,4 @@ def response(resp):
results.append(result)
results.append({'number_of_results': len(json_data['topics'])})
return results
-1
View File
@@ -21,7 +21,6 @@ about = {
# engine dependent config
categories = ['general'] # 'images', 'music', 'videos', 'files'
paging = False
number_of_results = 5
# search-url
# Doku is OpenSearch compatible
-8
View File
@@ -1,7 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Duden"""
import re
from urllib.parse import quote, urljoin
from lxml import html
from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex
@@ -51,13 +50,6 @@ def response(resp):
dom = html.fromstring(resp.text)
number_of_results_element = eval_xpath_getindex(
dom, '//a[@class="active" and contains(@href,"/suchen/dudenonline")]/span/text()', 0, default=None
)
if number_of_results_element is not None:
number_of_results_string = re.sub('[^0-9]', '', number_of_results_element)
results.append({'number_of_results': int(number_of_results_string)})
for result in eval_xpath_list(dom, '//section[not(contains(@class, "essay"))]'):
url = eval_xpath_getindex(result, './/h2/a', 0).get('href')
url = urljoin(base_url, url)
+70
View File
@@ -0,0 +1,70 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Flaticon_ is a database for icons.
.. _Flaticon: https://www.flaticon.com
"""
from urllib.parse import urlencode
import typing as t
from searx.result_types import EngineResults
if t.TYPE_CHECKING:
from searx.extended_types import SXNG_Response
from searx.search.processors import OnlineParams
# Engine metadata: no official API is used, no API key is required, and the
# results are delivered as JSON.
about = {
"website": "https://www.flaticon.com",
"wikidata_id": "Q105283791",
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": "JSON",
}
# Root URL of the site; request() derives the search URL and the referer from it.
base_url = "https://www.flaticon.com"
categories = ["images", "icons"]
# request() places params['pageno'] into the path of the search URL.
paging = True
def request(query: str, params: "OnlineParams") -> None:
    """Assemble the Flaticon search request for *query*.

    Sets the ``Referer`` and ``X-Requested-With`` headers in
    ``params["headers"]`` and stores the URL of the requested result page
    (``params["pageno"]``) in ``params["url"]``.
    """
    headers = params["headers"]
    # important: query term is not URL encoded in the referer string
    headers["Referer"] = f"{base_url}/search?word={query}"
    headers["X-Requested-With"] = "XMLHttpRequest"

    # Only the query string of the actual search URL is URL encoded.
    query_string = urlencode({"word": query})
    page = params["pageno"]
    params["url"] = f"{base_url}/ajax/search/{page}?{query_string}"
def _fix_url(url: str) -> str:
return url.replace(r"\/", "/")
def response(resp: "SXNG_Response") -> EngineResults:
    """Convert the JSON answer of the Flaticon search into image results.

    Every entry of the ``items`` list in the JSON payload becomes one result
    rendered with the ``images.html`` template.  The URL fields are passed
    through :py:obj:`_fix_url` before they are stored.
    """
    res = EngineResults()

    # The items mix string fields with the list of tag objects in "tags", so
    # the value type is t.Any rather than str.
    result: dict[str, t.Any]
    for result in resp.json()["items"]:
        # "tags" is a list of objects, the tag name is in their "tag" field
        tag_names = ", ".join(tag["tag"] for tag in result["tags"])
        res.add(
            res.types.LegacyResult(
                {
                    "template": "images.html",
                    "url": _fix_url(result["slug"]),
                    "thumbnail_src": _fix_url(result["png"]),
                    "img_src": _fix_url(result["png512"]),
                    "title": result["name"],
                    "content": tag_names,
                    "author": result["team_name"],
                }
            )
        )
    return res
+3 -3
View File
@@ -20,7 +20,7 @@ about = {
categories = ['images']
nb_per_page = 15
page_size = 15
paging = True
api_key = None
@@ -29,7 +29,7 @@ url = (
'https://api.flickr.com/services/rest/?method=flickr.photos.search'
+ '&api_key={api_key}&{text}&sort=relevance'
+ '&extras=description%2C+owner_name%2C+url_o%2C+url_n%2C+url_z'
+ '&per_page={nb_per_page}&format=json&nojsoncallback=1&page={page}'
+ '&per_page={page_size}&format=json&nojsoncallback=1&page={page}'
)
photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}'
@@ -42,7 +42,7 @@ def build_flickr_url(user_id, photo_id):
def request(query, params):
params['url'] = url.format(
text=urlencode({'text': query}), api_key=api_key, nb_per_page=nb_per_page, page=params['pageno']
text=urlencode({'text': query}), api_key=api_key, page_size=page_size, page=params['pageno']
)
return params
+3 -3
View File
@@ -51,7 +51,7 @@ about = {
# engine dependent config
categories = ['general']
paging = True
number_of_results = 5
page_size = 5
search_type: str = 'nearmatch'
"""Which type of search to perform. One of the following values: ``nearmatch``,
@@ -110,7 +110,7 @@ def request(query, params):
params['language'] = params['language'].split('-')[0]
api_url = f"{base_url.rstrip('/')}/{api_path}?".format(language=params['language'])
offset = (params['pageno'] - 1) * number_of_results
offset = (params['pageno'] - 1) * page_size
args = {
'action': 'query',
@@ -118,7 +118,7 @@ def request(query, params):
'format': 'json',
'srsearch': query,
'sroffset': offset,
'srlimit': number_of_results,
'srlimit': page_size,
'srwhat': search_type,
'srprop': srprop,
'srsort': srsort,
+3 -3
View File
@@ -14,7 +14,7 @@ about = {
}
# engine dependent config
number_of_results = 20 # Don't put this over 5000
page_size = 20 # Don't put this over 5000
categories = ["it", "packages"]
disabled = True
shortcut = "cpan"
@@ -43,7 +43,7 @@ query_data_template = {
{"date": {"order": "desc"}},
],
'_source': ['documentation', "abstract"],
'size': number_of_results,
'size': page_size,
}
search_url = urlunparse(["https", "fastapi.metacpan.org", "/v1/file/_search", "", "", ""])
@@ -53,7 +53,7 @@ def request(query, params):
params["method"] = "POST"
query_data = query_data_template
query_data["query"]["multi_match"]["query"] = query
query_data["from"] = (params["pageno"] - 1) * number_of_results
query_data["from"] = (params["pageno"] - 1) * page_size
params["json"] = query_data
return params
-1
View File
@@ -93,7 +93,6 @@ def search(query, params) -> EngineResults:
query = _client.find({key: q}).skip((params['pageno'] - 1) * results_per_page).limit(results_per_page)
res.add(res.types.LegacyResult(number_of_results=query.count()))
for row in query:
del row['_id']
kvmap = {str(k): str(v) for k, v in row.items()}
+3 -3
View File
@@ -21,15 +21,15 @@ about = {
categories = ['images']
paging = True
nb_per_page = 20
page_size = 20
base_url = 'https://api.openverse.org/v1/images/'
search_string = '?page={page}&page_size={nb_per_page}&format=json&{query}'
search_string = '?page={page}&page_size={page_size}&format=json&{query}'
def request(query, params):
search_path = search_string.format(query=urlencode({'q': query}), nb_per_page=nb_per_page, page=params['pageno'])
search_path = search_string.format(query=urlencode({'q': query}), page_size=page_size, page=params['pageno'])
params['url'] = base_url + search_path
+2 -2
View File
@@ -20,7 +20,7 @@ about = {
# engine dependent config
categories = ['map']
paging = False
number_of_results = 10
page_size = 10
# search-url
base_url = 'https://photon.komoot.io/'
@@ -33,7 +33,7 @@ supported_languages = ['de', 'en', 'fr', 'it']
# do search-request
def request(query, params):
params['url'] = base_url + search_string.format(query=urlencode({'q': query}), limit=number_of_results)
params['url'] = base_url + search_string.format(query=urlencode({'q': query}), limit=page_size)
if params['language'] != 'all':
language = params['language'].split('_')[0]
+3 -3
View File
@@ -57,7 +57,7 @@ categories = ["science", "scientific publications"]
eutils_api = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
# engine dependent config
number_of_results = 10
page_size = 10
pubmed_url = "https://www.ncbi.nlm.nih.gov/pubmed/"
@@ -67,8 +67,8 @@ def request(query: str, params: "OnlineParams") -> None:
{
"db": "pubmed",
"term": query,
"retstart": (params["pageno"] - 1) * number_of_results,
"hits": number_of_results,
"retstart": (params["pageno"] - 1) * page_size,
"hits": page_size,
}
)
esearch_url = f"{eutils_api}/esearch.fcgi?{args}"
+112 -148
View File
@@ -6,7 +6,6 @@ engineered by reading the network log of https://www.qwant.com/ queries.
For Qwant's *web-search* two alternatives are implemented:
- ``web``: uses the :py:obj:`api_url` which returns a JSON structure
- ``web-lite``: uses the :py:obj:`web_lite_url` which returns a HTML page
Configuration
@@ -22,7 +21,7 @@ This implementation is used by different qwant engines in the :ref:`settings.yml
.. code:: yaml
- name: qwant
qwant_categ: web-lite # alternatively use 'web'
qwant_categ: web
...
- name: qwant news
qwant_categ: news
@@ -39,6 +38,8 @@ Implementations
"""
import typing as t
from datetime import (
datetime,
timedelta,
@@ -47,8 +48,7 @@ from json import loads
from urllib.parse import urlencode
import babel
import lxml
from flask_babel import gettext
from flask_babel import gettext # pyright: ignore[reportUnknownVariableType]
from searx.enginelib.traits import EngineTraits
from searx.exceptions import (
@@ -59,11 +59,13 @@ from searx.exceptions import (
)
from searx.network import raise_for_httperror
from searx.utils import (
eval_xpath,
eval_xpath_list,
extract_text,
get_embeded_stream_url,
)
from searx.result_types import EngineResults
if t.TYPE_CHECKING:
from searx.search.processors import OnlineParams
from searx.extended_types import SXNG_Response
# about
about = {
@@ -82,113 +84,66 @@ max_page = 5
"""5 pages maximum (``&p=5``): Trying to do more just results in an improper
redirect"""
qwant_categ = None
"""One of ``web-lite`` (or ``web``), ``news``, ``images`` or ``videos``"""
qwant_categ: str = None # pyright: ignore[reportAssignmentType]
"""One of ``web``, ``news``, ``images`` or ``videos``"""
safesearch = True
# safe_search_map = {0: '&safesearch=0', 1: '&safesearch=1', 2: '&safesearch=2'}
# fmt: off
qwant_news_locales = [
'ca_ad', 'ca_es', 'ca_fr', 'co_fr', 'de_at', 'de_ch', 'de_de', 'en_au',
'en_ca', 'en_gb', 'en_ie', 'en_my', 'en_nz', 'en_us', 'es_ad', 'es_ar',
'es_cl', 'es_co', 'es_es', 'es_mx', 'es_pe', 'eu_es', 'eu_fr', 'fc_ca',
'fr_ad', 'fr_be', 'fr_ca', 'fr_ch', 'fr_fr', 'it_ch', 'it_it', 'nl_be',
'nl_nl', 'pt_ad', 'pt_pt',
"ca_ad", "ca_es", "ca_fr", "co_fr", "de_at", "de_ch", "de_de", "en_au",
"en_ca", "en_gb", "en_ie", "en_my", "en_nz", "en_us", "es_ad", "es_ar",
"es_cl", "es_co", "es_es", "es_mx", "es_pe", "eu_es", "eu_fr", "fc_ca",
"fr_ad", "fr_be", "fr_ca", "fr_ch", "fr_fr", "it_ch", "it_it", "nl_be",
"nl_nl", "pt_ad", "pt_pt",
]
# fmt: on
# search-url
api_url = "https://api.qwant.com/v3/search/"
"""URL of Qwant's API (JSON)"""
web_lite_url = "https://lite.qwant.com/"
"""URL of Qwant-Lite (HTML)"""
def request(query, params):
def request(query: str, params: "OnlineParams") -> None:
"""Qwant search request"""
if not query:
return None
return
q_locale = traits.get_region(params["searxng_locale"], default="en_US")
url = api_url + f"{qwant_categ}?"
args = {"q": query}
results_per_page = 10
if qwant_categ == "images":
results_per_page = 50
args = {
"q": query,
"count": results_per_page,
"locale": q_locale,
"offset": (params["pageno"] - 1) * results_per_page,
"device": "desktop",
"safesearch": params["safesearch"],
"tgp": 1,
"display": True,
"llm": True,
}
params["raise_for_httperror"] = False
if qwant_categ == "web-lite":
url = web_lite_url + "?"
args["locale"] = q_locale.lower()
args["l"] = q_locale.split("_")[0]
args["s"] = params["safesearch"]
args["p"] = params["pageno"]
params["raise_for_httperror"] = True
elif qwant_categ == "images":
args["count"] = 50
args["locale"] = q_locale
args["safesearch"] = params["safesearch"]
args["tgp"] = 3
args["offset"] = (params["pageno"] - 1) * args["count"]
else: # web, news, videos
args["count"] = 10
args["locale"] = q_locale
args["safesearch"] = params["safesearch"]
args["llm"] = "false"
args["tgp"] = 3
args["offset"] = (params["pageno"] - 1) * args["count"]
params["url"] = url + urlencode(args)
return params
params["url"] = f"{api_url}{qwant_categ}?{urlencode(args)}"
def response(resp):
if qwant_categ == "web-lite":
return parse_web_lite(resp)
return parse_web_api(resp)
def parse_web_lite(resp):
"""Parse results from Qwant-Lite"""
results = []
dom = lxml.html.fromstring(resp.text)
for item in eval_xpath_list(dom, "//section/article"):
if eval_xpath(item, "./span[contains(@class, 'tooltip')]"):
# ignore randomly interspersed advertising adds
continue
results.append(
{
"url": extract_text(eval_xpath(item, "./span[contains(@class, 'url partner')]")),
"title": extract_text(eval_xpath(item, "./h2/a")),
"content": extract_text(eval_xpath(item, "./p")),
}
)
return results
def parse_web_api(resp):
def response(resp: "SXNG_Response") -> EngineResults:
"""Parse results from Qwant's API"""
# pylint: disable=too-many-locals, too-many-branches, too-many-statements
results = []
res = EngineResults()
# Try to load JSON result
search_results: dict[str, t.Any] = {}
try:
search_results = loads(resp.text)
search_results = resp.json()
except ValueError:
search_results = {}
pass
data = search_results.get("data", {})
data: dict[str, t.Any] = search_results.get("data", {}) # pyright: ignore[reportAny]
# check for an API error
if search_results.get("status") != "success":
@@ -207,13 +162,13 @@ def parse_web_api(resp):
if qwant_categ == "web":
# The WEB query contains a list named 'mainline'. This list can contain
# different result types (e.g. mainline[0]['type'] returns type of the
# result items in mainline[0]['items']
# different result types (e.g. mainline[0]["type"] returns type of the
# result items in mainline[0]["items"]
mainline = data.get("result", {}).get("items", {}).get("mainline", {})
else:
# Queries on News, Images and Videos do not have a list named 'mainline'
# in the response. The result items are directly in the list
# result['items'].
# result["items"].
mainline = data.get("result", {}).get("items", [])
mainline = [
{"type": qwant_categ, "items": mainline},
@@ -221,8 +176,9 @@ def parse_web_api(resp):
# return empty array if there are no results
if not mainline:
return []
return res
row: dict[str, t.Any]
for row in mainline:
mainline_type = row.get("type", "web")
if mainline_type != qwant_categ:
@@ -232,90 +188,98 @@ def parse_web_api(resp):
# ignore adds
continue
mainline_items = row.get("items", [])
mainline_items: list[dict[str, t.Any]] = row.get("items", [])
for item in mainline_items:
title = item.get("title", None)
res_url = item.get("url", None)
title: str = item.get("title", "")
res_url: str = item.get("url", "")
pub_date: datetime | None = None
thumbnail: str = ""
content: str = item.get("desc", "")
_date: float | None = item.get("date")
if _date:
try:
pub_date = datetime.fromtimestamp(_date)
except ValueError:
# news' date value milli seconds
pub_date = datetime.fromtimestamp(_date / 1000)
if mainline_type == "web":
content = item["desc"]
results.append(
{
"title": title,
"url": res_url,
"content": content,
}
res.add(
res.types.MainResult(
title=title,
url=res_url,
content=content,
)
)
elif mainline_type == "news":
pub_date = item["date"]
if pub_date is not None:
pub_date = datetime.fromtimestamp(pub_date)
news_media = item.get("media", [])
thumbnail = None
if news_media:
thumbnail = news_media[0].get("pict", {}).get("url", None)
results.append(
{
"title": title,
"url": res_url,
"publishedDate": pub_date,
"thumbnail": thumbnail,
}
thumbnail = news_media[0].get("pict", {}).get("url", "")
res.add(
res.types.MainResult(
title=title,
content=content,
url=res_url,
publishedDate=pub_date,
thumbnail=thumbnail,
)
)
elif mainline_type == "images":
thumbnail = item["thumbnail"]
img_src = item["media"]
results.append(
{
"title": title,
"url": res_url,
"template": "images.html",
"thumbnail_src": thumbnail,
"img_src": img_src,
"resolution": f"{item['width']} x {item['height']}",
"img_format": item.get("thumb_type"),
}
res.add(
res.types.LegacyResult(
title=title,
url=res_url,
template="images.html",
thumbnail_src=item["thumbnail"] or "",
img_src=item["media"] or "",
resolution=f"{item['width']} x {item['height']}",
img_format=item.get("thumb_type"),
)
)
elif mainline_type == "videos":
# some videos do not have a description: while qwant-video
# returns an empty string, such video from a qwant-web query
# miss the 'desc' key.
d, s, c = item.get("desc"), item.get("source"), item.get("channel")
content_parts = []
d: str = item.get("desc", "")
s: str = item.get("source", "")
c: str = item.get("channel", "")
content_parts: list[str] = []
if d:
content_parts.append(d)
content_parts.append(f"{d}")
if s:
content_parts.append("%s: %s " % (gettext("Source"), s))
content_parts.append(f"{gettext('Source')}: {s} ")
if c:
content_parts.append("%s: %s " % (gettext("Channel"), c))
content_parts.append(f"{gettext('Channel')}: {c} ")
content = " // ".join(content_parts)
length = item["duration"]
if length is not None:
length = timedelta(milliseconds=length)
pub_date = item["date"]
if pub_date is not None:
pub_date = datetime.fromtimestamp(pub_date)
thumbnail = item["thumbnail"]
length = timedelta(milliseconds=(item["duration"] or 0))
thumbnail = item["thumbnail"] or ""
# from some locations (DE and others?) the s2 link do
# response a 'Please wait ..' but does not deliver the thumbnail
thumbnail = thumbnail.replace("https://s2.qwant.com", "https://s1.qwant.com", 1)
results.append(
{
"title": title,
"url": res_url,
"content": content,
"iframe_src": get_embeded_stream_url(res_url),
"publishedDate": pub_date,
"thumbnail": thumbnail,
"template": "videos.html",
"length": length,
}
res.add(
res.types.LegacyResult(
title=title,
url=res_url,
content=content,
iframe_src=get_embeded_stream_url(res_url),
publishedDate=pub_date,
thumbnail=thumbnail,
template="videos.html",
length=length,
)
)
return results
return res
def fetch_traits(engine_traits: EngineTraits):
@@ -326,7 +290,7 @@ def fetch_traits(engine_traits: EngineTraits):
from searx.utils import extr
resp = get(
about["website"],
about["website"], # pyright: ignore[reportArgumentType]
timeout=5,
)
if not resp.ok:
@@ -336,7 +300,7 @@ def fetch_traits(engine_traits: EngineTraits):
q_initial_props = loads(json_string)
q_locales = q_initial_props.get("locales")
eng_tag_list = set()
eng_tag_list: set[str] = set()
for country, v in q_locales.items():
for lang in v["langs"]:
+3 -3
View File
@@ -28,7 +28,7 @@ about = {
paging = True
categories = ["music", "radio"]
number_of_results = 10
page_size = 10
station_filters = [] # ['countrycode', 'language']
"""A list of filters to be applied to the search of radio stations. By default
@@ -100,8 +100,8 @@ def request(query, params):
args = {
"name": query,
"order": "votes",
"offset": (params["pageno"] - 1) * number_of_results,
"limit": number_of_results,
"offset": (params["pageno"] - 1) * page_size,
"limit": page_size,
"hidebroken": "true",
"reverse": "true",
}
-2
View File
@@ -54,6 +54,4 @@ def response(resp):
results.extend({'suggestion': s} for s in response_json['suggestions'])
results.append({'number_of_results': response_json['number_of_results']})
return results
+3 -3
View File
@@ -74,7 +74,7 @@ about = {
categories = ["science", "scientific publications"]
paging = True
nb_per_page = 10
page_size = 10
"""Number of results to return in the request, see `Pagination and Limits`_ for
more details.
@@ -109,8 +109,8 @@ def request(query: str, params: "OnlineParams") -> None:
args = {
"api_key": api_key,
"q": query,
"s": nb_per_page * (params["pageno"] - 1),
"p": nb_per_page,
"s": page_size * (params["pageno"] - 1),
"p": page_size,
}
params["url"] = f"{base_url}?{urlencode(args)}"
# For example, the ``year:`` filter requires a *Premium Plan* subscription.
-4
View File
@@ -211,8 +211,4 @@ def response(resp) -> EngineResults:
# append number of results
number_of_results = json_data.get('num_matches')
if number_of_results:
results.append({'number_of_results': number_of_results})
return results
+35 -18
View File
@@ -1,56 +1,73 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""MyMemory Translated"""
import typing as t
import urllib.parse
from searx.utils import html_to_text
from searx.result_types import EngineResults
if t.TYPE_CHECKING:
from searx.extended_types import SXNG_Response
from searx.search.processors import OnlineDictParams
#
# about
about = {
"website": 'https://mymemory.translated.net/',
"website": "https://mymemory.translated.net/",
"wikidata_id": None,
"official_api_documentation": 'https://mymemory.translated.net/doc/spec.php',
"official_api_documentation": "https://mymemory.translated.net/doc/spec.php",
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
"results": "JSON",
}
engine_type = 'online_dictionary'
categories = ['general', 'translate']
engine_type = "online_dictionary"
categories = ["general", "translate"]
api_url = "https://api.mymemory.translated.net"
web_url = "https://mymemory.translated.net"
weight = 100
api_key = ''
api_key = ""
def request(query, params): # pylint: disable=unused-argument
def request(_: str, params: "OnlineDictParams") -> None:
args = {"q": params["query"], "langpair": f"{params['from_lang'][1]}|{params['to_lang'][1]}"}
args = {
"q": params["query"],
"langpair": f"{params['from_lang'][1]}|{params['to_lang'][1]}",
}
if api_key:
args["key"] = api_key
params['url'] = f"{api_url}/get?{urllib.parse.urlencode(args)}"
return params
def response(resp) -> EngineResults:
def response(resp: "SXNG_Response") -> EngineResults:
results = EngineResults()
data = resp.json()
data: dict[str, t.Any] = resp.json()
params: "OnlineDictParams" = resp.search_params # pyright: ignore[reportAssignmentType]
args = {
"q": resp.search_params["query"],
"lang": resp.search_params.get("searxng_locale", "en"), # ui language
"sl": resp.search_params['from_lang'][1],
"tl": resp.search_params['to_lang'][1],
"q": params["query"],
"lang": params.get("searxng_locale", "en"), # ui language
"sl": params["from_lang"][1],
"tl": params["to_lang"][1],
}
link = f"{web_url}/search.php?{urllib.parse.urlencode(args)}"
text = data['responseData']['translatedText']
text: str = html_to_text(data["responseData"]["translatedText"])
examples = [f"{m['segment']} : {m['translation']}" for m in data['matches'] if m['translation'] != text]
examples: set[str] = set()
match: dict[str, str]
for match in data["matches"]:
_text = html_to_text(match["translation"])
if _text != text:
_seg = html_to_text(match["segment"])
examples.add(f"{_seg} : {_text}")
item = results.types.Translations.Item(text=text, examples=examples)
item = results.types.Translations.Item(text=text, examples=list(examples))
results.add(results.types.Translations(translations=[item], url=link))
return results
+3 -3
View File
@@ -62,7 +62,7 @@ about = {
categories: list[str] = []
paging = True
number_of_results = 10
page_size = 10
wc_api_url = "https://commons.wikimedia.org/w/api.php"
wc_search_type: str = ""
@@ -107,8 +107,8 @@ def request(query: str, params: "OnlineParams") -> None:
"generator": "search",
"gsrnamespace": "6", # https://www.mediawiki.org/wiki/Help:Namespaces#Renaming_namespaces
"gsrprop": "snippet",
"gsrlimit": number_of_results,
"gsroffset": number_of_results * (params["pageno"] - 1),
"gsrlimit": page_size,
"gsroffset": page_size * (params["pageno"] - 1),
"gsrsearch": f"filetype:{filetype} {query}",
# imageinfo: https://commons.wikimedia.org/w/api.php?action=help&modules=query%2Bimageinfo
"iiprop": "url|size|mime",
+3 -3
View File
@@ -75,7 +75,7 @@ about = {
# engine dependent config
categories = ['general']
paging = True
number_of_results = 10
page_size = 10
http_digest_auth_user = ""
"""HTTP digest user for the local YACY instance"""
http_digest_auth_pass = ""
@@ -125,11 +125,11 @@ def _base_url() -> str:
def request(query, params):
offset = (params['pageno'] - 1) * number_of_results
offset = (params['pageno'] - 1) * page_size
args = {
'query': query,
'startRecord': offset,
'maximumRecords': number_of_results,
'maximumRecords': page_size,
'contentdom': search_type,
'resource': search_mode,
}
+2 -2
View File
@@ -30,7 +30,7 @@ import httpx
if typing.TYPE_CHECKING:
import searx.preferences
import searx.results
from searx.search.processors import OnlineParamTypes
from searx.search.processors import OnlineParamTypes, OnlineDictParams, OnlineCurrenciesParams
class SXNG_Request(flask.Request):
@@ -83,4 +83,4 @@ class SXNG_Response(httpx.Response):
"""
ok: bool
search_params: "OnlineParamTypes"
search_params: "OnlineParamTypes | OnlineDictParams | OnlineCurrenciesParams"
+18 -3
View File
@@ -19,6 +19,7 @@
__all__ = ["Result"]
import typing as t
import types
import re
import urllib.parse
@@ -29,7 +30,9 @@ from collections.abc import Callable
import msgspec
from searx import logger as log
from searx import logger
log = logger.getChild("result_types")
WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
UNSET = object()
@@ -125,8 +128,20 @@ def _filter_urls(
if not url_src:
continue
try:
new_url = filter_func(result, field_name, url_src)
# log.debug("filter_urls: filter_func(result, %s) '%s' -> '%s'", field_name, field_value, new_url)
except Exception as exc: # pylint: disable=broad-exception-caught
# pylint: disable=no-member
_tb: types.TracebackType = exc.__traceback__.tb_next.tb_next # type: ignore
log.error(
"filter_urls (field '%s'): ignore %s from callback %s:%s",
field_name,
repr(exc),
_tb.tb_frame.f_code.co_filename,
_tb.tb_lineno,
)
continue
if isinstance(new_url, bool):
if new_url:
# log.debug("filter_urls: unchanged field %s URL %s", field_name, field_value)
@@ -529,7 +544,7 @@ class LegacyResult(dict[str, t.Any]):
# the img_src are equal.
return hash(f"{self.template}|{self.url}|{self.img_src}")
if not any(cls in self for cls in ["suggestion", "correction", "infobox", "number_of_results", "engine_data"]):
if not any(cls in self for cls in ["suggestion", "correction", "infobox", "engine_data"]):
# Ordinary url-results are equal if their values for template,
# parsed_url (without schema) and img_src are equal.
-25
View File
@@ -69,7 +69,6 @@ class ResultContainer:
self.answers = AnswerSet()
self.corrections = set()
self._number_of_results: list[int] = []
self.engine_data: dict[str, dict[str, str]] = defaultdict(dict)
self._closed: bool = False
self.paging: bool = False
@@ -135,11 +134,6 @@ class ResultContainer:
self._merge_infobox(result)
continue
if "number_of_results" in result:
if self.on_result(result):
self._number_of_results.append(result["number_of_results"])
continue
if "engine_data" in result:
if self.on_result(result):
if result.engine:
@@ -252,25 +246,6 @@ class ResultContainer:
self._main_results_sorted = gresults
return self._main_results_sorted
@property
def number_of_results(self) -> int:
"""Returns the average of results number, returns zero if the average
result number is smaller than the actual result count."""
if not self._closed:
log.error("call to ResultContainer.number_of_results before ResultContainer.close")
return 0
with self._lock:
resultnum_sum = sum(self._number_of_results)
if not resultnum_sum or not self._number_of_results:
return 0
average = int(resultnum_sum / len(self._number_of_results))
if average < len(self.get_ordered_results()):
average = 0
return average
def add_unresponsive_engine(self, engine_name: str, error_type: str, suspended: bool = False):
with self._lock:
if self._closed:
+9 -2
View File
@@ -531,18 +531,22 @@ engines:
engine: bing
shortcut: bi
disabled: true
# base_url: https://cn.bing.com # for instances hosted in China
- name: bing images
engine: bing_images
shortcut: bii
# base_url: https://cn.bing.com # for instances hosted in China
- name: bing news
engine: bing_news
shortcut: bin
# base_url: https://cn.bing.com # for instances hosted in China
- name: bing videos
engine: bing_videos
shortcut: biv
# base_url: https://cn.bing.com # for instances hosted in China
- name: bitchute
engine: bitchute
@@ -886,6 +890,11 @@ engines:
shortcut: ftm
disabled: true
- name: flaticon
engine: flaticon
shortcut: fli
disabled: true
- name: flickr
categories: images
shortcut: fl
@@ -1391,7 +1400,6 @@ engines:
engine: metacpan
shortcut: cpan
disabled: true
number_of_results: 20
# https://docs.searxng.org/dev/engines/offline/search-indexer-engines.html#module-searx.engines.meilisearch
# - name: meilisearch
@@ -1790,7 +1798,6 @@ engines:
engine: qwant
shortcut: qw
categories: [general, web]
disabled: true
- name: qwant news
qwant_categ: news
@@ -7,9 +7,7 @@
<title>SearXNG search: {{ q|e }}</title>
<link>{{ url_for('search', _external=True) }}?q={{ q|e }}</link>
<description>Search results for "{{ q|e }}" - SearXNG</description>
<opensearch:totalResults>{{ number_of_results }}</opensearch:totalResults>
<opensearch:startIndex>1</opensearch:startIndex>
<opensearch:itemsPerPage>{{ number_of_results }}</opensearch:itemsPerPage>
<atom:link rel="search" type="application/opensearchdescription+xml" href="{{ opensearch_url }}"/>
<opensearch:Query role="request" searchTerms="{{ q|e }}" startPage="1" />
{% if error_message %}
-4
View File
@@ -26,10 +26,6 @@
<div id="sidebar">
{%- if number_of_results != '0' -%}
<p id="result_count"><small>{{ _('Number of results') }}: {{ number_of_results }}</small></p>
{%- endif -%}
{%- if infoboxes -%}
<div id="infoboxes">
<details open class="sidebar-collapsible">
-4
View File
@@ -43,7 +43,6 @@ from flask.json import jsonify
from flask_babel import (
Babel,
gettext,
format_decimal,
)
import searx
@@ -564,7 +563,6 @@ def index_error(output_format: str, error_message: str):
'opensearch_response_rss.xml',
results=[],
q=sxng_request.form['q'] if 'q' in sxng_request.form else '',
number_of_results=0,
error_message=error_message,
)
return Response(response_rss, mimetype='text/xml')
@@ -724,7 +722,6 @@ def search():
'opensearch_response_rss.xml',
results=results,
q=sxng_request.form['q'],
number_of_results=result_container.number_of_results,
)
return Response(response_rss, mimetype='text/xml')
@@ -761,7 +758,6 @@ def search():
selected_categories = search_query.categories,
pageno = search_query.pageno,
time_range = search_query.time_range or '',
number_of_results = format_decimal(result_container.number_of_results),
suggestions = suggestion_urls,
answers = result_container.answers,
corrections = correction_urls,
-1
View File
@@ -163,7 +163,6 @@ def get_json_response(sq: "SearchQuery", rc: "ResultContainer") -> str:
"""Returns the JSON string of the results to a query (``application/json``)"""
data = {
'query': sq.query,
'number_of_results': rc.number_of_results,
'results': [_.as_dict() for _ in rc.get_ordered_results()],
'answers': [_.as_dict() for _ in rc.answers],
'corrections': list(rc.corrections),
-3
View File
@@ -57,7 +57,6 @@ class ViewsTestCase(SearxTestCase): # pylint: disable=too-many-public-methods
infoboxes=[],
unresponsive_engines=set(),
results=test_results,
number_of_results=3,
results_length=lambda: len(test_results),
get_timings=lambda: timings,
redirect_url=None,
@@ -161,8 +160,6 @@ class ViewsTestCase(SearxTestCase): # pylint: disable=too-many-public-methods
self.assertIn(b'<description>Search results for "test" - SearXNG</description>', result.data)
self.assertIn(b'<opensearch:totalResults>3</opensearch:totalResults>', result.data)
self.assertIn(b'<title>First Test</title>', result.data)
self.assertIn(b'<link>http://first.test.xyz</link>', result.data)