[unbloat] drop meaningless field `number_of_results` from results (#6130)

In the result list, the ``number_of_results`` field indicates the number of hits
in the search engine's index; it does not indicate how many results are in the
answer.

In the past, search engines such as Google or DuckDuckGo showed, on the first
result page for a search term, how many hits there were in total for that term
in their index.

This info was aggregated in SearXNG (averaged over the engines that reported it)
and delivered under ``number_of_results``. Nowadays the search engines no longer
indicate how many hits there are in their index, so this field in SearXNG is
superfluous as well.

- https://github.com/searxng/searxng/issues/2457#issuecomment-2566181574
- https://github.com/searxng/searxng/issues/2987
- https://github.com/searxng/searxng/issues/5034

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser
2026-05-25 12:43:02 +02:00
committed by GitHub
parent efc305b7f9
commit dd27fce3b7
15 changed files with 1 additions and 74 deletions
-9
View File
@@ -39,7 +39,6 @@ url_xpath = './h4/a/@href'
title_xpath = './h4/a[1]'
content_xpath = './/p[1]'
correction_xpath = '//*[@id="didYouMean"]//a'
number_of_results_xpath = '//*[@id="totalResults"]'
name_token_xpath = '//form[@id="searchForm"]/input[@type="hidden"]/@name'
value_token_xpath = '//form[@id="searchForm"]/input[@type="hidden"]/@value'
@@ -107,14 +106,6 @@ def response(resp):
for correction in eval_xpath_list(dom, correction_xpath):
results.append({'correction': extract_text(correction)})
# get number of results
number_of_results = eval_xpath(dom, number_of_results_xpath)
if number_of_results:
try:
results.append({'number_of_results': int(extract_text(number_of_results))})
except: # pylint: disable=bare-except
pass
# Update the tokens to the newest ones
token_str = _get_tokens(dom)
CACHE.set('ahmia-tokens', token_str, expire=60 * 60)
-7
View File
@@ -13,7 +13,6 @@ implementations are shared by other engines:
"""
import base64
import re
import typing as t
from urllib.parse import parse_qs, urlencode, urlparse
@@ -159,12 +158,6 @@ def response(resp: "SXNG_Response") -> list[dict[str, t.Any]]:
results.append({"url": href, "title": title, "content": content})
if results:
result_len_container = "".join(eval_xpath(dom, '//span[@class="sb_count"]//text()'))
result_len_container = re.sub(r"[^0-9]", "", result_len_container)
if result_len_container:
results.append({"number_of_results": int(result_len_container)})
return results
-1
View File
@@ -109,7 +109,6 @@ def search(query: str, params: "RequestParams") -> EngineResults:
kvmap=kvmap,
)
)
res.add(res.types.LegacyResult(number_of_results=count))
# cache counter value for 20sec
CACHE.set("count", count, expire=20)
-2
View File
@@ -176,6 +176,4 @@ def response(resp):
results.append(result)
results.append({'number_of_results': len(json_data['topics'])})
return results
-8
View File
@@ -1,7 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Duden"""
import re
from urllib.parse import quote, urljoin
from lxml import html
from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex
@@ -51,13 +50,6 @@ def response(resp):
dom = html.fromstring(resp.text)
number_of_results_element = eval_xpath_getindex(
dom, '//a[@class="active" and contains(@href,"/suchen/dudenonline")]/span/text()', 0, default=None
)
if number_of_results_element is not None:
number_of_results_string = re.sub('[^0-9]', '', number_of_results_element)
results.append({'number_of_results': int(number_of_results_string)})
for result in eval_xpath_list(dom, '//section[not(contains(@class, "essay"))]'):
url = eval_xpath_getindex(result, './/h2/a', 0).get('href')
url = urljoin(base_url, url)
-1
View File
@@ -93,7 +93,6 @@ def search(query, params) -> EngineResults:
query = _client.find({key: q}).skip((params['pageno'] - 1) * results_per_page).limit(results_per_page)
res.add(res.types.LegacyResult(number_of_results=query.count()))
for row in query:
del row['_id']
kvmap = {str(k): str(v) for k, v in row.items()}
-2
View File
@@ -54,6 +54,4 @@ def response(resp):
results.extend({'suggestion': s} for s in response_json['suggestions'])
results.append({'number_of_results': response_json['number_of_results']})
return results
-4
View File
@@ -211,8 +211,4 @@ def response(resp) -> EngineResults:
# append number of results
number_of_results = json_data.get('num_matches')
if number_of_results:
results.append({'number_of_results': number_of_results})
return results
+1 -1
View File
@@ -529,7 +529,7 @@ class LegacyResult(dict[str, t.Any]):
# the img_src are equal.
return hash(f"{self.template}|{self.url}|{self.img_src}")
if not any(cls in self for cls in ["suggestion", "correction", "infobox", "number_of_results", "engine_data"]):
if not any(cls in self for cls in ["suggestion", "correction", "infobox", "engine_data"]):
# Ordinary url-results are equal if their values for template,
# parsed_url (without schema) and img_src` are equal.
-25
View File
@@ -69,7 +69,6 @@ class ResultContainer:
self.answers = AnswerSet()
self.corrections = set()
self._number_of_results: list[int] = []
self.engine_data: dict[str, dict[str, str]] = defaultdict(dict)
self._closed: bool = False
self.paging: bool = False
@@ -135,11 +134,6 @@ class ResultContainer:
self._merge_infobox(result)
continue
if "number_of_results" in result:
if self.on_result(result):
self._number_of_results.append(result["number_of_results"])
continue
if "engine_data" in result:
if self.on_result(result):
if result.engine:
@@ -252,25 +246,6 @@ class ResultContainer:
self._main_results_sorted = gresults
return self._main_results_sorted
@property
def number_of_results(self) -> int:
"""Returns the average of results number, returns zero if the average
result number is smaller than the actual result count."""
if not self._closed:
log.error("call to ResultContainer.number_of_results before ResultContainer.close")
return 0
with self._lock:
resultnum_sum = sum(self._number_of_results)
if not resultnum_sum or not self._number_of_results:
return 0
average = int(resultnum_sum / len(self._number_of_results))
if average < len(self.get_ordered_results()):
average = 0
return average
def add_unresponsive_engine(self, engine_name: str, error_type: str, suspended: bool = False):
with self._lock:
if self._closed:
@@ -7,9 +7,7 @@
<title>SearXNG search: {{ q|e }}</title>
<link>{{ url_for('search', _external=True) }}?q={{ q|e }}</link>
<description>Search results for "{{ q|e }}" - SearXNG</description>
<opensearch:totalResults>{{ number_of_results }}</opensearch:totalResults>
<opensearch:startIndex>1</opensearch:startIndex>
<opensearch:itemsPerPage>{{ number_of_results }}</opensearch:itemsPerPage>
<atom:link rel="search" type="application/opensearchdescription+xml" href="{{ opensearch_url }}"/>
<opensearch:Query role="request" searchTerms="{{ q|e }}" startPage="1" />
{% if error_message %}
-4
View File
@@ -26,10 +26,6 @@
<div id="sidebar">
{%- if number_of_results != '0' -%}
<p id="result_count"><small>{{ _('Number of results') }}: {{ number_of_results }}</small></p>
{%- endif -%}
{%- if infoboxes -%}
<div id="infoboxes">
<details open class="sidebar-collapsible">
-4
View File
@@ -43,7 +43,6 @@ from flask.json import jsonify
from flask_babel import (
Babel,
gettext,
format_decimal,
)
import searx
@@ -564,7 +563,6 @@ def index_error(output_format: str, error_message: str):
'opensearch_response_rss.xml',
results=[],
q=sxng_request.form['q'] if 'q' in sxng_request.form else '',
number_of_results=0,
error_message=error_message,
)
return Response(response_rss, mimetype='text/xml')
@@ -724,7 +722,6 @@ def search():
'opensearch_response_rss.xml',
results=results,
q=sxng_request.form['q'],
number_of_results=result_container.number_of_results,
)
return Response(response_rss, mimetype='text/xml')
@@ -761,7 +758,6 @@ def search():
selected_categories = search_query.categories,
pageno = search_query.pageno,
time_range = search_query.time_range or '',
number_of_results = format_decimal(result_container.number_of_results),
suggestions = suggestion_urls,
answers = result_container.answers,
corrections = correction_urls,
-1
View File
@@ -163,7 +163,6 @@ def get_json_response(sq: "SearchQuery", rc: "ResultContainer") -> str:
"""Returns the JSON string of the results to a query (``application/json``)"""
data = {
'query': sq.query,
'number_of_results': rc.number_of_results,
'results': [_.as_dict() for _ in rc.get_ordered_results()],
'answers': [_.as_dict() for _ in rc.answers],
'corrections': list(rc.corrections),
-3
View File
@@ -57,7 +57,6 @@ class ViewsTestCase(SearxTestCase): # pylint: disable=too-many-public-methods
infoboxes=[],
unresponsive_engines=set(),
results=test_results,
number_of_results=3,
results_length=lambda: len(test_results),
get_timings=lambda: timings,
redirect_url=None,
@@ -161,8 +160,6 @@ class ViewsTestCase(SearxTestCase): # pylint: disable=too-many-public-methods
self.assertIn(b'<description>Search results for "test" - SearXNG</description>', result.data)
self.assertIn(b'<opensearch:totalResults>3</opensearch:totalResults>', result.data)
self.assertIn(b'<title>First Test</title>', result.data)
self.assertIn(b'<link>http://first.test.xyz</link>', result.data)