mirror of
https://github.com/searxng/searxng.git
synced 2026-05-26 04:40:12 +02:00
[unbloat] drop meaningless field `number_of_results_xpath` from results (#6130)
In the result list, ``number_of_results`` indicates the number of hits in the index; it does not indicate how many results are in the answer. In the past, search engines such as Google or DDG showed, on the first page of results for a search term, how many hits there were in total for that term in their index. This information was aggregated in SearXNG and delivered under ``number_of_results``. Nowadays the search engines no longer indicate how many hits there are in the index, so this field in SearXNG is superfluous as well. - https://github.com/searxng/searxng/issues/2457#issuecomment-2566181574 - https://github.com/searxng/searxng/issues/2987 - https://github.com/searxng/searxng/issues/5034 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
@@ -39,7 +39,6 @@ url_xpath = './h4/a/@href'
|
|||||||
title_xpath = './h4/a[1]'
|
title_xpath = './h4/a[1]'
|
||||||
content_xpath = './/p[1]'
|
content_xpath = './/p[1]'
|
||||||
correction_xpath = '//*[@id="didYouMean"]//a'
|
correction_xpath = '//*[@id="didYouMean"]//a'
|
||||||
number_of_results_xpath = '//*[@id="totalResults"]'
|
|
||||||
name_token_xpath = '//form[@id="searchForm"]/input[@type="hidden"]/@name'
|
name_token_xpath = '//form[@id="searchForm"]/input[@type="hidden"]/@name'
|
||||||
value_token_xpath = '//form[@id="searchForm"]/input[@type="hidden"]/@value'
|
value_token_xpath = '//form[@id="searchForm"]/input[@type="hidden"]/@value'
|
||||||
|
|
||||||
@@ -107,14 +106,6 @@ def response(resp):
|
|||||||
for correction in eval_xpath_list(dom, correction_xpath):
|
for correction in eval_xpath_list(dom, correction_xpath):
|
||||||
results.append({'correction': extract_text(correction)})
|
results.append({'correction': extract_text(correction)})
|
||||||
|
|
||||||
# get number of results
|
|
||||||
number_of_results = eval_xpath(dom, number_of_results_xpath)
|
|
||||||
if number_of_results:
|
|
||||||
try:
|
|
||||||
results.append({'number_of_results': int(extract_text(number_of_results))})
|
|
||||||
except: # pylint: disable=bare-except
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Update the tokens to the newest ones
|
# Update the tokens to the newest ones
|
||||||
token_str = _get_tokens(dom)
|
token_str = _get_tokens(dom)
|
||||||
CACHE.set('ahmia-tokens', token_str, expire=60 * 60)
|
CACHE.set('ahmia-tokens', token_str, expire=60 * 60)
|
||||||
|
|||||||
@@ -13,7 +13,6 @@ implementations are shared by other engines:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
import re
|
|
||||||
import typing as t
|
import typing as t
|
||||||
from urllib.parse import parse_qs, urlencode, urlparse
|
from urllib.parse import parse_qs, urlencode, urlparse
|
||||||
|
|
||||||
@@ -159,12 +158,6 @@ def response(resp: "SXNG_Response") -> list[dict[str, t.Any]]:
|
|||||||
|
|
||||||
results.append({"url": href, "title": title, "content": content})
|
results.append({"url": href, "title": title, "content": content})
|
||||||
|
|
||||||
if results:
|
|
||||||
result_len_container = "".join(eval_xpath(dom, '//span[@class="sb_count"]//text()'))
|
|
||||||
result_len_container = re.sub(r"[^0-9]", "", result_len_container)
|
|
||||||
if result_len_container:
|
|
||||||
results.append({"number_of_results": int(result_len_container)})
|
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -109,7 +109,6 @@ def search(query: str, params: "RequestParams") -> EngineResults:
|
|||||||
kvmap=kvmap,
|
kvmap=kvmap,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
res.add(res.types.LegacyResult(number_of_results=count))
|
|
||||||
|
|
||||||
# cache counter value for 20sec
|
# cache counter value for 20sec
|
||||||
CACHE.set("count", count, expire=20)
|
CACHE.set("count", count, expire=20)
|
||||||
|
|||||||
@@ -176,6 +176,4 @@ def response(resp):
|
|||||||
|
|
||||||
results.append(result)
|
results.append(result)
|
||||||
|
|
||||||
results.append({'number_of_results': len(json_data['topics'])})
|
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
"""Duden"""
|
"""Duden"""
|
||||||
|
|
||||||
import re
|
|
||||||
from urllib.parse import quote, urljoin
|
from urllib.parse import quote, urljoin
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex
|
from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex
|
||||||
@@ -51,13 +50,6 @@ def response(resp):
|
|||||||
|
|
||||||
dom = html.fromstring(resp.text)
|
dom = html.fromstring(resp.text)
|
||||||
|
|
||||||
number_of_results_element = eval_xpath_getindex(
|
|
||||||
dom, '//a[@class="active" and contains(@href,"/suchen/dudenonline")]/span/text()', 0, default=None
|
|
||||||
)
|
|
||||||
if number_of_results_element is not None:
|
|
||||||
number_of_results_string = re.sub('[^0-9]', '', number_of_results_element)
|
|
||||||
results.append({'number_of_results': int(number_of_results_string)})
|
|
||||||
|
|
||||||
for result in eval_xpath_list(dom, '//section[not(contains(@class, "essay"))]'):
|
for result in eval_xpath_list(dom, '//section[not(contains(@class, "essay"))]'):
|
||||||
url = eval_xpath_getindex(result, './/h2/a', 0).get('href')
|
url = eval_xpath_getindex(result, './/h2/a', 0).get('href')
|
||||||
url = urljoin(base_url, url)
|
url = urljoin(base_url, url)
|
||||||
|
|||||||
@@ -93,7 +93,6 @@ def search(query, params) -> EngineResults:
|
|||||||
|
|
||||||
query = _client.find({key: q}).skip((params['pageno'] - 1) * results_per_page).limit(results_per_page)
|
query = _client.find({key: q}).skip((params['pageno'] - 1) * results_per_page).limit(results_per_page)
|
||||||
|
|
||||||
res.add(res.types.LegacyResult(number_of_results=query.count()))
|
|
||||||
for row in query:
|
for row in query:
|
||||||
del row['_id']
|
del row['_id']
|
||||||
kvmap = {str(k): str(v) for k, v in row.items()}
|
kvmap = {str(k): str(v) for k, v in row.items()}
|
||||||
|
|||||||
@@ -54,6 +54,4 @@ def response(resp):
|
|||||||
|
|
||||||
results.extend({'suggestion': s} for s in response_json['suggestions'])
|
results.extend({'suggestion': s} for s in response_json['suggestions'])
|
||||||
|
|
||||||
results.append({'number_of_results': response_json['number_of_results']})
|
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|||||||
@@ -211,8 +211,4 @@ def response(resp) -> EngineResults:
|
|||||||
|
|
||||||
# append number of results
|
# append number of results
|
||||||
|
|
||||||
number_of_results = json_data.get('num_matches')
|
|
||||||
if number_of_results:
|
|
||||||
results.append({'number_of_results': number_of_results})
|
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|||||||
@@ -529,7 +529,7 @@ class LegacyResult(dict[str, t.Any]):
|
|||||||
# the img_src are equal.
|
# the img_src are equal.
|
||||||
return hash(f"{self.template}|{self.url}|{self.img_src}")
|
return hash(f"{self.template}|{self.url}|{self.img_src}")
|
||||||
|
|
||||||
if not any(cls in self for cls in ["suggestion", "correction", "infobox", "number_of_results", "engine_data"]):
|
if not any(cls in self for cls in ["suggestion", "correction", "infobox", "engine_data"]):
|
||||||
# Ordinary url-results are equal if their values for template,
|
# Ordinary url-results are equal if their values for template,
|
||||||
# parsed_url (without schema) and img_src` are equal.
|
# parsed_url (without schema) and img_src` are equal.
|
||||||
|
|
||||||
|
|||||||
@@ -69,7 +69,6 @@ class ResultContainer:
|
|||||||
self.answers = AnswerSet()
|
self.answers = AnswerSet()
|
||||||
self.corrections = set()
|
self.corrections = set()
|
||||||
|
|
||||||
self._number_of_results: list[int] = []
|
|
||||||
self.engine_data: dict[str, dict[str, str]] = defaultdict(dict)
|
self.engine_data: dict[str, dict[str, str]] = defaultdict(dict)
|
||||||
self._closed: bool = False
|
self._closed: bool = False
|
||||||
self.paging: bool = False
|
self.paging: bool = False
|
||||||
@@ -135,11 +134,6 @@ class ResultContainer:
|
|||||||
self._merge_infobox(result)
|
self._merge_infobox(result)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if "number_of_results" in result:
|
|
||||||
if self.on_result(result):
|
|
||||||
self._number_of_results.append(result["number_of_results"])
|
|
||||||
continue
|
|
||||||
|
|
||||||
if "engine_data" in result:
|
if "engine_data" in result:
|
||||||
if self.on_result(result):
|
if self.on_result(result):
|
||||||
if result.engine:
|
if result.engine:
|
||||||
@@ -252,25 +246,6 @@ class ResultContainer:
|
|||||||
self._main_results_sorted = gresults
|
self._main_results_sorted = gresults
|
||||||
return self._main_results_sorted
|
return self._main_results_sorted
|
||||||
|
|
||||||
@property
|
|
||||||
def number_of_results(self) -> int:
|
|
||||||
"""Returns the average of results number, returns zero if the average
|
|
||||||
result number is smaller than the actual result count."""
|
|
||||||
|
|
||||||
if not self._closed:
|
|
||||||
log.error("call to ResultContainer.number_of_results before ResultContainer.close")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
with self._lock:
|
|
||||||
resultnum_sum = sum(self._number_of_results)
|
|
||||||
if not resultnum_sum or not self._number_of_results:
|
|
||||||
return 0
|
|
||||||
|
|
||||||
average = int(resultnum_sum / len(self._number_of_results))
|
|
||||||
if average < len(self.get_ordered_results()):
|
|
||||||
average = 0
|
|
||||||
return average
|
|
||||||
|
|
||||||
def add_unresponsive_engine(self, engine_name: str, error_type: str, suspended: bool = False):
|
def add_unresponsive_engine(self, engine_name: str, error_type: str, suspended: bool = False):
|
||||||
with self._lock:
|
with self._lock:
|
||||||
if self._closed:
|
if self._closed:
|
||||||
|
|||||||
@@ -7,9 +7,7 @@
|
|||||||
<title>SearXNG search: {{ q|e }}</title>
|
<title>SearXNG search: {{ q|e }}</title>
|
||||||
<link>{{ url_for('search', _external=True) }}?q={{ q|e }}</link>
|
<link>{{ url_for('search', _external=True) }}?q={{ q|e }}</link>
|
||||||
<description>Search results for "{{ q|e }}" - SearXNG</description>
|
<description>Search results for "{{ q|e }}" - SearXNG</description>
|
||||||
<opensearch:totalResults>{{ number_of_results }}</opensearch:totalResults>
|
|
||||||
<opensearch:startIndex>1</opensearch:startIndex>
|
<opensearch:startIndex>1</opensearch:startIndex>
|
||||||
<opensearch:itemsPerPage>{{ number_of_results }}</opensearch:itemsPerPage>
|
|
||||||
<atom:link rel="search" type="application/opensearchdescription+xml" href="{{ opensearch_url }}"/>
|
<atom:link rel="search" type="application/opensearchdescription+xml" href="{{ opensearch_url }}"/>
|
||||||
<opensearch:Query role="request" searchTerms="{{ q|e }}" startPage="1" />
|
<opensearch:Query role="request" searchTerms="{{ q|e }}" startPage="1" />
|
||||||
{% if error_message %}
|
{% if error_message %}
|
||||||
|
|||||||
@@ -26,10 +26,6 @@
|
|||||||
|
|
||||||
<div id="sidebar">
|
<div id="sidebar">
|
||||||
|
|
||||||
{%- if number_of_results != '0' -%}
|
|
||||||
<p id="result_count"><small>{{ _('Number of results') }}: {{ number_of_results }}</small></p>
|
|
||||||
{%- endif -%}
|
|
||||||
|
|
||||||
{%- if infoboxes -%}
|
{%- if infoboxes -%}
|
||||||
<div id="infoboxes">
|
<div id="infoboxes">
|
||||||
<details open class="sidebar-collapsible">
|
<details open class="sidebar-collapsible">
|
||||||
|
|||||||
@@ -43,7 +43,6 @@ from flask.json import jsonify
|
|||||||
from flask_babel import (
|
from flask_babel import (
|
||||||
Babel,
|
Babel,
|
||||||
gettext,
|
gettext,
|
||||||
format_decimal,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
import searx
|
import searx
|
||||||
@@ -564,7 +563,6 @@ def index_error(output_format: str, error_message: str):
|
|||||||
'opensearch_response_rss.xml',
|
'opensearch_response_rss.xml',
|
||||||
results=[],
|
results=[],
|
||||||
q=sxng_request.form['q'] if 'q' in sxng_request.form else '',
|
q=sxng_request.form['q'] if 'q' in sxng_request.form else '',
|
||||||
number_of_results=0,
|
|
||||||
error_message=error_message,
|
error_message=error_message,
|
||||||
)
|
)
|
||||||
return Response(response_rss, mimetype='text/xml')
|
return Response(response_rss, mimetype='text/xml')
|
||||||
@@ -724,7 +722,6 @@ def search():
|
|||||||
'opensearch_response_rss.xml',
|
'opensearch_response_rss.xml',
|
||||||
results=results,
|
results=results,
|
||||||
q=sxng_request.form['q'],
|
q=sxng_request.form['q'],
|
||||||
number_of_results=result_container.number_of_results,
|
|
||||||
)
|
)
|
||||||
return Response(response_rss, mimetype='text/xml')
|
return Response(response_rss, mimetype='text/xml')
|
||||||
|
|
||||||
@@ -761,7 +758,6 @@ def search():
|
|||||||
selected_categories = search_query.categories,
|
selected_categories = search_query.categories,
|
||||||
pageno = search_query.pageno,
|
pageno = search_query.pageno,
|
||||||
time_range = search_query.time_range or '',
|
time_range = search_query.time_range or '',
|
||||||
number_of_results = format_decimal(result_container.number_of_results),
|
|
||||||
suggestions = suggestion_urls,
|
suggestions = suggestion_urls,
|
||||||
answers = result_container.answers,
|
answers = result_container.answers,
|
||||||
corrections = correction_urls,
|
corrections = correction_urls,
|
||||||
|
|||||||
@@ -163,7 +163,6 @@ def get_json_response(sq: "SearchQuery", rc: "ResultContainer") -> str:
|
|||||||
"""Returns the JSON string of the results to a query (``application/json``)"""
|
"""Returns the JSON string of the results to a query (``application/json``)"""
|
||||||
data = {
|
data = {
|
||||||
'query': sq.query,
|
'query': sq.query,
|
||||||
'number_of_results': rc.number_of_results,
|
|
||||||
'results': [_.as_dict() for _ in rc.get_ordered_results()],
|
'results': [_.as_dict() for _ in rc.get_ordered_results()],
|
||||||
'answers': [_.as_dict() for _ in rc.answers],
|
'answers': [_.as_dict() for _ in rc.answers],
|
||||||
'corrections': list(rc.corrections),
|
'corrections': list(rc.corrections),
|
||||||
|
|||||||
@@ -57,7 +57,6 @@ class ViewsTestCase(SearxTestCase): # pylint: disable=too-many-public-methods
|
|||||||
infoboxes=[],
|
infoboxes=[],
|
||||||
unresponsive_engines=set(),
|
unresponsive_engines=set(),
|
||||||
results=test_results,
|
results=test_results,
|
||||||
number_of_results=3,
|
|
||||||
results_length=lambda: len(test_results),
|
results_length=lambda: len(test_results),
|
||||||
get_timings=lambda: timings,
|
get_timings=lambda: timings,
|
||||||
redirect_url=None,
|
redirect_url=None,
|
||||||
@@ -161,8 +160,6 @@ class ViewsTestCase(SearxTestCase): # pylint: disable=too-many-public-methods
|
|||||||
|
|
||||||
self.assertIn(b'<description>Search results for "test" - SearXNG</description>', result.data)
|
self.assertIn(b'<description>Search results for "test" - SearXNG</description>', result.data)
|
||||||
|
|
||||||
self.assertIn(b'<opensearch:totalResults>3</opensearch:totalResults>', result.data)
|
|
||||||
|
|
||||||
self.assertIn(b'<title>First Test</title>', result.data)
|
self.assertIn(b'<title>First Test</title>', result.data)
|
||||||
|
|
||||||
self.assertIn(b'<link>http://first.test.xyz</link>', result.data)
|
self.assertIn(b'<link>http://first.test.xyz</link>', result.data)
|
||||||
|
|||||||
Reference in New Issue
Block a user