mirror of
https://github.com/searxng/searxng.git
synced 2026-05-26 21:00:13 +02:00
dd27fce3b7
In the result-list, the ``number_of_results`` indicate the number of hits in the Index, they do not indicate how many results are in the answer. In the past, search engines such as google or ddg had an indication on the first page of a search term of how many hits there were for this term in total in their index. This info was added up in SearXNG and delivered under ``number_of_results``. Nowadays the search engines no longer indicate how many hits there are in the index and so this field in SearXNG is also superfluous. - https://github.com/searxng/searxng/issues/2457#issuecomment-2566181574 - https://github.com/searxng/searxng/issues/2987 - https://github.com/searxng/searxng/issues/5034 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
62 lines
1.8 KiB
Python
62 lines
1.8 KiB
Python
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
"""Duden"""
|
|
|
|
from urllib.parse import quote, urljoin
|
|
from lxml import html
|
|
from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex
|
|
from searx.network import raise_for_httperror
|
|
|
|
# Descriptive metadata: which website this module targets and how it is accessed.
about = {
    "website": "https://www.duden.de",
    "wikidata_id": "Q73624591",
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",
    "language": "de",
}
|
|
|
|
categories = ['dictionaries']
# request() reads params['pageno'], so more than one result page can be fetched
paging = True

# URL templates: ``base_url`` is also used to resolve relative result links,
# ``search_url`` is the template for every page after the first one
# ({query} = URL-quoted search term, {offset} = zero-based page index).
base_url = 'https://www.duden.de/'
search_url = f'{base_url}suchen/dudenonline/{{query}}?search_api_fulltext=&page={{offset}}'
|
|
|
|
|
|
def request(query, params):
    """Fill ``params`` with the Duden search URL and HTTP options for *query*.

    ``params['pageno']`` is 1-based.  The first page is requested from the
    plain search path; every later page uses ``search_url`` with a zero-based
    ``page`` offset.  The (mutated) ``params`` mapping is returned.
    """
    page_index = params['pageno'] - 1
    encoded_query = quote(query)

    if page_index != 0:
        params['url'] = search_url.format(offset=page_index, query=encoded_query)
    else:
        # first page: no paging arguments in the URL
        first_page_fmt = base_url + 'suchen/dudenonline/{query}'
        params['url'] = first_page_fmt.format(query=encoded_query)

    # Past the last page of results the site answers with an HTTP redirect to
    # spelling corrections, whatever the page number is -- hence one redirect
    # is allowed here.
    params['soft_max_redirects'] = 1
    # HTTP error statuses are not raised automatically for this request.
    params['raise_for_httperror'] = False
    return params
|
|
|
|
|
|
def response(resp):
    """Turn a Duden search result page into a list of result dicts.

    A 404 response yields an empty list; any other HTTP error status is
    handed to ``raise_for_httperror``.  Every ``<section>`` whose ``class``
    does not contain ``essay`` becomes one ``{'url', 'title', 'content'}``
    entry, taken from its ``h2/a`` link and its ``<p>`` elements.
    """
    if resp.status_code == 404:
        return []

    raise_for_httperror(resp)

    tree = html.fromstring(resp.text)
    sections = eval_xpath_list(tree, '//section[not(contains(@class, "essay"))]')

    hits = []
    for section in sections:
        # the heading link carries the (possibly relative) target of the entry
        link = eval_xpath_getindex(section, './/h2/a', 0)
        hits.append(
            {
                'url': urljoin(base_url, link.get('href')),
                'title': eval_xpath(section, 'string(.//h2/a)').strip(),
                'content': extract_text(eval_xpath(section, './/p')),
            }
        )

    return hits
|