mirror of
https://github.com/searxng/searxng.git
synced 2026-06-22 09:38:34 +02:00
Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 5bae05514b | |||
| 00ca5776f2 | |||
| 577f5f2f30 | |||
| 253dc86c10 | |||
| 3066bc19eb |
@@ -41,7 +41,9 @@ safesearch_cookies = {0: "-2", 1: None, 2: "1"}
|
|||||||
safesearch_args = {0: "1", 1: None, 2: "1"}
|
safesearch_args = {0: "1", 1: None, 2: "1"}
|
||||||
|
|
||||||
search_path_map = {"images": "i", "videos": "v", "news": "news"}
|
search_path_map = {"images": "i", "videos": "v", "news": "news"}
|
||||||
|
|
||||||
_HTTP_User_Agent: str = gen_useragent()
|
_HTTP_User_Agent: str = gen_useragent()
|
||||||
|
send_accept_language_header = False
|
||||||
|
|
||||||
|
|
||||||
def init(engine_settings: dict[str, t.Any]):
|
def init(engine_settings: dict[str, t.Any]):
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
"""Public domain image archive"""
|
"""Public domain image archive"""
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
|
from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
|
||||||
from json import dumps
|
from json import dumps
|
||||||
|
|
||||||
@@ -49,6 +51,8 @@ paging = True
|
|||||||
|
|
||||||
__CACHED_API_URL = None
|
__CACHED_API_URL = None
|
||||||
|
|
||||||
|
_API_URL_RE = re.compile(r"\"(https://.*?/search-proxy)\"")
|
||||||
|
|
||||||
|
|
||||||
def _clean_url(url):
|
def _clean_url(url):
|
||||||
parsed = urlparse(url)
|
parsed = urlparse(url)
|
||||||
@@ -74,11 +78,12 @@ def _get_algolia_api_url():
|
|||||||
if resp.status_code != 200:
|
if resp.status_code != 200:
|
||||||
raise LookupError("Failed to obtain AWS api url for PDImageArchive")
|
raise LookupError("Failed to obtain AWS api url for PDImageArchive")
|
||||||
|
|
||||||
api_url = extr(resp.text, 'const r="', '"', default=None)
|
api_url_match = _API_URL_RE.search(resp.text)
|
||||||
|
if api_url_match is None:
|
||||||
if api_url is None:
|
|
||||||
raise LookupError("Couldn't obtain AWS api url for PDImageArchive")
|
raise LookupError("Couldn't obtain AWS api url for PDImageArchive")
|
||||||
|
|
||||||
|
api_url = api_url_match.group(1)
|
||||||
|
|
||||||
__CACHED_API_URL = api_url
|
__CACHED_API_URL = api_url
|
||||||
return api_url
|
return api_url
|
||||||
|
|
||||||
|
|||||||
@@ -174,6 +174,10 @@ number, but an offset.'''
|
|||||||
first_page_num = 1
|
first_page_num = 1
|
||||||
'''Number of the first page (usually 0 or 1).'''
|
'''Number of the first page (usually 0 or 1).'''
|
||||||
|
|
||||||
|
send_page_num_on_first_page = True
|
||||||
|
'''Whether to include the page number in the request for the first page.
|
||||||
|
This can help if an engine blocks requests that send a page number for the first page.'''
|
||||||
|
|
||||||
time_range_support = False
|
time_range_support = False
|
||||||
'''Engine supports search time range.'''
|
'''Engine supports search time range.'''
|
||||||
|
|
||||||
@@ -238,10 +242,14 @@ def request(query, params):
|
|||||||
if safe_search_val is not None:
|
if safe_search_val is not None:
|
||||||
safe_search = safe_search_map[safe_search_val]
|
safe_search = safe_search_map[safe_search_val]
|
||||||
|
|
||||||
|
pageno = ""
|
||||||
|
if send_page_num_on_first_page or params["pageno"] != 1:
|
||||||
|
pageno = (params['pageno'] - 1) * page_size + first_page_num
|
||||||
|
|
||||||
fargs = {
|
fargs = {
|
||||||
'query': urlencode({'q': query})[2:],
|
'query': urlencode({'q': query})[2:],
|
||||||
'lang': lang,
|
'lang': lang,
|
||||||
'pageno': (params['pageno'] - 1) * page_size + first_page_num,
|
'pageno': pageno,
|
||||||
'time_range': time_range,
|
'time_range': time_range,
|
||||||
'safe_search': safe_search,
|
'safe_search': safe_search,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -152,7 +152,6 @@ class OnlineProcessor(EngineProcessor):
|
|||||||
# add Accept-Language header
|
# add Accept-Language header
|
||||||
# https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Accept-Language
|
# https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Accept-Language
|
||||||
|
|
||||||
headers["Accept-Language"] = "en,en-US;q=0.7,en;q=0.3"
|
|
||||||
if self.engine.send_accept_language_header and search_query.locale:
|
if self.engine.send_accept_language_header and search_query.locale:
|
||||||
_l = search_query.locale.language
|
_l = search_query.locale.language
|
||||||
_t = search_query.locale.territory or _l
|
_t = search_query.locale.territory or _l
|
||||||
|
|||||||
@@ -964,6 +964,21 @@ engines:
|
|||||||
timeout: 8.0
|
timeout: 8.0
|
||||||
disabled: true
|
disabled: true
|
||||||
|
|
||||||
|
- name: gabanza
|
||||||
|
engine: xpath
|
||||||
|
search_url: https://www.gabanza.com/search?query={query}
|
||||||
|
shortcut: gab
|
||||||
|
disabled: true
|
||||||
|
results_xpath: //div[contains(@class, "border-t")]/div/div
|
||||||
|
url_xpath: .//a/@href
|
||||||
|
title_xpath: ./a
|
||||||
|
content_xpath: .//p
|
||||||
|
about:
|
||||||
|
website: https://www.gabanza.com
|
||||||
|
use_official_api: false
|
||||||
|
require_api_key: false
|
||||||
|
results: HTML
|
||||||
|
|
||||||
- name: geizhals
|
- name: geizhals
|
||||||
engine: geizhals
|
engine: geizhals
|
||||||
shortcut: geiz
|
shortcut: geiz
|
||||||
@@ -2593,6 +2608,23 @@ engines:
|
|||||||
shortcut: wttr
|
shortcut: wttr
|
||||||
timeout: 9.0
|
timeout: 9.0
|
||||||
|
|
||||||
|
- name: zapmeta
|
||||||
|
engine: xpath
|
||||||
|
shortcut: zpm
|
||||||
|
search_url: https://www.zapmeta.com/search?q={query}&pg={pageno}
|
||||||
|
results_xpath: //article[contains(@class, "organic-results-item")]
|
||||||
|
url_xpath: ./h2/a/@href
|
||||||
|
title_xpath: ./h2
|
||||||
|
content_xpath: ./p
|
||||||
|
paging: true
|
||||||
|
send_page_num_on_first_page: false # otherwise blocks requests
|
||||||
|
disabled: true
|
||||||
|
about:
|
||||||
|
website: https://www.zapmeta.com/
|
||||||
|
use_official_api: false
|
||||||
|
require_api_key: false
|
||||||
|
results: HTML
|
||||||
|
|
||||||
- name: braveapi
|
- name: braveapi
|
||||||
engine: braveapi
|
engine: braveapi
|
||||||
# read https://docs.searxng.org/dev/engines/online/brave.html
|
# read https://docs.searxng.org/dev/engines/online/brave.html
|
||||||
|
|||||||
Reference in New Issue
Block a user