mirror of
https://github.com/searxng/searxng.git
synced 2026-06-22 17:48:33 +02:00
Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 5bae05514b | |||
| 00ca5776f2 | |||
| 577f5f2f30 | |||
| 253dc86c10 | |||
| 3066bc19eb |
@@ -41,7 +41,9 @@ safesearch_cookies = {0: "-2", 1: None, 2: "1"}
|
||||
safesearch_args = {0: "1", 1: None, 2: "1"}
|
||||
|
||||
search_path_map = {"images": "i", "videos": "v", "news": "news"}
|
||||
|
||||
_HTTP_User_Agent: str = gen_useragent()
|
||||
send_accept_language_header = False
|
||||
|
||||
|
||||
def init(engine_settings: dict[str, t.Any]):
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Public domain image archive"""
|
||||
|
||||
import re
|
||||
|
||||
from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
|
||||
from json import dumps
|
||||
|
||||
@@ -49,6 +51,8 @@ paging = True
|
||||
|
||||
__CACHED_API_URL = None
|
||||
|
||||
_API_URL_RE = re.compile(r"\"(https://.*?/search-proxy)\"")
|
||||
|
||||
|
||||
def _clean_url(url):
|
||||
parsed = urlparse(url)
|
||||
@@ -74,11 +78,12 @@ def _get_algolia_api_url():
|
||||
if resp.status_code != 200:
|
||||
raise LookupError("Failed to obtain AWS api url for PDImageArchive")
|
||||
|
||||
api_url = extr(resp.text, 'const r="', '"', default=None)
|
||||
|
||||
if api_url is None:
|
||||
api_url_match = _API_URL_RE.search(resp.text)
|
||||
if api_url_match is None:
|
||||
raise LookupError("Couldn't obtain AWS api url for PDImageArchive")
|
||||
|
||||
api_url = api_url_match.group(1)
|
||||
|
||||
__CACHED_API_URL = api_url
|
||||
return api_url
|
||||
|
||||
|
||||
@@ -174,6 +174,10 @@ number, but an offset.'''
|
||||
first_page_num = 1
|
||||
'''Number of the first page (usually 0 or 1).'''
|
||||
|
||||
send_page_num_on_first_page = True
|
||||
'''Whether to include the page number in the request for the first page.
|
||||
This can help if an engine blocks requests that send a page number for the first page.'''
|
||||
|
||||
time_range_support = False
|
||||
'''Engine supports search time range.'''
|
||||
|
||||
@@ -238,10 +242,14 @@ def request(query, params):
|
||||
if safe_search_val is not None:
|
||||
safe_search = safe_search_map[safe_search_val]
|
||||
|
||||
pageno = ""
|
||||
if send_page_num_on_first_page or params["pageno"] != 1:
|
||||
pageno = (params['pageno'] - 1) * page_size + first_page_num
|
||||
|
||||
fargs = {
|
||||
'query': urlencode({'q': query})[2:],
|
||||
'lang': lang,
|
||||
'pageno': (params['pageno'] - 1) * page_size + first_page_num,
|
||||
'pageno': pageno,
|
||||
'time_range': time_range,
|
||||
'safe_search': safe_search,
|
||||
}
|
||||
|
||||
@@ -152,7 +152,6 @@ class OnlineProcessor(EngineProcessor):
|
||||
# add Accept-Language header
|
||||
# https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Accept-Language
|
||||
|
||||
headers["Accept-Language"] = "en,en-US;q=0.7,en;q=0.3"
|
||||
if self.engine.send_accept_language_header and search_query.locale:
|
||||
_l = search_query.locale.language
|
||||
_t = search_query.locale.territory or _l
|
||||
|
||||
@@ -964,6 +964,21 @@ engines:
|
||||
timeout: 8.0
|
||||
disabled: true
|
||||
|
||||
- name: gabanza
|
||||
engine: xpath
|
||||
search_url: https://www.gabanza.com/search?query={query}
|
||||
shortcut: gab
|
||||
disabled: true
|
||||
results_xpath: //div[contains(@class, "border-t")]/div/div
|
||||
url_xpath: .//a/@href
|
||||
title_xpath: ./a
|
||||
content_xpath: .//p
|
||||
about:
|
||||
website: https://www.gabanza.com
|
||||
use_official_api: false
|
||||
require_api_key: false
|
||||
results: HTML
|
||||
|
||||
- name: geizhals
|
||||
engine: geizhals
|
||||
shortcut: geiz
|
||||
@@ -2593,6 +2608,23 @@ engines:
|
||||
shortcut: wttr
|
||||
timeout: 9.0
|
||||
|
||||
- name: zapmeta
|
||||
engine: xpath
|
||||
shortcut: zpm
|
||||
search_url: https://www.zapmeta.com/search?q={query}&pg={pageno}
|
||||
results_xpath: //article[contains(@class, "organic-results-item")]
|
||||
url_xpath: ./h2/a/@href
|
||||
title_xpath: ./h2
|
||||
content_xpath: ./p
|
||||
paging: true
|
||||
send_page_num_on_first_page: false # otherwise the site blocks the first-page request
|
||||
disabled: true
|
||||
about:
|
||||
website: https://www.zapmeta.com/
|
||||
use_official_api: false
|
||||
require_api_key: false
|
||||
results: HTML
|
||||
|
||||
- name: braveapi
|
||||
engine: braveapi
|
||||
# read https://docs.searxng.org/dev/engines/online/brave.html
|
||||
|
||||
Reference in New Issue
Block a user