Compare commits

...

5 Commits

5 changed files with 51 additions and 5 deletions
+2
View File
@@ -41,7 +41,9 @@ safesearch_cookies = {0: "-2", 1: None, 2: "1"}
safesearch_args = {0: "1", 1: None, 2: "1"}
search_path_map = {"images": "i", "videos": "v", "news": "news"}
_HTTP_User_Agent: str = gen_useragent()
send_accept_language_header = False
def init(engine_settings: dict[str, t.Any]):
+8 -3
View File
@@ -1,6 +1,8 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Public domain image archive"""
import re
from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
from json import dumps
@@ -49,6 +51,8 @@ paging = True
__CACHED_API_URL = None
_API_URL_RE = re.compile(r"\"(https://.*?/search-proxy)\"")
def _clean_url(url):
parsed = urlparse(url)
@@ -74,11 +78,12 @@ def _get_algolia_api_url():
if resp.status_code != 200:
raise LookupError("Failed to obtain AWS api url for PDImageArchive")
api_url = extr(resp.text, 'const r="', '"', default=None)
if api_url is None:
api_url_match = _API_URL_RE.search(resp.text)
if api_url_match is None:
raise LookupError("Couldn't obtain AWS api url for PDImageArchive")
api_url = api_url_match.group(1)
__CACHED_API_URL = api_url
return api_url
+9 -1
View File
@@ -174,6 +174,10 @@ number, but an offset.'''
first_page_num = 1
'''Number of the first page (usually 0 or 1).'''
send_page_num_on_first_page = True
'''Whether to include the page number in the request for the first page.
This can help if an engine blocks requests that send a page number for the first page.'''
time_range_support = False
'''Engine supports search time range.'''
@@ -238,10 +242,14 @@ def request(query, params):
if safe_search_val is not None:
safe_search = safe_search_map[safe_search_val]
pageno = ""
if send_page_num_on_first_page or params["pageno"] != 1:
pageno = (params['pageno'] - 1) * page_size + first_page_num
fargs = {
'query': urlencode({'q': query})[2:],
'lang': lang,
'pageno': (params['pageno'] - 1) * page_size + first_page_num,
'pageno': pageno,
'time_range': time_range,
'safe_search': safe_search,
}
-1
View File
@@ -152,7 +152,6 @@ class OnlineProcessor(EngineProcessor):
# add Accept-Language header
# https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Accept-Language
headers["Accept-Language"] = "en,en-US;q=0.7,en;q=0.3"
if self.engine.send_accept_language_header and search_query.locale:
_l = search_query.locale.language
_t = search_query.locale.territory or _l
+32
View File
@@ -964,6 +964,21 @@ engines:
timeout: 8.0
disabled: true
- name: gabanza
engine: xpath
search_url: https://www.gabanza.com/search?query={query}
shortcut: gab
disabled: true
results_xpath: //div[contains(@class, "border-t")]/div/div
url_xpath: .//a/@href
title_xpath: ./a
content_xpath: .//p
about:
website: https://www.gabanza.com
use_official_api: false
require_api_key: false
results: HTML
- name: geizhals
engine: geizhals
shortcut: geiz
@@ -2593,6 +2608,23 @@ engines:
shortcut: wttr
timeout: 9.0
- name: zapmeta
engine: xpath
shortcut: zpm
search_url: https://www.zapmeta.com/search?q={query}&pg={pageno}
results_xpath: //article[contains(@class, "organic-results-item")]
url_xpath: ./h2/a/@href
title_xpath: ./h2
content_xpath: ./p
paging: true
send_page_num_on_first_page: false # otherwise blocks requests
disabled: true
about:
website: https://www.zapmeta.com/
use_official_api: false
require_api_key: false
results: HTML
- name: braveapi
engine: braveapi
# read https://docs.searxng.org/dev/engines/online/brave.html