[feat] engines: add zapmeta general search engine

[feat] engines: add gabanza general engine
[fix] online engines: send_accept_language_header is sent even if disabled
2026-06-22 09:38:34 +02:00 · 2026-06-03 22:38:59 +02:00 · 2026-06-03 22:38:23 +02:00 · 2026-06-03 22:37:13 +02:00 · 2026-06-03 22:37:13 +02:00 · 2026-06-03 22:35:21 +02:00
5 changed files with 51 additions and 5 deletions
@@ -41,7 +41,9 @@ safesearch_cookies = {0: "-2", 1: None, 2: "1"}
 safesearch_args = {0: "1", 1: None, 2: "1"}

 search_path_map = {"images": "i", "videos": "v", "news": "news"}
+
 _HTTP_User_Agent: str = gen_useragent()
+send_accept_language_header = False


 def init(engine_settings: dict[str, t.Any]):
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Public domain image archive"""

+import re
+
 from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
 from json import dumps

@@ -49,6 +51,8 @@ paging = True

 __CACHED_API_URL = None

+_API_URL_RE = re.compile(r"\"(https://.*?/search-proxy)\"")
+

 def _clean_url(url):
    parsed = urlparse(url)
@@ -74,11 +78,12 @@ def _get_algolia_api_url():
    if resp.status_code != 200:
        raise LookupError("Failed to obtain AWS api url for PDImageArchive")

-    api_url = extr(resp.text, 'const r="', '"', default=None)
-
-    if api_url is None:
+    api_url_match = _API_URL_RE.search(resp.text)
+    if api_url_match is None:
        raise LookupError("Couldn't obtain AWS api url for PDImageArchive")

+    api_url = api_url_match.group(1)
+
    __CACHED_API_URL = api_url
    return api_url

@@ -174,6 +174,10 @@ number, but an offset.'''
 first_page_num = 1
 '''Number of the first page (usually 0 or 1).'''

+send_page_num_on_first_page = True
+'''Whether to include the page number in the request for the first page.
+Set this to ``False`` if an engine blocks requests that send a page number for the first page.'''
+
 time_range_support = False
 '''Engine supports search time range.'''

@@ -238,10 +242,14 @@ def request(query, params):
    if safe_search_val is not None:
        safe_search = safe_search_map[safe_search_val]

+    pageno = ""
+    if send_page_num_on_first_page or params["pageno"] != 1:
+        pageno = (params['pageno'] - 1) * page_size + first_page_num
+
    fargs = {
        'query': urlencode({'q': query})[2:],
        'lang': lang,
-        'pageno': (params['pageno'] - 1) * page_size + first_page_num,
+        'pageno': pageno,
        'time_range': time_range,
        'safe_search': safe_search,
    }
@@ -152,7 +152,6 @@ class OnlineProcessor(EngineProcessor):
        # add Accept-Language header
        # https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Accept-Language

-        headers["Accept-Language"] = "en,en-US;q=0.7,en;q=0.3"
        if self.engine.send_accept_language_header and search_query.locale:
            _l = search_query.locale.language
            _t = search_query.locale.territory or _l
@@ -964,6 +964,21 @@ engines:
    timeout: 8.0
    disabled: true

+  - name: gabanza
+    engine: xpath
+    search_url: https://www.gabanza.com/search?query={query}
+    shortcut: gab
+    disabled: true
+    results_xpath: //div[contains(@class, "border-t")]/div/div
+    url_xpath: .//a/@href
+    title_xpath: ./a
+    content_xpath: .//p
+    about:
+      website: https://www.gabanza.com
+      use_official_api: false
+      require_api_key: false
+      results: HTML
+
  - name: geizhals
    engine: geizhals
    shortcut: geiz
@@ -2593,6 +2608,23 @@ engines:
    shortcut: wttr
    timeout: 9.0

+  - name: zapmeta
+    engine: xpath
+    shortcut: zpm
+    search_url: https://www.zapmeta.com/search?q={query}&pg={pageno}
+    results_xpath: //article[contains(@class, "organic-results-item")]
+    url_xpath: ./h2/a/@href
+    title_xpath: ./h2
+    content_xpath: ./p
+    paging: true
+    send_page_num_on_first_page: false  # otherwise the site blocks the request
+    disabled: true
+    about:
+      website: https://www.zapmeta.com/
+      use_official_api: false
+      require_api_key: false
+      results: HTML
+
  - name: braveapi
    engine: braveapi
    # read https://docs.searxng.org/dev/engines/online/brave.html
Author	SHA1	Message	Date
Bnyro	5bae05514b	[feat] engines: add zapmeta general search engine	2026-06-03 22:38:59 +02:00
Bnyro	00ca5776f2	[feat] engines: add gabanza general engine	2026-06-03 22:38:23 +02:00
Bnyro	577f5f2f30	[fix] online engines: send_accept_language_header is sent even if disabled	2026-06-03 22:37:13 +02:00
Bnyro	253dc86c10	[fix] duckduckgo: image requests get blocked	2026-06-03 22:37:13 +02:00
Bnyro	3066bc19eb	[fix] public domain image archive: fails to extract API url	2026-06-03 22:35:21 +02:00