[feat] engines: add s1search general engine (#6186)

S1Search provides various search services, which all seem to be based, at least in part, on Google and Yahoo. The site looks somewhat suspicious, but the results are fine. You can find a list of their engines by using a subdomain finder like https://web-toolbox.dev/en/tools/subdomain-lookup and searching for `s1search.co`.
2026-06-14 22:06:52 +02:00 · 2026-06-13 14:18:04 +02:00
parent 031747f29e
commit e3d4fbe570
2 changed files with 130 additions and 0 deletions
@@ -0,0 +1,98 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Search engines by System1 (general).
+
+System1 is an advertising company, and provides all its search engines as a
+subdomain of ``s1search.co``.  As a result, it has more than 1000 subdomains, of
+which some work, and some don't.
+
+Some of the engines get their results from Google, others get them from Yahoo.
+"""
+
+import typing as t
+from urllib.parse import urlencode, urlparse, parse_qs
+
+from lxml import html
+
+from searx.result_types import EngineResults
+from searx.enginelib import EngineCache
+from searx.utils import eval_xpath_list, eval_xpath, extract_text
+
+if t.TYPE_CHECKING:
+    from searx.search.processors import OnlineParams
+    from searx.extended_types import SXNG_Response
+
+# Engine metadata: no official API is used, no API key is required and the
+# results are taken from HTML.
+about = {
+    "website": "https://s1search.co",
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": "HTML",
+}
+
+# Empty by default: init() raises ValueError until a base_url is configured.
+# NOTE(review): the GMX host in the inline hint below is not among the
+# base_url values configured for this engine in the settings -- possibly a
+# copy-paste leftover from another engine, confirm.
+base_url = ""  # alternatively: search.gmx.net
+categories = ["general"]
+
+# request() passes the page number as the "page" URL argument.
+paging = True
+
+CACHE: EngineCache
+"""Cache to store verification tokens for pagination."""
+
+
+def init(_):
+    """Refuse to start the engine when no ``base_url`` has been configured.
+
+    The argument is not used.
+
+    :raises ValueError: if the module level ``base_url`` is empty.
+    """
+    if base_url:
+        return
+    raise ValueError("base_url must be set")
+
+
+def setup(engine_settings: dict[str, t.Any]) -> bool:
+    """Create the module level :py:obj:`CACHE` for this engine.
+
+    :param engine_settings: settings of the engine; its ``name`` entry is
+        passed to :py:obj:`EngineCache`.
+    :return: always ``True``.
+    """
+    global CACHE  # pylint: disable=global-statement
+    engine_name = engine_settings["name"]
+    CACHE = EngineCache(engine_name)
+    return True
+
+
+def _cache_key(query: str, pageno: int) -> str:
+    """Return the cache key under which the token of a result page is stored.
+
+    The key is the query and the page number, joined by ``|``.
+    """
+    return "{}|{}".format(query, pageno)
+
+
+def request(query: str, params: "OnlineParams"):
+    """Build the ``/serp`` URL for *query* and the requested page.
+
+    For every page after the first one, the ``sc`` token looked up in
+    :py:obj:`CACHE` is added to the URL arguments.  If no token is cached for
+    that page, ``params["url"]`` is set to ``None`` and no URL is built.
+    """
+    pageno = params["pageno"]
+    args = {"q": query, "page": pageno}
+
+    if pageno > 1:
+        # sc is required for pagination to avoid rate-limits
+        token = CACHE.get(_cache_key(query, pageno))
+        if not token:
+            params["url"] = None
+            return
+        args["sc"] = token
+
+    query_string = urlencode(args)
+    params["url"] = f"{base_url}/serp?{query_string}"
+
+
+def response(resp: "SXNG_Response") -> EngineResults:
+    """Parse a result page into suggestions and main results.
+
+    As a side effect, the ``sc`` token of every pagination link found in the
+    page is stored in :py:obj:`CACHE`, because :py:obj:`request` needs it to
+    build the URL of that page.  Pagination links without a usable ``page`` or
+    ``sc`` argument are skipped, so that a single odd link can't discard the
+    results that have already been parsed.
+
+    :param resp: HTTP response of the ``/serp`` request.
+    :return: the suggestions and main results found in the page.
+    """
+    res = EngineResults()
+
+    doc = html.fromstring(resp.text)
+
+    for suggestion in eval_xpath_list(doc, "//div[@class='aylf-yahoo-bottom' or @class='aylf-yahoo-sidebar']/div"):
+        res.add(res.types.LegacyResult({"suggestion": extract_text(suggestion)}))
+
+    for result in eval_xpath_list(
+        doc, "//div[contains(@class, 'web-yahoo') or contains(@class, 'web-google')]/div[contains(@class, '__result')]"
+    ):
+        res.add(
+            res.types.MainResult(
+                url=extract_text(eval_xpath(result, ".//a[contains(@class, 'title')]/@href")),
+                title=extract_text(eval_xpath(result, ".//a[contains(@class, 'title')]")),
+                content=extract_text(eval_xpath(result, ".//span[contains(@class, 'description') or @class='']")),
+            )
+        )
+
+    # store pagination keys to be able to access next pages
+    for page_href in eval_xpath_list(doc, "//a[contains(@class, 'pagination__num')]"):
+        # target_url looks like "/serp?q=test&page=2&sc=RVlBPMDPVhWR20"
+        target_url = extract_text(eval_xpath(page_href, "./@href")) or ""
+        target_args = parse_qs(urlparse(target_url).query)
+        try:
+            pageno = int(target_args["page"][0])
+            sc = target_args["sc"][0]
+        except (KeyError, ValueError):
+            # no page number or no token in this link: nothing to cache
+            continue
+        CACHE.set(_cache_key(resp.search_params["query"], pageno), sc)
+
+    return res
@@ -2845,6 +2845,38 @@ engines:
      website: https://minecraft.wiki/
      wikidata_id: Q105533483

+  # s1search engines / mirrors with results from Google
+  - name: searchtoday
+    engine: s1search
+    shortcut: std
+    base_url: https://info.searchtoday.site
+    disabled: true
+
+  # - name: webcrawler
+  #   engine: s1search
+  #   shortcut: wc
+  #   base_url: https://www.webcrawler.com
+  #   disabled: true
+
+  # s1search engines / mirrors with results from Yahoo
+  # - name: excite
+  #   engine: s1search
+  #   shortcut: exc
+  #   base_url: https://results.excite.com.s1search.co
+  #   disabled: true
+
+  # - name: metacrawler
+  #   engine: s1search
+  #   shortcut: mec
+  #   base_url: https://search.metacrawler.com
+  #   disabled: true
+
+  - name: infospace
+    engine: s1search
+    shortcut: ifs
+    base_url: https://search.infospace.com
+    disabled: true
+
 # Doku engine lets you access to any Doku wiki instance:
 # A public one or a privete/corporate one.
 #  - name: ubuntuwiki