[feat] engines: add s1search general engine (#6186)

S1Search provides various different search services, which all seem to be somewhat based on Google and Yahoo. The site looks kinda suspicious, but the results are fine. You can find a list of their engines by using a subdomain finder like https://web-toolbox.dev/en/tools/subdomain-lookup and search for `s1search.co`.
2026-06-15 06:16:51 +02:00 · 2026-06-13 14:18:04 +02:00
parent 031747f29e
commit e3d4fbe570
2 changed files with 130 additions and 0 deletions
@@ -0,0 +1,98 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Search engines by System1 (general).
 System1 is an advertising company, and provides all its search engines as a
 subdomain of ``s1search.co``.  As a result, it has more than 1000 subdomains, of
 which some work, and some don't.
 Some of the engines get their results from Google, others get them from Yahoo.
 """
 import typing as t
 from urllib.parse import urlencode, urlparse, parse_qs
 from lxml import html
 from searx.result_types import EngineResults
 from searx.enginelib import EngineCache
 from searx.utils import eval_xpath_list, eval_xpath, extract_text
 if t.TYPE_CHECKING:
    from searx.search.processors import OnlineParams
    from searx.extended_types import SXNG_Response
 about = {
    "website": "https://s1search.co",
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",
 }
 base_url = ""  # alternatively: search.gmx.net
 categories = ["general"]
 paging = True
 CACHE: EngineCache
 """Cache to store verification tokens for pagination."""
 def init(_):
    if not base_url:
        raise ValueError("base_url must be set")
 def setup(engine_settings: dict[str, t.Any]) -> bool:
    global CACHE  # pylint: disable=global-statement
    CACHE = EngineCache(engine_settings["name"])
    return True
 def _cache_key(query: str, pageno: int) -> str:
    return f"{query}|{pageno}"
 def request(query: str, params: "OnlineParams"):
    args = {"q": query, "page": params["pageno"]}
    if params["pageno"] > 1:
        sc = CACHE.get(_cache_key(query, params["pageno"]))
        # sc is required for pagination to avoid rate-limits
        if not sc:
            params["url"] = None
            return
        args["sc"] = sc
    params["url"] = f"{base_url}/serp?{urlencode(args)}"
 def response(resp: "SXNG_Response") -> EngineResults:
    res = EngineResults()
    doc = html.fromstring(resp.text)
    for suggestion in eval_xpath_list(doc, "//div[@class='aylf-yahoo-bottom' or @class='aylf-yahoo-sidebar']/div"):
        res.add(res.types.LegacyResult({"suggestion": extract_text(suggestion)}))
    for result in eval_xpath_list(
        doc, "//div[contains(@class, 'web-yahoo') or contains(@class, 'web-google')]/div[contains(@class, '__result')]"
    ):
        res.add(
            res.types.MainResult(
                url=extract_text(eval_xpath(result, ".//a[contains(@class, 'title')]/@href")),
                title=extract_text(eval_xpath(result, ".//a[contains(@class, 'title')]")),
                content=extract_text(eval_xpath(result, ".//span[contains(@class, 'description') or @class='']")),
            )
        )
    # store pagination keys to be able to access next pages
    for page_href in eval_xpath_list(doc, "//a[contains(@class, 'pagination__num')]"):
        # target_url looks like "/serp?q=test&page=2&sc=RVlBPMDPVhWR20"
        target_url = extract_text(eval_xpath(page_href, "./@href"))
        target_url = parse_qs(urlparse(target_url).query)
        pageno = int(target_url["page"][0])
        sc = target_url["sc"][0]
        CACHE.set(_cache_key(resp.search_params["query"], pageno), sc)
    return res
@@ -2845,6 +2845,38 @@ engines:
      website: https://minecraft.wiki/
      wikidata_id: Q105533483
  # s1search google engines / mirrors
  - name: searchtoday
    engine: s1search
    shortcut: std
    base_url: https://info.searchtoday.site
    disabled: true
  # - name: webcrawler
  #   engine: s1search
  #   shortcut: wc
  #   base_url: https://www.webcrawler.com
  #   disabled: true
  # s1search yahoo engines / mirrors
  # - name: excite
  #   engine: s1search
  #   shortcut: exc
  #   base_url: https://results.excite.com.s1search.co
  #   disabled: true
  # - name: metacrawler
  #   engine: s1search
  #   shortcut: mec
  #   base_url: https://search.metacrawler.com
  #   disabled: true
  - name: infospace
    engine: s1search
    shortcut: ifs
    base_url: https://search.infospace.com
    disabled: true
 # Doku engine lets you access to any Doku wiki instance:
 # A public one or a privete/corporate one.
 #  - name: ubuntuwiki