mirror of
https://github.com/searxng/searxng.git
synced 2026-06-14 22:06:52 +02:00
e3d4fbe570
S1Search provides various different search services, which all seem to be somewhat based on Google and Yahoo. The site looks kinda suspicious, but the results are fine. You can find a list of their engines by using a subdomain finder like https://web-toolbox.dev/en/tools/subdomain-lookup and search for `s1search.co`.
99 lines
3.1 KiB
Python
99 lines
3.1 KiB
Python
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
"""Search engines by System1 (general).
|
|
|
|
System1 is an advertising company, and provides all its search engines as a
|
|
subdomain of ``s1search.co``. As a result, it has more than 1000 subdomains, of
|
|
which some work, and some don't.
|
|
|
|
Some of the engines get their results from Google, others get them from Yahoo.
|
|
"""
|
|
|
|
import typing as t
|
|
from urllib.parse import urlencode, urlparse, parse_qs
|
|
|
|
from lxml import html
|
|
|
|
from searx.result_types import EngineResults
|
|
from searx.enginelib import EngineCache
|
|
from searx.utils import eval_xpath_list, eval_xpath, extract_text
|
|
|
|
if t.TYPE_CHECKING:
|
|
from searx.search.processors import OnlineParams
|
|
from searx.extended_types import SXNG_Response
|
|
|
|
about = {
|
|
"website": "https://s1search.co",
|
|
"official_api_documentation": None,
|
|
"use_official_api": False,
|
|
"require_api_key": False,
|
|
"results": "HTML",
|
|
}
|
|
|
|
base_url = "" # alternatively: search.gmx.net
|
|
categories = ["general"]
|
|
|
|
paging = True
|
|
|
|
CACHE: EngineCache
|
|
"""Cache to store verification tokens for pagination."""
|
|
|
|
|
|
def init(_):
|
|
if not base_url:
|
|
raise ValueError("base_url must be set")
|
|
|
|
|
|
def setup(engine_settings: dict[str, t.Any]) -> bool:
|
|
global CACHE # pylint: disable=global-statement
|
|
CACHE = EngineCache(engine_settings["name"])
|
|
return True
|
|
|
|
|
|
def _cache_key(query: str, pageno: int) -> str:
|
|
return f"{query}|{pageno}"
|
|
|
|
|
|
def request(query: str, params: "OnlineParams"):
|
|
args = {"q": query, "page": params["pageno"]}
|
|
if params["pageno"] > 1:
|
|
sc = CACHE.get(_cache_key(query, params["pageno"]))
|
|
# sc is required for pagination to avoid rate-limits
|
|
if not sc:
|
|
params["url"] = None
|
|
return
|
|
|
|
args["sc"] = sc
|
|
|
|
params["url"] = f"{base_url}/serp?{urlencode(args)}"
|
|
|
|
|
|
def response(resp: "SXNG_Response") -> EngineResults:
|
|
res = EngineResults()
|
|
|
|
doc = html.fromstring(resp.text)
|
|
|
|
for suggestion in eval_xpath_list(doc, "//div[@class='aylf-yahoo-bottom' or @class='aylf-yahoo-sidebar']/div"):
|
|
res.add(res.types.LegacyResult({"suggestion": extract_text(suggestion)}))
|
|
|
|
for result in eval_xpath_list(
|
|
doc, "//div[contains(@class, 'web-yahoo') or contains(@class, 'web-google')]/div[contains(@class, '__result')]"
|
|
):
|
|
res.add(
|
|
res.types.MainResult(
|
|
url=extract_text(eval_xpath(result, ".//a[contains(@class, 'title')]/@href")),
|
|
title=extract_text(eval_xpath(result, ".//a[contains(@class, 'title')]")),
|
|
content=extract_text(eval_xpath(result, ".//span[contains(@class, 'description') or @class='']")),
|
|
)
|
|
)
|
|
|
|
# store pagination keys to be able to access next pages
|
|
for page_href in eval_xpath_list(doc, "//a[contains(@class, 'pagination__num')]"):
|
|
# target_url looks like "/serp?q=test&page=2&sc=RVlBPMDPVhWR20"
|
|
target_url = extract_text(eval_xpath(page_href, "./@href"))
|
|
target_url = parse_qs(urlparse(target_url).query)
|
|
pageno = int(target_url["page"][0])
|
|
sc = target_url["sc"][0]
|
|
CACHE.set(_cache_key(resp.search_params["query"], pageno), sc)
|
|
|
|
return res
|