mirror of
https://github.com/searxng/searxng.git
synced 2026-06-14 22:06:52 +02:00
[feat] engines: add s1search general engine (#6186)
S1Search provides various different search services, which all seem to be somewhat based on Google and Yahoo. The site looks kinda suspicious, but the results are fine. You can find a list of their engines by using a subdomain finder like https://web-toolbox.dev/en/tools/subdomain-lookup and search for `s1search.co`.
This commit is contained in:
@@ -0,0 +1,98 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Search engines by System1 (general).
|
||||
|
||||
System1 is an advertising company, and provides all its search engines as a
|
||||
subdomain of ``s1search.co``. As a result, it has more than 1000 subdomains, of
|
||||
which some work, and some don't.
|
||||
|
||||
Some of the engines get their results from Google, others get them from Yahoo.
|
||||
"""
|
||||
|
||||
import typing as t
|
||||
from urllib.parse import urlencode, urlparse, parse_qs
|
||||
|
||||
from lxml import html
|
||||
|
||||
from searx.result_types import EngineResults
|
||||
from searx.enginelib import EngineCache
|
||||
from searx.utils import eval_xpath_list, eval_xpath, extract_text
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
from searx.search.processors import OnlineParams
|
||||
from searx.extended_types import SXNG_Response
|
||||
|
||||
# Engine metadata: results are parsed from HTML pages, no official API or API
# key is involved.
about = {
    "website": "https://s1search.co",
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",
}

# There is no usable default: init() raises ValueError while this is empty, so
# every configured instance has to provide its own base URL.
base_url = "" # alternatively: search.gmx.net
categories = ["general"]

# request() sends the page number as the "page" URL argument.
paging = True

# Assigned in setup(); read in request() and written in response().
CACHE: EngineCache
"""Cache to store verification tokens for pagination."""
|
||||
|
||||
|
||||
def init(_):
    """Check the engine configuration.

    The module ships with an empty ``base_url``, so each configured instance
    has to set one.

    :raises ValueError: if ``base_url`` is empty.
    """
    if base_url:
        return
    raise ValueError("base_url must be set")
|
||||
|
||||
|
||||
def setup(engine_settings: dict[str, t.Any]) -> bool:
    """Create the module-level ``CACHE`` for this engine instance.

    :param engine_settings: settings of the engine; its ``"name"`` entry is
        passed to :py:class:`EngineCache`.
    :return: always ``True``.
    """
    global CACHE  # pylint: disable=global-statement
    engine_name = engine_settings["name"]
    CACHE = EngineCache(engine_name)
    return True
|
||||
|
||||
|
||||
def _cache_key(query: str, pageno: int) -> str:
|
||||
return f"{query}|{pageno}"
|
||||
|
||||
|
||||
def request(query: str, params: "OnlineParams"):
    """Set ``params["url"]`` to the result page URL for ``query``.

    The first page is requested with the ``q`` and ``page`` arguments only.
    Any later page additionally needs the ``sc`` token that ``response()``
    cached from the pagination links of an earlier page.  When no token is
    cached for the requested page, ``params["url"]`` is set to ``None``.
    """
    pageno = params["pageno"]
    args = {"q": query, "page": pageno}

    if pageno > 1:
        # sc is required for pagination to avoid rate-limits
        token = CACHE.get(_cache_key(query, pageno))
        if not token:
            params["url"] = None
            return
        args["sc"] = token

    params["url"] = f"{base_url}/serp?{urlencode(args)}"
|
||||
|
||||
|
||||
def response(resp: "SXNG_Response") -> EngineResults:
    """Parse a result page into suggestions and main results.

    As a side effect, the ``sc`` token of every pagination link found on the
    page is stored in ``CACHE`` under the key of the page it points to, so
    that ``request()`` can build the URL of that page later.

    :param resp: response of the request built by ``request()``; its
        ``search_params["query"]`` is used for the cache keys.
    :return: the suggestions and main results found on the page.
    """
    res = EngineResults()

    doc = html.fromstring(resp.text)

    for suggestion in eval_xpath_list(doc, "//div[@class='aylf-yahoo-bottom' or @class='aylf-yahoo-sidebar']/div"):
        res.add(res.types.LegacyResult({"suggestion": extract_text(suggestion)}))

    for result in eval_xpath_list(
        doc, "//div[contains(@class, 'web-yahoo') or contains(@class, 'web-google')]/div[contains(@class, '__result')]"
    ):
        res.add(
            res.types.MainResult(
                url=extract_text(eval_xpath(result, ".//a[contains(@class, 'title')]/@href")),
                title=extract_text(eval_xpath(result, ".//a[contains(@class, 'title')]")),
                content=extract_text(eval_xpath(result, ".//span[contains(@class, 'description') or @class='']")),
            )
        )

    # store pagination keys to be able to access next pages
    for page_href in eval_xpath_list(doc, "//a[contains(@class, 'pagination__num')]"):
        # target_url looks like "/serp?q=test&page=2&sc=RVlBPMDPVhWR20"
        target_url = extract_text(eval_xpath(page_href, "./@href"))
        target_args = parse_qs(urlparse(target_url or "").query)
        try:
            pageno = int(target_args["page"][0])
            sc = target_args["sc"][0]
        except (KeyError, ValueError):
            # A pagination link without a usable page number or token can't be
            # cached; skip it instead of discarding the results parsed above.
            continue
        CACHE.set(_cache_key(resp.search_params["query"], pageno), sc)

    return res
|
||||
@@ -2845,6 +2845,38 @@ engines:
|
||||
website: https://minecraft.wiki/
|
||||
wikidata_id: Q105533483
|
||||
|
||||
# s1search google engines / mirrors
|
||||
- name: searchtoday
|
||||
engine: s1search
|
||||
shortcut: std
|
||||
base_url: https://info.searchtoday.site
|
||||
disabled: true
|
||||
|
||||
# - name: webcrawler
|
||||
# engine: s1search
|
||||
# shortcut: wc
|
||||
# base_url: https://www.webcrawler.com
|
||||
# disabled: true
|
||||
|
||||
# s1search yahoo engines / mirrors
|
||||
# - name: excite
|
||||
# engine: s1search
|
||||
# shortcut: exc
|
||||
# base_url: https://results.excite.com.s1search.co
|
||||
# disabled: true
|
||||
|
||||
# - name: metacrawler
|
||||
# engine: s1search
|
||||
# shortcut: mec
|
||||
# base_url: https://search.metacrawler.com
|
||||
# disabled: true
|
||||
|
||||
- name: infospace
|
||||
engine: s1search
|
||||
shortcut: ifs
|
||||
base_url: https://search.infospace.com
|
||||
disabled: true
|
||||
|
||||
# Doku engine lets you access any Doku wiki instance:
|
||||
# A public one or a private/corporate one.
|
||||
# - name: ubuntuwiki
|
||||
|
||||
Reference in New Issue
Block a user