Compare commits

..

3 Commits

Author SHA1 Message Date
Bnyro b3e08f2a44 [feat] engines: add searchzee engine (general, news)
The results seem to be from Brave (i.e. they are exactly
the same). But it doesn't have any strict rate-limits,
so that's nice.

News supports time ranges, but apart from that, unfortunately it doesn't
support any advanced features like safesearch or languages.
2026-06-14 09:59:39 +02:00
Bnyro a857041afc [feat] engines: add support for search.ayo.de 2026-06-14 09:32:58 +02:00
Bnyro 31a8a22aa6 [feat] engines: add German tonline engine (general, news, images, videos) (#6250)
T-Online_ is a German news portal.

It gets its web results from Google, image results from Flickr and video results
from YouTube.

For images and videos, it additionally returns results from its
news catalog. However, for pagination we have to specify the result
type (e.g. either videos from YouTube or from T-Online), so we use
flickr/youtube there instead of tonline because the tonline results
are usually irrelevant.
2026-06-14 08:46:07 +02:00
2 changed files with 238 additions and 0 deletions
+149
View File
@@ -0,0 +1,149 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""T-Online_ is a German news portal, which is powered by Ströer, a German
advertising company, not by Deutsche Telekom (contrary to its name).
It gets its web results from Google, image results from Flickr and video
results from YouTube.
.. _T-Online: https://www.t-online.de/
"""
import typing as t
from urllib.parse import urlencode
from lxml import html
from searx.utils import eval_xpath_list, eval_xpath, extract_text, get_embeded_stream_url, ElementType
from searx.result_types import EngineResults
if t.TYPE_CHECKING:
from searx.extended_types import SXNG_Response
from searx.search.processors import OnlineParams
# Engine metadata.
about = {
    "website": "https://www.t-online.de",
    "wikidata_id": "Q590940",
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",
    "language": "de",
}

# Capabilities of this engine.
paging = True
time_range_support = True

# Host of the search frontend; the category is appended as the URL path.
base_url = "https://suche.t-online.de"

tonline_categ = "web"
"""Supported categories are ``web``, ``videos``, ``news`` and ``images``."""

# Maps the time range names to the values sent in the ``age`` URL argument.
time_range_map = {
    "day": "d",
    "week": "w",
    "month": "m",
    "year": "y",
}

# The result provider has to be named explicitly when paginating.  "tonline"
# would be accepted as well; it restricts the results to t-online's own news
# articles.
tonline_channel_map = {
    "images": "flickr",
    "videos": "yt",
}
def init(_):
    """Check that ``tonline_categ`` names one of the supported categories.

    The single positional argument is not used.

    :raises ValueError: if ``tonline_categ`` is not ``web``, ``images``,
        ``videos`` or ``news``.
    """
    supported = ("web", "images", "videos", "news")
    if tonline_categ in supported:
        return
    raise ValueError("invalid category: %s" % tonline_categ)
def request(query: str, params: "OnlineParams") -> None:
    """Assemble the search URL for ``query`` and store it in ``params["url"]``.

    A selected time range is sent as ``age``.  The page number is sent as
    ``page``, except on pages after the first one of a category listed in
    ``tonline_channel_map``: there the channel is named in ``ch`` and the page
    number is sent as ``<channel>_page``.
    """
    # "mandant", "dia" and "ptl" are not needed, but sending them might reduce
    # the chance of running into captchas
    args = {"q": query, "mandant": "toi", "dia": "suche", "ptl": "std"}

    time_range = params["time_range"]
    if time_range:
        args["age"] = time_range_map[time_range]

    pageno = params["pageno"]
    if pageno <= 1 or tonline_categ not in tonline_channel_map:
        args["page"] = str(pageno)
    else:
        channel = tonline_channel_map[tonline_categ]
        args["ch"] = channel
        args[f"{channel}_page"] = str(pageno)

    query_string = urlencode(args)
    params["url"] = f"{base_url}/{tonline_categ}?{query_string}"
def _general_results(doc: ElementType, res: EngineResults):
    """Collect the web results and the query suggestions of ``doc`` in ``res``.

    Every ``doc`` element below ``div#google_re`` becomes a ``MainResult``;
    every link below a ``rsbl*`` element becomes a suggestion.
    """
    hits = eval_xpath_list(doc, "//div[@id='google_re']/div[contains(@class, 'doc')]")
    for hit in hits:
        href = eval_xpath(hit, "./a/@href") or ""
        headline = eval_xpath(hit, ".//span[contains(@class, 'tMMReshl')]") or ""
        snippet = eval_xpath(hit, ".//div[contains(@class, 'tMMRest')]") or ""
        res.add(
            res.types.MainResult(
                url=extract_text(href),
                title=extract_text(headline) or "",
                content=extract_text(snippet) or "",
            )
        )

    for link in eval_xpath_list(doc, "//div[starts-with(@class, 'rsbl')]/a"):
        res.add(res.types.LegacyResult({"suggestion": extract_text(link)}))
def _image_results(doc: ElementType, res: EngineResults):
    """Add one ``Image`` result to ``res`` for every ``div.doc`` element of ``doc``.

    The link target is used as URL, the text of the ``doc_info`` element as
    title and the ``src`` of the contained image as thumbnail.
    """
    for tile in eval_xpath_list(doc, "//div[@class='doc']"):
        href = eval_xpath(tile, "./a/@href") or ""
        caption = eval_xpath(tile, ".//div[contains(@class, 'doc_info')]") or ""
        preview = eval_xpath(tile, ".//img/@src") or ""
        res.add(
            res.types.Image(
                url=extract_text(href),
                title=extract_text(caption) or "",
                thumbnail_src=extract_text(preview) or "",
            )
        )
def _news_results(doc: ElementType, res: EngineResults):
    """Add one ``MainResult`` to ``res`` for every news item below ``div#portal_re``.

    The title is built from all ``tMMReshl*`` links of an item, joined by
    ``" - "``.  The URL is the first direct link of the item.
    """
    for item in eval_xpath_list(doc, "//div[@id='portal_re']/div[contains(@class, 'doc')]"):
        headline_links = eval_xpath(item, ".//a[starts-with(@class, 'tMMReshl')]")
        href = eval_xpath(item, "(./a/@href)[1]") or ""
        teaser = eval_xpath(item, ".//div[contains(@class, 'tMMRest')]") or ""
        image = eval_xpath(item, ".//img[contains(@class, 'desk')]/@src") or ""

        headline = " - ".join([extract_text(link) or "" for link in headline_links])
        res.add(
            res.types.MainResult(
                url=extract_text(href),
                title=headline,
                content=extract_text(teaser) or "",
                thumbnail=extract_text(image) or "",
            )
        )
def _video_results(doc: ElementType, res: EngineResults):
    """Add one ``videos.html`` result to ``res`` for every ``div.doc`` element of ``doc``.

    The title is built from all ``tMMReshl*`` links of an entry, joined by
    ``" - "``.  Entries without a link are skipped, since neither a result URL
    nor an embed URL can be derived for them.
    """
    for result in eval_xpath_list(doc, "//div[@class='doc']"):
        url: str | None = extract_text(eval_xpath(result, "./a/@href") or "")
        # When no href matches, extract_text is handed "" (see the fallback
        # above), so a missing link presumably surfaces as an empty string
        # rather than None.  Test for falsiness to cover both cases.
        if not url:
            continue

        title_parts: list[ElementType] = eval_xpath(result, ".//a[starts-with(@class, 'tMMReshl')]")
        res.add(
            res.types.LegacyResult(
                template="videos.html",
                url=url,
                title=" - ".join(extract_text(part) or "" for part in title_parts),
                thumbnail=extract_text(eval_xpath(result, ".//img/@src") or "") or "",
                iframe_src=get_embeded_stream_url(url) or "",
            )
        )
def response(resp: "SXNG_Response") -> EngineResults:
    """Parse the HTML of ``resp`` with the parser matching ``tonline_categ``.

    :raises ValueError: if ``tonline_categ`` is none of ``web``, ``news``,
        ``images`` or ``videos``.
    """
    doc = html.fromstring(resp.text)
    res = EngineResults()

    if tonline_categ == "web":
        _general_results(doc, res)
    elif tonline_categ == "news":
        _news_results(doc, res)
    elif tonline_categ == "images":
        _image_results(doc, res)
    elif tonline_categ == "videos":
        _video_results(doc, res)
    else:
        raise ValueError("invalid category: %s" % tonline_categ)

    return res
+89
View File
@@ -491,6 +491,22 @@ engines:
engine: arxiv
shortcut: arx
- name: ayo
  engine: xpath
  shortcut: ayo
  search_url: https://search.ayo.de/search?q={query}
  results_xpath: //div[contains(@class, 'search-result')]/div
  url_xpath: .//a/@href
  title_xpath: .//h3
  content_xpath: .//p
  # links that point back to the search host are treated as suggestions
  suggestion_xpath: .//a[starts-with(@href, "https://search.ayo.de")]
  disabled: true
  about:
    # same host as in search_url
    website: https://search.ayo.de
    use_official_api: false
    require_api_key: false
    results: HTML
- name: azure
engine: azure
shortcut: az
@@ -2313,6 +2329,36 @@ engines:
shortcut: tm
disabled: true
# T-Online web search; the engine's default ``tonline_categ`` is "web"
- name: tonline
  engine: tonline
  shortcut: tol
  inactive: true
  disabled: true
# T-Online image search
- name: tonline images
  engine: tonline
  shortcut: toli
  tonline_categ: images
  categories: images
  inactive: true
  disabled: true
# T-Online video search
- name: tonline videos
  engine: tonline
  shortcut: tolv
  tonline_categ: videos
  categories: videos
  inactive: true
  disabled: true
# T-Online news search
- name: tonline news
  engine: tonline
  shortcut: toln
  tonline_categ: news
  categories: news
  inactive: true
  disabled: true
# Requires Tor
- name: torch
engine: xpath
@@ -2790,6 +2836,49 @@ engines:
shortcut: rehi
disabled: true
# SearchZee web search, queried through its JSON endpoint
- name: searchzee
  engine: json_engine
  shortcut: sz
  categories: general
  search_url: https://searchzee.com/api/search?q={query}&type=web&offset={pageno}
  # paging: {pageno} is sent as "offset"
  paging: true
  first_page_num: 0
  # where to find the result fields in the JSON response
  results_query: results
  url_query: url
  title_query: title
  content_query: summary
  content_html_to_text: true
  inactive: true
  disabled: true
  about:
    website: https://searchzee.com
    use_official_api: false
    require_api_key: false
    results: JSON
# SearchZee news search, queried through its JSON endpoint
- name: searchzee news
  engine: json_engine
  shortcut: sznw
  categories: news
  search_url: https://searchzee.com/api/search?q={query}&type=news&offset={pageno}{time_range}
  # paging: {pageno} is sent as "offset"
  paging: true
  first_page_num: 0
  # a selected time range is appended to the URL as "freshness"
  time_range_support: true
  time_range_url: "&freshness={time_range_val}"
  time_range_map:
    day: pd
    week: pw
    month: pm
    year: py
  # where to find the result fields in the JSON response
  results_query: results
  url_query: url
  title_query: title
  content_query: summary
  thumbnail_query: thumbnail
  content_html_to_text: true
  inactive: true
  disabled: true
- name: swisscows
engine: swisscows
categories: general