[feat] engines: add German tonline engine (general, news, images, videos) (#6250)

T-Online_ is a German news portal.

It gets its web results from Google, image results from Flickr and video results
from YouTube.

For images and videos, it additionally returns results from its
news catalog. However, for pagination we have to specify the result
type (e.g. either videos from YouTube or from T-Online), so we use
flickr/youtube there instead of tonline because the tonline results
are usually irrelevant.
This commit is contained in:
Bnyro
2026-06-14 08:46:07 +02:00
committed by GitHub
parent a29cda858c
commit 31a8a22aa6
2 changed files with 179 additions and 0 deletions
+149
View File
@@ -0,0 +1,149 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""T-Online_ is a German news portal, which is powered by Ströer, a German
advertising company, not by Deutsche Telekom (contrary to its name).
It gets its web results from Google, image results from Flickr and video
results from YouTube.
.. _T-Online: https://www.t-online.de/
"""
import typing as t
from urllib.parse import urlencode
from lxml import html
from searx.utils import eval_xpath_list, eval_xpath, extract_text, get_embeded_stream_url, ElementType
from searx.result_types import EngineResults
if t.TYPE_CHECKING:
from searx.extended_types import SXNG_Response
from searx.search.processors import OnlineParams
# Descriptive metadata about the upstream service this engine scrapes
# (website, Wikidata item, API usage, result format and language).
about = {
"website": "https://www.t-online.de",
"wikidata_id": "Q590940",
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": "HTML",
"language": "de",
}
# request() reads params["pageno"] and params["time_range"], hence both
# capabilities are switched on.
paging = True
time_range_support = True
# Host of the T-Online search frontend; request() appends the category path
# and the encoded query string to it.
base_url = "https://suche.t-online.de"
# Result category this engine instance queries.  It is validated in init(),
# used as the URL path segment in request() and selects the parser in
# response().  NOTE(review): presumably overridden per engine instance via the
# ``tonline_categ`` setting of the engine configuration -- confirm.
tonline_categ = "web"
"""Supported categories are ``web``, ``videos``, ``news`` and ``images``."""
# Maps the time range names found in params["time_range"] to the value that
# request() sends as the "age" query argument.
time_range_map = {"day": "d", "week": "w", "month": "m", "year": "y"}
# The result provider has to be specified during pagination.  Pagination can
# alternatively use "tonline" to only search for results from T-Online news
# articles.  Categories missing from this map are paginated with the plain
# "page" argument (see request()).
tonline_channel_map = {"images": "flickr", "videos": "yt"}
def init(_):
    """Validate the configured ``tonline_categ``.

    The single positional argument is ignored.

    Raises:
        ValueError: if ``tonline_categ`` is not one of ``web``, ``images``,
            ``videos`` or ``news``.
    """
    supported = ("web", "images", "videos", "news")
    if tonline_categ in supported:
        return
    raise ValueError("invalid category: %s" % tonline_categ)
def request(query: str, params: "OnlineParams") -> None:
    """Build the search URL for ``query`` and store it in ``params["url"]``.

    Reads ``params["time_range"]`` and ``params["pageno"]``.  A set time range
    is translated through ``time_range_map`` and sent as ``age``.  From the
    second page on, categories listed in ``tonline_channel_map`` are paginated
    through their provider channel (``ch`` plus ``<channel>_page``); in every
    other case the plain ``page`` argument is used.
    """
    # "mandant", "dia" and "ptl" are not needed, but sending them might reduce
    # the chances of running into captchas
    args = {"q": query, "mandant": "toi", "dia": "suche", "ptl": "std"}

    time_range = params["time_range"]
    if time_range:
        args["age"] = time_range_map[time_range]

    pageno = params["pageno"]
    paginate_by_channel = pageno > 1 and tonline_categ in tonline_channel_map
    if not paginate_by_channel:
        args["page"] = str(pageno)
    else:
        channel = tonline_channel_map[tonline_categ]
        args["ch"] = channel
        args[f"{channel}_page"] = str(pageno)

    query_string = urlencode(args)
    params["url"] = f"{base_url}/{tonline_categ}?{query_string}"
def _general_results(doc: ElementType, res: EngineResults):
    """Add the web results and the query suggestions found in ``doc`` to ``res``.

    Web results are the ``doc`` entries below the ``google_re`` container;
    suggestions are the links inside the elements whose class starts with
    ``rsbl``.
    """
    hit: ElementType
    for hit in eval_xpath_list(doc, "//div[@id='google_re']/div[contains(@class, 'doc')]"):
        link = extract_text(eval_xpath(hit, "./a/@href") or "")
        headline = extract_text(eval_xpath(hit, ".//span[contains(@class, 'tMMReshl')]") or "") or ""
        snippet = extract_text(eval_xpath(hit, ".//div[contains(@class, 'tMMRest')]") or "") or ""
        res.add(res.types.MainResult(url=link, title=headline, content=snippet))

    anchor: ElementType
    for anchor in eval_xpath_list(doc, "//div[starts-with(@class, 'rsbl')]/a"):
        suggestion_text = extract_text(anchor)
        res.add(res.types.LegacyResult({"suggestion": suggestion_text}))
def _image_results(doc: ElementType, res: EngineResults):
    """Add one image result to ``res`` for every ``div`` of class ``doc`` in ``doc``.

    The link target becomes the result URL, the text of the ``doc_info``
    element the title and the ``src`` of the contained image the thumbnail.
    """
    entry: ElementType
    for entry in eval_xpath_list(doc, "//div[@class='doc']"):
        link = extract_text(eval_xpath(entry, "./a/@href") or "")
        caption = extract_text(eval_xpath(entry, ".//div[contains(@class, 'doc_info')]") or "") or ""
        preview = extract_text(eval_xpath(entry, ".//img/@src") or "") or ""
        res.add(res.types.Image(url=link, title=caption, thumbnail_src=preview))
def _news_results(doc: ElementType, res: EngineResults):
    """Add the news articles listed below the ``portal_re`` container to ``res``.

    An article can carry several headline links; their texts are joined with
    ``" - "`` to form the title.
    """
    article: ElementType
    headline_nodes: list[ElementType]
    for article in eval_xpath_list(doc, "//div[@id='portal_re']/div[contains(@class, 'doc')]"):
        headline_nodes = eval_xpath(article, ".//a[starts-with(@class, 'tMMReshl')]")
        link = extract_text(eval_xpath(article, "(./a/@href)[1]") or "")
        headline = " - ".join(extract_text(node) or "" for node in headline_nodes)
        teaser = extract_text(eval_xpath(article, ".//div[contains(@class, 'tMMRest')]") or "") or ""
        preview = extract_text(eval_xpath(article, ".//img[contains(@class, 'desk')]/@src") or "") or ""
        res.add(
            res.types.MainResult(
                url=link,
                title=headline,
                content=teaser,
                thumbnail=preview,
            )
        )
def _video_results(doc: ElementType, res: EngineResults):
    """Add one video result to ``res`` for every ``div`` of class ``doc`` in ``doc``.

    Entries without a link are skipped.  The title is built by joining the
    texts of all headline links with ``" - "``; the embeddable player URL is
    derived from the result URL with ``get_embeded_stream_url``.
    """
    result: ElementType
    for result in eval_xpath_list(doc, "//div[@class='doc']"):
        url: str | None = extract_text(eval_xpath(result, "./a/@href") or "")
        # Because of the ``or ""`` fallback, a missing link reaches
        # extract_text() as an empty string, so the outcome is presumably ""
        # rather than None.  Test for falsiness to skip both cases; a video
        # result without a URL is of no use.
        if not url:
            continue

        title_parts: list[ElementType] = eval_xpath(result, ".//a[starts-with(@class, 'tMMReshl')]")
        res.add(
            res.types.LegacyResult(
                template="videos.html",
                url=url,
                title=" - ".join(extract_text(part) or "" for part in title_parts),
                thumbnail=extract_text(eval_xpath(result, ".//img/@src") or "") or "",
                iframe_src=get_embeded_stream_url(url) or "",
            )
        )
def response(resp: "SXNG_Response") -> EngineResults:
    """Parse the HTML in ``resp.text`` with the parser matching ``tonline_categ``.

    Returns:
        The ``EngineResults`` filled by the category specific parser.

    Raises:
        ValueError: if ``tonline_categ`` is none of ``web``, ``news``,
            ``images`` or ``videos``.
    """
    doc = html.fromstring(resp.text)
    res = EngineResults()

    if tonline_categ == "web":
        _general_results(doc, res)
    elif tonline_categ == "news":
        _news_results(doc, res)
    elif tonline_categ == "images":
        _image_results(doc, res)
    elif tonline_categ == "videos":
        _video_results(doc, res)
    else:
        raise ValueError("invalid category: %s" % tonline_categ)

    return res
+30
View File
@@ -2313,6 +2313,36 @@ engines:
shortcut: tm
disabled: true
- name: tonline
engine: tonline
shortcut: tol
disabled: true
inactive: true
- name: tonline images
engine: tonline
categories: images
tonline_categ: images
shortcut: toli
disabled: true
inactive: true
- name: tonline videos
engine: tonline
categories: videos
tonline_categ: videos
shortcut: tolv
disabled: true
inactive: true
- name: tonline news
engine: tonline
categories: news
tonline_categ: news
shortcut: toln
disabled: true
inactive: true
# Requires Tor
- name: torch
engine: xpath