mirror of
https://github.com/searxng/searxng.git
synced 2026-06-22 09:38:34 +02:00
Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| b3e08f2a44 | |||
| a857041afc | |||
| 31a8a22aa6 |
@@ -0,0 +1,149 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""T-Online_ is a German news portal, which is powered by Ströer, a German
|
||||
advertising company, not by Deutsche Telekom (contrary to its name).
|
||||
|
||||
It gets its web results from Google, image results from Flickr and video
|
||||
results from YouTube.
|
||||
|
||||
.. _T-Online: https://www.t-online.de/
|
||||
|
||||
"""
|
||||
|
||||
import typing as t
|
||||
from urllib.parse import urlencode
|
||||
|
||||
from lxml import html
|
||||
|
||||
from searx.utils import eval_xpath_list, eval_xpath, extract_text, get_embeded_stream_url, ElementType
|
||||
from searx.result_types import EngineResults
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
from searx.extended_types import SXNG_Response
|
||||
from searx.search.processors import OnlineParams
|
||||
|
||||
# Engine metadata describing the upstream service.
about = {
    "website": "https://www.t-online.de",
    "wikidata_id": "Q590940",
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",
    "language": "de",
}

# request() sends a page number and, when a time range is given, an "age" argument.
paging = True
time_range_support = True

# Prefix of every search URL built in request(); the category is appended as path.
base_url = "https://suche.t-online.de"
tonline_categ = "web"
"""Supported categories are ``web``, ``videos``, ``news`` and ``images``."""

# Maps the time range names to the value sent in the "age" URL argument.
time_range_map = {"day": "d", "week": "w", "month": "m", "year": "y"}

# result provider has to be specified during pagination, pagination can alternatively
# use "tonline" to only search for results from t-online news articles
tonline_channel_map = {"images": "flickr", "videos": "yt"}
|
||||
|
||||
|
||||
def init(_):
    """Validate the configured :py:obj:`tonline_categ`.

    The argument (engine settings) is not used.

    :raises ValueError: if ``tonline_categ`` is not one of ``web``,
        ``images``, ``videos`` or ``news``.
    """
    supported = ("web", "images", "videos", "news")
    if tonline_categ in supported:
        return
    raise ValueError("invalid category: %s" % tonline_categ)
|
||||
|
||||
|
||||
def request(query: str, params: "OnlineParams") -> None:
    """Build the search URL for ``query`` and store it in ``params["url"]``.

    Reads ``params["time_range"]`` and ``params["pageno"]``.  A time range, if
    set, is translated via :py:obj:`time_range_map` into the ``age`` argument.
    For categories listed in :py:obj:`tonline_channel_map`, pages after the
    first are requested through the ``ch`` / ``<ch>_page`` arguments; in all
    other cases the plain ``page`` argument is used.
    """
    # "mandant", "dia" and "ptl" are not needed, but this might reduce chances of captchas
    args = {"q": query, "mandant": "toi", "dia": "suche", "ptl": "std"}

    time_range = params["time_range"]
    if time_range:
        args["age"] = time_range_map[time_range]

    pageno = params["pageno"]
    paginate_by_channel = pageno > 1 and tonline_categ in tonline_channel_map
    if not paginate_by_channel:
        args["page"] = str(pageno)
    else:
        channel = tonline_channel_map[tonline_categ]
        args["ch"] = channel
        args[f"{channel}_page"] = str(pageno)

    query_string = urlencode(args)
    params["url"] = f"{base_url}/{tonline_categ}?{query_string}"
|
||||
|
||||
|
||||
def _general_results(doc: ElementType, res: EngineResults):
    """Add the web results and the suggestions found in ``doc`` to ``res``.

    Each result entry becomes a ``MainResult`` (URL, title, content); each
    suggestion link becomes a ``LegacyResult`` with a ``suggestion`` key.
    Missing title or content nodes yield empty strings.
    """
    for entry in eval_xpath_list(doc, "//div[@id='google_re']/div[contains(@class, 'doc')]"):
        link = eval_xpath(entry, "./a/@href") or ""
        headline = eval_xpath(entry, ".//span[contains(@class, 'tMMReshl')]") or ""
        snippet = eval_xpath(entry, ".//div[contains(@class, 'tMMRest')]") or ""
        item = res.types.MainResult(
            url=extract_text(link),
            title=extract_text(headline) or "",
            content=extract_text(snippet) or "",
        )
        res.add(item)

    for hint in eval_xpath_list(doc, "//div[starts-with(@class, 'rsbl')]/a"):
        res.add(res.types.LegacyResult({"suggestion": extract_text(hint)}))
|
||||
|
||||
|
||||
def _image_results(doc: ElementType, res: EngineResults):
    """Add one ``Image`` result to ``res`` for every image entry in ``doc``.

    The title is taken from the entry's ``doc_info`` element and the thumbnail
    from its ``<img>`` source; both fall back to an empty string.
    """
    for entry in eval_xpath_list(doc, "//div[@class='doc']"):
        link = eval_xpath(entry, "./a/@href") or ""
        caption = eval_xpath(entry, ".//div[contains(@class, 'doc_info')]") or ""
        preview = eval_xpath(entry, ".//img/@src") or ""
        item = res.types.Image(
            url=extract_text(link),
            title=extract_text(caption) or "",
            thumbnail_src=extract_text(preview) or "",
        )
        res.add(item)
|
||||
|
||||
|
||||
def _news_results(doc: ElementType, res: EngineResults):
    """Add one ``MainResult`` to ``res`` for every news entry in ``doc``.

    The title is assembled from all headline links of an entry, joined by
    ``" - "``.  Only the first ``href`` of the entry is used as URL.  Content
    and thumbnail fall back to an empty string when not present.
    """
    for entry in eval_xpath_list(doc, "//div[@id='portal_re']/div[contains(@class, 'doc')]"):
        headline_nodes: list[ElementType] = eval_xpath(entry, ".//a[starts-with(@class, 'tMMReshl')]")
        link = eval_xpath(entry, "(./a/@href)[1]") or ""
        snippet = eval_xpath(entry, ".//div[contains(@class, 'tMMRest')]") or ""
        preview = eval_xpath(entry, ".//img[contains(@class, 'desk')]/@src") or ""
        item = res.types.MainResult(
            url=extract_text(link),
            title=" - ".join(extract_text(node) or "" for node in headline_nodes),
            content=extract_text(snippet) or "",
            thumbnail=extract_text(preview) or "",
        )
        res.add(item)
|
||||
|
||||
|
||||
def _video_results(doc: ElementType, res: EngineResults):
    """Add one video result to ``res`` for every video entry in ``doc``.

    Entries without a link are skipped.  The title is assembled from all
    headline links of an entry, joined by ``" - "``.  ``iframe_src`` is what
    :py:obj:`get_embeded_stream_url` returns for the URL, or an empty string
    if it returns nothing.
    """
    result: ElementType
    for result in eval_xpath_list(doc, "//div[@class='doc']"):
        url: str | None = extract_text(eval_xpath(result, "./a/@href") or "")
        # A missing href reaches extract_text as "" and comes back as an empty
        # string, not None -- test for falsiness so such entries are skipped.
        if not url:
            continue
        title_parts: list[ElementType] = eval_xpath(result, ".//a[starts-with(@class, 'tMMReshl')]")
        res.add(
            res.types.LegacyResult(
                template="videos.html",
                url=url,
                title=" - ".join(extract_text(part) or "" for part in title_parts),
                thumbnail=extract_text(eval_xpath(result, ".//img/@src") or "") or "",
                iframe_src=get_embeded_stream_url(url) or "",
            )
        )
|
||||
|
||||
|
||||
def response(resp: "SXNG_Response") -> EngineResults:
    """Parse the HTML of ``resp`` with the parser of the configured category.

    :returns: the results collected by the category specific parser.
    :raises ValueError: if :py:obj:`tonline_categ` is not a supported category.
    """
    doc = html.fromstring(resp.text)
    res = EngineResults()

    if tonline_categ == "web":
        _general_results(doc, res)
    elif tonline_categ == "news":
        _news_results(doc, res)
    elif tonline_categ == "images":
        _image_results(doc, res)
    elif tonline_categ == "videos":
        _video_results(doc, res)
    else:
        raise ValueError("invalid category: %s" % tonline_categ)

    return res
|
||||
@@ -491,6 +491,22 @@ engines:
|
||||
engine: arxiv
|
||||
shortcut: arx
|
||||
|
||||
- name: ayo
|
||||
engine: xpath
|
||||
shortcut: ayo
|
||||
search_url: https://search.ayo.de/search?q={query}
|
||||
results_xpath: //div[contains(@class, 'search-result')]/div
|
||||
url_xpath: .//a/@href
|
||||
title_xpath: .//h3
|
||||
content_xpath: .//p
|
||||
suggestion_xpath: .//a[starts-with(@href, "https://search.ayo.de")]
|
||||
disabled: true
|
||||
about:
|
||||
website: https://search.ayo.de
|
||||
use_official_api: false
|
||||
require_api_key: false
|
||||
results: HTML
|
||||
|
||||
- name: azure
|
||||
engine: azure
|
||||
shortcut: az
|
||||
@@ -2313,6 +2329,36 @@ engines:
|
||||
shortcut: tm
|
||||
disabled: true
|
||||
|
||||
- name: tonline
|
||||
engine: tonline
|
||||
shortcut: tol
|
||||
disabled: true
|
||||
inactive: true
|
||||
|
||||
- name: tonline images
|
||||
engine: tonline
|
||||
categories: images
|
||||
tonline_categ: images
|
||||
shortcut: toli
|
||||
disabled: true
|
||||
inactive: true
|
||||
|
||||
- name: tonline videos
|
||||
engine: tonline
|
||||
categories: videos
|
||||
tonline_categ: videos
|
||||
shortcut: tolv
|
||||
disabled: true
|
||||
inactive: true
|
||||
|
||||
- name: tonline news
|
||||
engine: tonline
|
||||
categories: news
|
||||
tonline_categ: news
|
||||
shortcut: toln
|
||||
disabled: true
|
||||
inactive: true
|
||||
|
||||
# Requires Tor
|
||||
- name: torch
|
||||
engine: xpath
|
||||
@@ -2790,6 +2836,49 @@ engines:
|
||||
shortcut: rehi
|
||||
disabled: true
|
||||
|
||||
- name: searchzee
|
||||
engine: json_engine
|
||||
search_url: https://searchzee.com/api/search?q={query}&type=web&offset={pageno}
|
||||
paging: true
|
||||
first_page_num: 0
|
||||
results_query: results
|
||||
url_query: url
|
||||
title_query: title
|
||||
content_query: summary
|
||||
content_html_to_text: true
|
||||
categories: general
|
||||
shortcut: sz
|
||||
disabled: true
|
||||
inactive: true
|
||||
about:
|
||||
website: https://searchzee.com
|
||||
use_official_api: false
|
||||
require_api_key: false
|
||||
results: JSON
|
||||
|
||||
- name: searchzee news
|
||||
engine: json_engine
|
||||
search_url: https://searchzee.com/api/search?q={query}&type=news&offset={pageno}{time_range}
|
||||
paging: true
|
||||
first_page_num: 0
|
||||
time_range_support: true
|
||||
time_range_url: "&freshness={time_range_val}"
|
||||
time_range_map:
|
||||
day: pd
|
||||
week: pw
|
||||
month: pm
|
||||
year: py
|
||||
results_query: results
|
||||
url_query: url
|
||||
title_query: title
|
||||
content_query: summary
|
||||
thumbnail_query: thumbnail
|
||||
content_html_to_text: true
|
||||
categories: news
|
||||
shortcut: sznw
|
||||
disabled: true
|
||||
inactive: true
|
||||
|
||||
- name: swisscows
|
||||
engine: swisscows
|
||||
categories: general
|
||||
|
||||
Reference in New Issue
Block a user