[mod] swisscows engines: add language / region support

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser
2026-06-05 13:51:21 +02:00
committed by Bnyro
parent ed369ac0ec
commit e7cf57e9ae
2 changed files with 72 additions and 15 deletions
+54 -9
View File
@@ -2,6 +2,8 @@
# pylint: disable=invalid-name # pylint: disable=invalid-name
"""Swisscows (general, images, videos)""" """Swisscows (general, images, videos)"""
import typing as t
import base64 import base64
import codecs import codecs
import hashlib import hashlib
@@ -11,9 +13,9 @@ import random
from datetime import datetime from datetime import datetime
from urllib.parse import urlencode from urllib.parse import urlencode
import typing as t from babel.core import get_global
from searx.result_types import EngineResults, LegacyResult from searx.result_types import EngineResults, LegacyResult # pyright: ignore[reportPrivateLocalImportUsage]
from searx.utils import humanize_number, html_to_text from searx.utils import humanize_number, html_to_text
if t.TYPE_CHECKING: if t.TYPE_CHECKING:
@@ -33,6 +35,7 @@ about = {
categories = ["general"] categories = ["general"]
swisscows_category = "web" # possible: "web", "videos", "images" swisscows_category = "web" # possible: "web", "videos", "images"
results_per_page = 50 results_per_page = 50
time_range_support = True time_range_support = True
@@ -45,6 +48,45 @@ NONCE_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789
time_range_map = {"day": "Day", "week": "Week", "month": "Month", "year": "Year"} time_range_map = {"day": "Day", "week": "Week", "month": "Month", "year": "Year"}
# Two-letter, upper-case region codes.  ``request()`` passes this list as the
# ``regions`` argument of ``appropriate_locale()``, where it is used to check
# the region part of the user's locale before the locale is sent to swisscows.
# The ``fmt: off`` / ``fmt: on`` pair keeps the hand-wrapped rows of ten
# entries from being reflowed by the code formatter.
# fmt: off
swisscows_regions: list[str] = [
"AR", "AU", "AT", "BE", "BR", "CA", "CL", "CN", "DK", "FI",
"FR", "DE", "HK", "HU", "IN", "ID", "IT", "JP", "KR", "LV",
"MY", "MX", "NL", "NZ", "NO", "PH", "PL", "PT", "RU", "SA",
"ZA", "ES", "SE", "CH", "TW", "TR", "UA", "GB", "US"
]
"""Regions supported by swisscows."""
# fmt: on
# swisscows_languages = [
# "GB", "DE", "ES", "FR", "IT", "LV", "HU", "NL", "PT", "RU", "UA"
# ]
def appropriate_locale(searxng_locale: str, regions: list[str], default: str) -> str:
    """Map the SearXNG locale chosen by the user to a locale for swisscows.

    The tag in ``searxng_locale`` is split on ``-`` and resolved in this
    order; the first rule that applies wins:

    1. The first subtag is ``all``: ``default`` is returned.
    2. The tag has no second subtag (language only), or its second subtag is
       listed in ``regions``: ``searxng_locale`` is returned unchanged.
    3. The upper-cased language is itself listed in ``regions`` (e.g. ``de``
       and ``DE``): ``<language>-<LANGUAGE>`` is returned.
    4. Babel's ``likely_subtags`` table has an entry for the language and the
       last ``_``-separated part of that entry is listed in ``regions``: the
       first and last part of the entry are joined with ``-`` and returned.
    5. Otherwise ``default`` is returned.

    :param searxng_locale: locale tag selected by the user, e.g. ``all``,
        ``de`` or ``de-CH``.
    :param regions: region codes accepted by the engine.
    :param default: value returned when no rule yields a locale.
    """
    subtags = searxng_locale.split("-")
    language = subtags[0]

    if language == "all":
        return default

    # Language-only tags and tags with a supported region pass through as-is.
    if len(subtags) < 2 or subtags[1] in regions:
        return searxng_locale

    # Unsupported region: try a region spelled like the language code.
    same_named_region = language.upper()
    if same_named_region in regions:
        return f"{language}-{same_named_region}"

    # Last resort: the most likely full tag babel knows for this language.
    likely_tag: str | None = get_global("likely_subtags").get(language)
    if not likely_tag:
        return default

    parts: list[str] = likely_tag.split("_")
    if parts[-1] in regions:
        return f"{parts[0]}-{parts[-1]}"
    return default
def generate_nonce(length: int = 32) -> str: def generate_nonce(length: int = 32) -> str:
""" """
@@ -126,6 +168,7 @@ def request(query: str, params: "OnlineParams") -> None:
params["url"] = None params["url"] = None
return return
locale = appropriate_locale(params["searxng_locale"], swisscows_regions, "en-US")
base_path = "" base_path = ""
args = dict[str, t.Any] args = dict[str, t.Any]
if swisscows_category == "web": if swisscows_category == "web":
@@ -135,7 +178,7 @@ def request(query: str, params: "OnlineParams") -> None:
args = { args = {
"freshness": freshness, "freshness": freshness,
"itemsCount": results_per_page, "itemsCount": results_per_page,
"locale": "en-US", "locale": locale,
"offset": (params["pageno"] - 1) * results_per_page, "offset": (params["pageno"] - 1) * results_per_page,
"query": query, "query": query,
"spellcheck": True, "spellcheck": True,
@@ -144,7 +187,7 @@ def request(query: str, params: "OnlineParams") -> None:
elif swisscows_category == "images": elif swisscows_category == "images":
args = { args = {
"itemsCount": results_per_page, "itemsCount": results_per_page,
"locale": "en-US", "locale": locale,
"offset": (params["pageno"] - 1) * results_per_page, "offset": (params["pageno"] - 1) * results_per_page,
"query": query, "query": query,
"spellcheck": True, "spellcheck": True,
@@ -155,7 +198,7 @@ def request(query: str, params: "OnlineParams") -> None:
"itemsCount": results_per_page, "itemsCount": results_per_page,
"offset": (params["pageno"] - 1) * results_per_page, "offset": (params["pageno"] - 1) * results_per_page,
"query": query, "query": query,
"region": "en-US", "region": locale,
"spellcheck": True, "spellcheck": True,
} }
base_path = "/v2/videos/search" base_path = "/v2/videos/search"
@@ -171,14 +214,14 @@ def request(query: str, params: "OnlineParams") -> None:
params["url"] = f"{base_url}{base_path}?{urlencode(args)}" params["url"] = f"{base_url}{base_path}?{urlencode(args)}"
def _video_result(result: dict[str, t.Any]) -> LegacyResult: def _video_result(result: dict[str, str]) -> LegacyResult:
published_date = None published_date = None
if result.get("datePublished"): if result.get("datePublished"):
published_date = datetime.fromisoformat(result["datePublished"]) published_date = datetime.fromisoformat(result["datePublished"])
view_count = None view_count = None
if result.get("viewCount"): if result.get("viewCount"):
view_count = humanize_number(result["viewCount"]) view_count = humanize_number(result["viewCount"]) # pyright: ignore[reportArgumentType]
return LegacyResult( return LegacyResult(
{ {
@@ -186,7 +229,8 @@ def _video_result(result: dict[str, t.Any]) -> LegacyResult:
"url": result["url"], "url": result["url"],
"title": html_to_text(result.get("title") or result["name"]), "title": html_to_text(result.get("title") or result["name"]),
"content": result["description"], "content": result["description"],
"thumbnail": result.get("thumbnailUrl") or result.get("thumbnail", {}).get("url"), "thumbnail": result.get("thumbnailUrl")
or result.get("thumbnail", {}).get("url"), # pyright: ignore[reportAttributeAccessIssue]
"length": result.get("duration"), "length": result.get("duration"),
"iframe_src": result.get("embedUrl"), "iframe_src": result.get("embedUrl"),
"publishedDate": published_date, "publishedDate": published_date,
@@ -195,7 +239,7 @@ def _video_result(result: dict[str, t.Any]) -> LegacyResult:
) )
def response(resp: "SXNG_Response"): def response(resp: "SXNG_Response") -> EngineResults:
res = EngineResults() res = EngineResults()
json_data = resp.json() json_data = resp.json()
@@ -211,6 +255,7 @@ def response(resp: "SXNG_Response"):
decoded = base64.urlsafe_b64decode(payload) decoded = base64.urlsafe_b64decode(payload)
json_data = json.loads(decoded.decode()) json_data = json.loads(decoded.decode())
result: dict[str, t.Any]
for result in json_data["items"]: for result in json_data["items"]:
if result["type"] == "WebPage": if result["type"] == "WebPage":
res.add( res.add(
+18 -6
View File
@@ -7,7 +7,9 @@ from urllib.parse import urlencode
import typing as t import typing as t
from searx.utils import html_to_text
from searx.result_types import EngineResults from searx.result_types import EngineResults
from searx.engines.swisscows import appropriate_locale
if t.TYPE_CHECKING: if t.TYPE_CHECKING:
from searx.extended_types import SXNG_Response from searx.extended_types import SXNG_Response
@@ -33,8 +35,17 @@ paging = True
base_url = "https://api.swisscows.com" base_url = "https://api.swisscows.com"
time_range_map = {"day": "Day", "week": "Week", "month": "Month", "year": "Year"} time_range_map = {"day": "Day", "week": "Week", "month": "Month", "year": "Year"}
swisscows_regions: list[str] = ["DE"]
"""Regions supported by swisscows News."""
def request(query: str, params: "OnlineParams") -> None: def request(query: str, params: "OnlineParams") -> None:
sxng_locale = params["searxng_locale"].split("-", maxsplit=1)[0]
locale: str = appropriate_locale(sxng_locale, swisscows_regions, default="de-DE")
if not locale:
return
freshness = "All" freshness = "All"
if params["time_range"]: if params["time_range"]:
freshness = time_range_map[params["time_range"]] freshness = time_range_map[params["time_range"]]
@@ -42,8 +53,8 @@ def request(query: str, params: "OnlineParams") -> None:
args = { args = {
"query": query, "query": query,
"itemsCount": results_per_page, "itemsCount": results_per_page,
"region": "de-DE", "region": locale,
"language": "de", "language": locale.split("-", maxsplit=1)[0],
"offset": (params["pageno"] - 1) * results_per_page, "offset": (params["pageno"] - 1) * results_per_page,
"freshness": freshness, "freshness": freshness,
"sortOrder": "Desc", "sortOrder": "Desc",
@@ -54,17 +65,18 @@ def request(query: str, params: "OnlineParams") -> None:
params["url"] = base_url + url_path params["url"] = base_url + url_path
def response(resp: "SXNG_Response"): def response(resp: "SXNG_Response") -> EngineResults:
res = EngineResults() res = EngineResults()
for result in resp.json()["items"]: result: dict[str, str]
for result in resp.json()["items"]: # pyright: ignore[reportAny]
res.add( res.add(
res.types.MainResult( res.types.MainResult(
url=result["uri"], url=result["uri"],
title=result["title"], title=html_to_text(result["title"]),
content=result["description"], content=result["description"],
publishedDate=datetime.fromisoformat(result["created"]), publishedDate=datetime.fromisoformat(result["created"]),
thumbnail=result.get("og:image"), thumbnail=result.get("og:image") or "",
) )
) )