[mod] swisscows WEB: ignore video results from the first page

On the first page of the WEB search, there are, among other things, sections for videos and news. The video results from these sections should not be used as results in the WEB search of SearXNG. Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
[mod] swisscows engines: add language / region support
2026-06-22 17:48:33 +02:00 · 2026-06-06 18:04:19 +02:00 · 2026-06-06 18:04:19 +02:00 · 2026-06-06 18:04:19 +02:00 · 2026-06-06 18:04:19 +02:00 · 2026-06-06 18:04:19 +02:00
3 changed files with 402 additions and 0 deletions
@@ -0,0 +1,287 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # pylint: disable=invalid-name
 """Swisscows (general, images, videos)"""
 import typing as t
 import base64
 import codecs
 import hashlib
 import json
 import random
 from datetime import datetime
 from urllib.parse import urlencode
 from babel.core import get_global
 from searx.result_types import EngineResults, LegacyResult  # pyright: ignore[reportPrivateLocalImportUsage]
 from searx.utils import humanize_number, html_to_text
 if t.TYPE_CHECKING:
    from searx.extended_types import SXNG_Response
    from searx.search.processors import OnlineParams
 # Metadata describing the upstream service: web site, Wikidata ID, whether an
 # official API / API key is involved and the format of the results.
 about = {
    "website": "https://swisscows.com",
    "wikidata_id": "Q22937452",
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": "JSON",
 }
 categories = ["general"]
 # Selects which API endpoint request() talks to; init() rejects any other value.
 swisscows_category = "web"  # possible: "web", "videos", "images"
 # Sent to the API as ``itemsCount`` and used to compute ``offset``.
 # NOTE(review): the default of 50 is above the "web" limit of 20 in
 # ``maximum_page_size``, so with the default ``swisscows_category`` init()
 # raises unless the engine settings override ``results_per_page``.
 results_per_page = 50
 time_range_support = True
 paging = True
 base_url = "https://api.swisscows.com"
 # Letters rotated by caesar_shift_with_switch_case().
 CAESAR_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 # Characters generate_nonce() draws from.
 NONCE_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~"
 # SearXNG time range name -> value sent as ``freshness`` by the "web" request.
 time_range_map = {"day": "Day", "week": "Week", "month": "Month", "year": "Year"}
 # fmt: off
 swisscows_regions: list[str] = [
    "AR", "AU", "AT", "BE", "BR", "CA", "CL", "CN", "DK", "FI",
    "FR", "DE", "HK", "HU", "IN", "ID", "IT", "JP", "KR", "LV",
    "MY", "MX", "NL", "NZ", "NO", "PH", "PL", "PT", "RU", "SA",
    "ZA", "ES", "SE", "CH", "TW", "TR", "UA", "GB", "US"
 ]
 """Regions supported by swisscows."""
 # fmt: on
 # NOTE(review): disabled and not referenced by the code in this module;
 # presumably the languages offered by swisscows -- confirm or remove.
 # swisscows_languages = [
 #     "GB", "DE", "ES", "FR", "IT", "LV", "HU", "NL", "PT", "RU", "UA"
 # ]
 def appropriate_locale(searxng_locale: str, regions: list[str], default: str) -> str:
    """Pick the swisscows locale that fits the locale selected in SearXNG.

    The decision is made in this order:

    1. ``all`` maps to ``default``.
    2. A language-only tag (``de``) or a tag whose region is listed in
       ``regions`` (``de-CH``) is returned unchanged.
    3. If the upper-cased language is itself a listed region, the result is
       ``<lang>-<LANG>`` (``de-LU`` becomes ``de-DE``).
    4. If babel's likely subtag for the language ends in a listed region, the
       result is built from the first and last part of that subtag.
    5. Otherwise ``default`` is returned.
    """
    parts = searxng_locale.split("-")
    language = parts[0]
    if language == "all":
        return default

    has_region = len(parts) > 1
    if not has_region or parts[1] in regions:
        return searxng_locale

    # the selected region is not available, try to find one for the language
    language_as_region = language.upper()
    if language_as_region in regions:
        return f"{language}-{language_as_region}"

    expanded: str | None = get_global("likely_subtags").get(language)
    if expanded:
        subtags: list[str] = expanded.split("_")
        if subtags[-1] in regions:
            return f"{subtags[0]}-{subtags[-1]}"
    return default
 def generate_nonce(length: int = 32) -> str:
    """
    Return ``length`` characters, each drawn at random from ``NONCE_ALPHABET``.
    """
    picks = (random.choice(NONCE_ALPHABET) for _ in range(length))
    return "".join(picks)
 def caesar_shift_with_switch_case(s: str, offset: int = 13) -> str:
    """
    Caesar shift by :py:obj:`offset` that additionally inverts the casing of all letters
    (i.e. from lowercase to uppercase and vice versa).

    Only the ASCII letters ``A-Z`` / ``a-z`` are rotated, every other character
    is copied to the result unchanged.

    :param s: text to transform
    :param offset: number of positions each letter is moved in ``CAESAR_ALPHABET``
    :return: the shifted, case-inverted text
    """
    alphabet_size = len(CAESAR_ALPHABET)
    out: list[str] = []
    for c in s:
        # Test for ASCII letters explicitly: ``c.upper() in CAESAR_ALPHABET`` is a
        # substring test on a possibly multi-character string -- "\ufb06" (st
        # ligature) upper-cases to "ST" and made ord() fail, "\u017f" (long s)
        # upper-cases to "S" and was rotated as if it were an ASCII letter.
        if not (c.isascii() and c.isalpha()):
            out.append(c)
            continue
        alphabet_index = CAESAR_ALPHABET.index(c.upper())
        shifted = CAESAR_ALPHABET[(alphabet_index + offset) % alphabet_size]
        # the alphabet is upper case: lower it only when the input was upper case
        out.append(shifted.lower() if c.isupper() else shifted)
    return "".join(out)
 def sha256_hash_b64_url(s: str) -> str:
    """
    Return the SHA256 digest of ``s`` as URL-safe base64 text without ``=`` padding.
    """
    digest = hashlib.sha256(s.encode()).digest()
    # the URL-safe alphabet already uses "-" and "_" in place of "+" and "/",
    # so only the padding is left to strip
    encoded = base64.urlsafe_b64encode(digest).decode("utf-8")
    return encoded.replace("=", "")
 def generate_nonce_and_signature(base_path: str, args: dict[str, t.Any]) -> tuple[str, str]:
    """
    Build the values for the "X-Request-Nonce" and "X-Request-Signature" headers
    (the scheme was reverse engineered from the official Swisscows website).

    The signature is the URL-safe base64 SHA256 of ``<base_path>?<query>`` followed
    by the nonce after :py:obj:`caesar_shift_with_switch_case`.

    :param base_path: path of the API endpoint, e.g. ``/v5/images/search``
    :param args: query arguments of the request
    :return: tuple ``(nonce, signature)``
    """
    nonce = generate_nonce()
    # The keys have to appear in alphabetic order, otherwise the signature is not
    # accepted.  The values must be plain text and not URL encoded, which is why
    # urlencode is not used to build this string.
    pairs = [f"{name}={args[name]}" for name in sorted(args)]
    signed_text = f"{base_path}?{'&'.join(pairs)}" + caesar_shift_with_switch_case(nonce, 13)
    return nonce, sha256_hash_b64_url(signed_text)
 # Largest ``results_per_page`` that init() accepts for each ``swisscows_category``.
 maximum_page_size = {"web": 20, "images": 50, "videos": 10}
 def init(_):
    """Validate the engine settings.

    :raises ValueError: if ``swisscows_category`` is not one of ``web``,
        ``images`` or ``videos``, or if ``results_per_page`` is larger than the
        entry for that category in ``maximum_page_size``.
    """
    known_categories = ("web", "images", "videos")
    if swisscows_category not in known_categories:
        raise ValueError("illegal swisscows category: %s" % swisscows_category)

    limit = maximum_page_size[swisscows_category]
    if results_per_page <= limit:
        return
    raise ValueError("results_per_page for swisscows %s can be at most %d" % (swisscows_category, limit))
 def request(query: str, params: "OnlineParams") -> None:
    """Build the signed swisscows API request for ``swisscows_category``.

    Sets ``params["url"]`` and adds the "X-Request-Nonce" / "X-Request-Signature"
    headers.  For the ``images`` category ``params["url"]`` is set to ``None``
    for every page after the second one and nothing else is done.

    :param query: the search terms
    :param params: request parameters; ``pageno``, ``searxng_locale``,
        ``time_range`` and ``headers`` are read, ``url`` and ``headers`` are written
    """
    pageno = params["pageno"]
    # swisscows images only supports 2 pages
    if swisscows_category == "images" and pageno > 2:
        params["url"] = None
        return

    locale = appropriate_locale(params["searxng_locale"], swisscows_regions, "en-US")
    offset = (pageno - 1) * results_per_page

    # Declared here, assigned in exactly one of the branches below.  (This used to
    # be ``args = dict[str, t.Any]`` which bound the generic alias itself to the
    # name instead of declaring the type.)
    base_path: str
    args: dict[str, t.Any]

    if swisscows_category == "web":
        base_path = "/v5/web/search"
        time_range = params["time_range"]
        args = {
            "freshness": time_range_map[time_range] if time_range else "All",
            "itemsCount": results_per_page,
            "locale": locale,
            "offset": offset,
            "query": query,
            "spellcheck": True,
        }
    elif swisscows_category == "images":
        base_path = "/v5/images/search"
        args = {
            "itemsCount": results_per_page,
            "locale": locale,
            "offset": offset,
            "query": query,
            "spellcheck": True,
        }
    else:
        # videos: older API version, the locale is passed as "region"
        base_path = "/v2/videos/search"
        args = {
            "itemsCount": results_per_page,
            "offset": offset,
            "query": query,
            "region": locale,
            "spellcheck": True,
        }

    # the signature is computed from the very same arguments that go into the URL
    nonce, signature = generate_nonce_and_signature(base_path, args)
    params["headers"].update(
        {
            "X-Request-Nonce": nonce,
            "X-Request-Signature": signature,
        }
    )
    params["url"] = f"{base_url}{base_path}?{urlencode(args)}"
 def _video_result(result: dict[str, str]) -> LegacyResult:
    """Convert one video item of the API response into a ``videos.html`` result.

    ``datePublished`` and ``viewCount`` are optional: when missing or empty the
    corresponding field of the result is ``None``.  The title falls back from
    ``title`` to ``name``, the thumbnail from ``thumbnailUrl`` to
    ``thumbnail.url``.
    """
    date_text = result.get("datePublished")
    published_date = datetime.fromisoformat(date_text) if date_text else None

    raw_views = result.get("viewCount")
    view_count = humanize_number(raw_views) if raw_views else None  # pyright: ignore[reportArgumentType]

    fields = {
        "template": "videos.html",
        "url": result["url"],
        "title": html_to_text(result.get("title") or result["name"]),
        "content": result["description"],
        "thumbnail": result.get("thumbnailUrl")
        or result.get("thumbnail", {}).get("url"),  # pyright: ignore[reportAttributeAccessIssue]
        "length": result.get("duration"),
        "iframe_src": result.get("embedUrl"),
        "publishedDate": published_date,
        "views": view_count,
    }
    return LegacyResult(fields)
 def response(resp: "SXNG_Response") -> EngineResults:
    """Turn the JSON reply of the swisscows API into SearXNG results.

    Items are dispatched on their ``type``: ``WebPage``, ``ImageObject`` and
    ``video`` items are always converted, the videos of a ``VideoCollection``
    only when ``swisscows_category`` is ``videos``.  Items of any other type are
    skipped.
    """
    res = EngineResults()
    data = resp.json()

    # Only the general and images replies are wrapped: their "payload" is encoded
    # like a JSON web token (3 parts separated by "."), the actual data sits in
    # the middle part.  Video replies are plain JSON and are used as they are.
    if "payload" in data:
        encoded = data["payload"].split(".")[1]
        # pad with '=' to be valid base64
        encoded += '=' * (4 - len(encoded) % 4)
        data = json.loads(base64.urlsafe_b64decode(encoded).decode())

    item: dict[str, t.Any]
    for item in data["items"]:
        kind = item["type"]
        if kind == "WebPage":
            res.add(
                res.types.MainResult(
                    url=item["url"],
                    title=item["name"],
                    content=html_to_text(item["description"]),
                    thumbnail=item.get("thumbnail", {}).get("url"),
                )
            )
        elif swisscows_category == "videos" and kind == "VideoCollection":
            for part in item["hasPart"]:
                res.add(_video_result(part))
        elif kind == "ImageObject":
            image = {
                "template": "images.html",
                "url": item["url"],
                "thumbnail_src": item["thumbnail"]["url"],
                "img_src": item["contentUrl"],
                "title": item["name"],
            }
            res.add(res.types.LegacyResult(image))
        elif kind == "video":
            res.add(_video_result(item))
    return res
@@ -0,0 +1,83 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # pylint: disable=invalid-name
 """Swisscows news"""
 from datetime import datetime
 from urllib.parse import urlencode
 import typing as t
 from searx.utils import html_to_text
 from searx.result_types import EngineResults
 from searx.engines.swisscows import appropriate_locale
 if t.TYPE_CHECKING:
    from searx.extended_types import SXNG_Response
    from searx.search.processors import OnlineParams
 # Metadata describing the upstream service: web site, Wikidata ID, whether an
 # official API / API key is involved and the format of the results.
 about = {
    "website": "https://swisscows.com",
    "wikidata_id": "Q22937452",
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": "JSON",
 }
 categories = ["news"]
 # Sent to the API as ``itemsCount`` and used to compute ``offset``.
 results_per_page = 20
 time_range_support = True
 paging = True
 base_url = "https://api.swisscows.com"
 # SearXNG time range name -> value sent as ``freshness``.
 time_range_map = {"day": "Day", "week": "Week", "month": "Month", "year": "Year"}
 # Passed to appropriate_locale() as the list of acceptable regions.
 swisscows_regions: list[str] = ["DE"]
 """Regions supported by swisscows News."""
 def request(query: str, params: "OnlineParams") -> None:
    """Build the URL of the swisscows news search.

    Only the language part of ``params["searxng_locale"]`` is handed to
    :py:obj:`appropriate_locale`; the value it returns is sent as ``region`` and
    its first subtag as ``language``.  If no locale is determined the function
    returns without setting ``params["url"]``.

    NOTE(review): assuming the imported ``appropriate_locale`` is the one from
    the swisscows engine module, a language-only tag is returned unchanged, so
    ``swisscows_regions`` is never consulted here and ``region`` ends up as e.g.
    ``fr`` -- confirm that this is intended.
    """
    language_tag = params["searxng_locale"].split("-", maxsplit=1)[0]
    locale: str = appropriate_locale(language_tag, swisscows_regions, default="de-DE")
    if not locale:
        return

    time_range = params["time_range"]
    freshness = time_range_map[time_range] if time_range else "All"

    args = {
        "query": query,
        "itemsCount": results_per_page,
        "region": locale,
        "language": locale.split("-", maxsplit=1)[0],
        "offset": (params["pageno"] - 1) * results_per_page,
        "freshness": freshness,
        "sortOrder": "Desc",
        "sortBy": "Created",
    }
    params["url"] = f"{base_url}/news/search?{urlencode(args)}"
 def response(resp: "SXNG_Response") -> EngineResults:
    """Convert every entry of ``items`` in the JSON reply into a news result.

    ``uri``, ``title``, ``description`` and ``created`` are required keys of an
    entry, ``og:image`` is optional (an empty string is used when it is absent
    or empty).
    """
    res = EngineResults()
    new_result = res.types.MainResult

    entry: dict[str, str]
    for entry in resp.json()["items"]:  # pyright: ignore[reportAny]
        news = new_result(
            url=entry["uri"],
            title=html_to_text(entry["title"]),
            content=entry["description"],
            publishedDate=datetime.fromisoformat(entry["created"]),
            thumbnail=entry.get("og:image") or "",
        )
        res.add(news)
    return res
@@ -2540,6 +2540,38 @@ engines:
    disabled: true
    inactive: true
  # The three "engine: swisscows" entries share one module; swisscows_category
  # selects the endpoint.  The module's init() rejects a results_per_page above
  # the per-category limit (web: 20, images: 50, videos: 10).
  - name: swisscows
    engine: swisscows
    categories: general
    swisscows_category: web
    results_per_page: 20
    shortcut: sw
    disabled: true
    inactive: true
  # no results_per_page here: the module default (50) equals the images limit
  - name: swisscows images
    engine: swisscows
    categories: images
    swisscows_category: images
    shortcut: swi
    disabled: true
    inactive: true
  - name: swisscows videos
    engine: swisscows
    categories: videos
    swisscows_category: videos
    results_per_page: 10
    shortcut: swv
    disabled: true
    inactive: true
  # separate module; its category ("news") is set in the module itself
  - name: swisscows news
    engine: swisscows_news
    shortcut: swn
    disabled: true
    inactive: true
  - name: wordnik
    engine: wordnik
    shortcut: wnik
Author	SHA1	Message	Date
Markus Heiser	0429198415	[mod] swisscows WEB: ignore video results from the first page On the first page of the WEB search, there are, among other things, sections for videos and news. The video results from these sections should not be used as results in the WEB search of SearXNG. Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>	2026-06-06 18:04:19 +02:00
Markus Heiser	e7cf57e9ae	[mod] swisscows engines: add language / region support Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>	2026-06-06 18:04:19 +02:00
Bnyro	ed369ac0ec	[feat] engines: add support for swisscows general	2026-06-06 18:04:19 +02:00
Bnyro	94bdbb5c63	[feat] engines: add support for swisscows videos	2026-06-06 18:04:19 +02:00
Bnyro	465b5229c6	[feat] engines: add swisscows news engine	2026-06-06 18:04:19 +02:00
Bnyro	cbf97fd262	[feat] engines: add swisscows images engine The implementation is basically a 1:1 port of the reverse engineered swisscows JavaScript code. (it's been obfuscated, so I've restructured it and made the variable names idiomatic instead of obfuscated var names like "a", "o", "i") ```js /* e: "/v5/images/search" t: { itemsCount: "50" locale: "de-DE" offset: "50" query: "test" spellcheck: "true" } */ // HASH library used: https://github.com/h2non/jshashes function generateNonceAndSignature(queryParams, urlPath) { // urlPath = "/v5/images/search" // sort keys alphabetically and join to query string let queryStringSorted = '?' + U().stringify(queryParams, { arrayFormat: 'repeat', allowDots: !0 }).split('&').map(e => { let[key, value] = e.split('='); return [key, decodeURIComponent(value)] }).sort((e, t) => e[0].localeCompare(t[0])).map(e => e.join('=')).join('&'); function caesarShift(str, offset = 13) { const alphabet = 'abcdefghijklmnopqrstuvwxyz'; let result = []; for (let a = 0; a < str.length; a++) { let c = str[a], alphabetIndex = alphabet.indexOf(c.toLowerCase()); if ( - 1 !== alphabetIndex) { alphabetIndex += offset; while (alphabetIndex >= alphabet.length) alphabetIndex -= alphabet.length; c = c === c.toUpperCase() ? alphabet[alphabetIndex] : alphabet[alphabetIndex].toUpperCase() } result.push(c) } return result.join('') } const r = new (sha256Instance()).SHA256; const random = randomString(32); const randomShifted = caesarShift(random); let to_hash = [urlPath, queryStringSorted, randomShifted].join(''); let signature = r.b64(to_hash); signature = signature.replace(/=/g, '').replace(/\+/g, '-').replace(/\//g, '_'); return { nonce: random, signature: signature } } function randomString(length) { let t = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~', n = ''; for (let r = 0; r < length; r++) n += t.charAt(Math.floor(Math.random() * t.length)); return n } ```	2026-06-06 18:04:19 +02:00