Compare commits

...

6 Commits

Author SHA1 Message Date
Markus Heiser 0429198415 [mod] swisscows WEB: ignore video results from the first page
On the first page of the WEB search, there are, among other things, sections for
videos and news.  The video results from these sections should not be used as
results in the WEB search of SearXNG.

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2026-06-06 18:04:19 +02:00
Markus Heiser e7cf57e9ae [mod] swisscows engines: add language / region support
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2026-06-06 18:04:19 +02:00
Bnyro ed369ac0ec [feat] engines: add support for swisscows general 2026-06-06 18:04:19 +02:00
Bnyro 94bdbb5c63 [feat] engines: add support for swisscows videos 2026-06-06 18:04:19 +02:00
Bnyro 465b5229c6 [feat] engines: add swisscows news engine 2026-06-06 18:04:19 +02:00
Bnyro cbf97fd262 [feat] engines: add swisscows images engine
The implementation is basically a 1:1 port of the reverse engineered
swisscows JavaScript code. (it's been obfuscated, so I've restructured it
and made the variable names idiomatic instead of obfuscated var names like "a", "o", "i")

```js
/*
e: "/v5/images/search"
t: {
	itemsCount: "50"
	locale: "de-DE"
	offset: "50"
	query: "test"
	spellcheck: "true"
}
*/
// HASH library used: https://github.com/h2non/jshashes
/**
 * Build the nonce / signature pair for a request.
 *
 * The signature is the SHA256 hash (base64, rewritten to the URL-safe
 * alphabet without '=' padding) of: urlPath + sorted query string +
 * caesar-shifted nonce.
 *
 * NOTE(review): `U()` looks like a query-string serializer and
 * `sha256Instance()` like the jshashes entry point -- neither is defined in
 * this snippet, confirm against the original bundle.
 *
 * @param queryParams  object with the query parameters of the request
 * @param urlPath      request path, e.g. "/v5/images/search"
 * @returns {{nonce: string, signature: string}}
 */
function generateNonceAndSignature(queryParams, urlPath) {
  // urlPath = "/v5/images/search"
  // sort keys alphabetically and join to query string
  // (values are URI-decoded again, i.e. the signed string holds plain text)
  let queryStringSorted = '?' + U().stringify(queryParams, {
    arrayFormat: 'repeat',
    allowDots: !0
  }).split('&').map(e => {
    let[key, value] = e.split('=');
    return [key, decodeURIComponent(value)]
  }).sort((e, t) => e[0].localeCompare(t[0])).map(e => e.join('=')).join('&');

  // Rotate letters by `offset` positions and invert their case; characters
  // that are not in `alphabet` are copied unchanged.
  function caesarShift(str, offset = 13) {
      const alphabet = 'abcdefghijklmnopqrstuvwxyz';
      let result = [];
      for (let a = 0; a < str.length; a++) {
        let c = str[a],
        alphabetIndex = alphabet.indexOf(c.toLowerCase());
        if ( - 1 !== alphabetIndex) {
          alphabetIndex += offset;
          // wrap around at the end of the alphabet
          while (alphabetIndex >= alphabet.length) alphabetIndex -= alphabet.length;
          // upper case input -> lower case output and vice versa
          c = c === c.toUpperCase() ? alphabet[alphabetIndex] : alphabet[alphabetIndex].toUpperCase()
        }
        result.push(c)
      }
      return result.join('')
    }
  const r = new (sha256Instance()).SHA256;
  const random = randomString(32);
  const randomShifted = caesarShift(random);
  let to_hash = [urlPath, queryStringSorted, randomShifted].join('');
  let signature = r.b64(to_hash);
  // base64 -> base64url: drop '=' and replace '+' / '/' by '-' / '_'
  signature = signature.replace(/=/g, '').replace(/\+/g, '-').replace(/\//g, '_');
  return {
    nonce: random,
    signature: signature
  }
}

/**
 * Build a string of `length` characters, each one picked with Math.random()
 * from the 66 characters listed in `t` (A-Z, a-z, 0-9 and "-._~").
 */
function randomString(length) {
  let t = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~',
  n = '';
  for (let r = 0; r < length; r++) n += t.charAt(Math.floor(Math.random() * t.length));
  return n
}
```
2026-06-06 18:04:19 +02:00
3 changed files with 402 additions and 0 deletions
+287
View File
@@ -0,0 +1,287 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=invalid-name
"""Swisscows (general, images, videos)"""
import typing as t
import base64
import codecs
import hashlib
import json
import random
from datetime import datetime
from urllib.parse import urlencode
from babel.core import get_global
from searx.result_types import EngineResults, LegacyResult # pyright: ignore[reportPrivateLocalImportUsage]
from searx.utils import humanize_number, html_to_text
if t.TYPE_CHECKING:
from searx.extended_types import SXNG_Response
from searx.search.processors import OnlineParams
# Engine metadata: the upstream website and its Wikidata item, plus how the
# results are obtained (no official API, no API key, JSON responses).
about = {
"website": "https://swisscows.com",
"wikidata_id": "Q22937452",
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": "JSON",
}
# Engine settings; the values below are the module defaults.
categories = ["general"]
# Search vertical queried by request(); init() rejects any other value.
swisscows_category = "web" # possible: "web", "videos", "images"
# Sent as "itemsCount" and used to compute "offset"; init() checks it against
# maximum_page_size for the selected swisscows_category.
results_per_page = 50
time_range_support = True
paging = True
# Scheme and host of the API; request() appends the path and query string.
base_url = "https://api.swisscows.com"
# The 26 uppercase ASCII letters (alphabet of the Caesar shift).
CAESAR_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
# Characters generate_nonce() picks from.
NONCE_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~"
# SearXNG time range name -> value of the "freshness" argument (web search).
time_range_map = {"day": "Day", "week": "Week", "month": "Month", "year": "Year"}
# fmt: off
swisscows_regions: list[str] = [
"AR", "AU", "AT", "BE", "BR", "CA", "CL", "CN", "DK", "FI",
"FR", "DE", "HK", "HU", "IN", "ID", "IT", "JP", "KR", "LV",
"MY", "MX", "NL", "NZ", "NO", "PH", "PL", "PT", "RU", "SA",
"ZA", "ES", "SE", "CH", "TW", "TR", "UA", "GB", "US"
]
"""Regions supported by swisscows."""
# fmt: on
# NOTE(review): the list below is commented out and not referenced by the
# code in this module.
# swisscows_languages = [
# "GB", "DE", "ES", "FR", "IT", "LV", "HU", "NL", "PT", "RU", "UA"
# ]
def appropriate_locale(searxng_locale: str, regions: list[str], default: str) -> str:
    """Map the locale selected in SearXNG to a locale understood by swisscows.

    The lookup goes through these steps, the first match wins:

    1. ``all`` (as language) -> ``default``
    2. a tag without any ``-`` or a tag whose second subtag is listed in
       ``regions`` -> the tag is returned unchanged
    3. the upper-cased language is itself a region (``de`` -> ``de-DE``)
    4. the region of babel's *likely subtag* of the language is in ``regions``
    5. otherwise -> ``default``
    """
    subtags = searxng_locale.split("-")
    language = subtags[0]

    if language == "all":
        return default

    if len(subtags) == 1 or subtags[1] in regions:
        return searxng_locale

    # e.g. "de-LU": LU is unknown, but DE is a supported region
    same_named_region = language.upper()
    if same_named_region in regions:
        return f"{language}-{same_named_region}"

    likely_subtag: str | None = get_global("likely_subtags").get(language)
    if likely_subtag:
        # babel separates the subtags by "_"; the last one is taken as region
        likely_parts: list[str] = likely_subtag.split("_")
        if likely_parts[-1] in regions:
            return f"{likely_parts[0]}-{likely_parts[-1]}"

    return default
def generate_nonce(length: int = 32) -> str:
    """Return a string of ``length`` characters, each one chosen at random
    from :py:obj:`NONCE_ALPHABET`.
    """
    picked = (random.choice(NONCE_ALPHABET) for _ in range(length))
    return "".join(picked)
def caesar_shift_with_switch_case(s: str, offset: int = 13) -> str:
    """
    Caesar shift by :py:obj:`offset` that additionally inverts the casing of all letters
    (i.e. from lowercase to uppercase and vice versa).

    Only the 26 ASCII letters are shifted; every other character (digits,
    punctuation, non-ASCII letters) is copied unchanged.  ``offset`` may be any
    integer, the shift wraps around the alphabet.
    """
    alphabet_size = 26
    first_letter = ord("A")

    out: list[str] = []
    for c in s:
        # Test for an ASCII letter explicitly: upper-casing some non-ASCII
        # characters yields ASCII text (e.g. U+FB06 -> "ST", U+0131 -> "I"),
        # which must not be mistaken for a letter of the alphabet.
        if c.isascii() and c.isalpha():
            alphabet_index = ord(c.upper()) - first_letter
            shifted = chr(first_letter + (alphabet_index + offset) % alphabet_size)
            out.append(shifted.lower() if c.isupper() else shifted.upper())
        else:
            out.append(c)
    return "".join(out)
def sha256_hash_b64_url(s: str) -> str:
    """
    Calculate the SHA256 hash of ``s`` (encoded as UTF-8) and return it base64
    URL-encoded, i.e. with ``-`` and ``_`` instead of ``+`` and ``/`` and
    without the trailing ``=`` padding.
    """
    digest = hashlib.sha256(s.encode()).digest()
    return base64.urlsafe_b64encode(digest).decode("ascii").rstrip("=")
def generate_nonce_and_signature(base_path: str, args: dict[str, t.Any]) -> tuple[str, str]:
    """
    Create the values of the "X-Request-Nonce" and "X-Request-Signature"
    headers (reverse engineered from the official swisscows website).

    The signature is the URL-safe base64 SHA256 hash of the request path, the
    query string and the caesar-shifted nonce.

    :param base_path: path of the API endpoint, without query string
    :param args: query arguments of the request
    :return: tuple ``(nonce, signature)``
    """
    nonce = generate_nonce()

    # The signature is only accepted if the keys of the query string are in
    # alphabetic order and the values are plain text -- that is why the string
    # is assembled by hand instead of using urlencode.
    pairs = [f"{name}={args[name]}" for name in sorted(args)]
    signed_text = base_path + "?" + "&".join(pairs) + caesar_shift_with_switch_case(nonce, 13)

    return (nonce, sha256_hash_b64_url(signed_text))
# Largest results_per_page that init() accepts for each swisscows_category.
maximum_page_size = {"web": 20, "images": 50, "videos": 10}
def init(_):
    """Check the engine settings.

    :raises ValueError: if ``swisscows_category`` is unknown or
        ``results_per_page`` exceeds the limit of the selected category.
    """
    known_categories = ("web", "images", "videos")
    if swisscows_category not in known_categories:
        raise ValueError("illegal swisscows category: %s" % swisscows_category)

    page_limit = maximum_page_size[swisscows_category]
    if results_per_page > page_limit:
        raise ValueError(
            "results_per_page for swisscows %s can be at most %d"
            % (swisscows_category, page_limit)
        )
def request(query: str, params: "OnlineParams") -> None:
    """Assemble the signed API request for the configured ``swisscows_category``.

    Reads ``pageno``, ``searxng_locale`` and ``time_range`` from ``params``,
    adds the "X-Request-Nonce" / "X-Request-Signature" headers to
    ``params["headers"]`` and stores the request URL in ``params["url"]``.

    For the images category ``params["url"]`` is set to ``None`` for every
    page after the second one.
    """
    pageno = params["pageno"]

    # swisscows images only supports 2 pages
    if swisscows_category == "images" and pageno > 2:
        params["url"] = None
        return

    locale = appropriate_locale(params["searxng_locale"], swisscows_regions, "en-US")
    offset = (pageno - 1) * results_per_page

    base_path: str
    args: dict[str, t.Any]

    # The keys of each dict are written in alphabetic order, the same order is
    # used for the query string of the URL.
    if swisscows_category == "web":
        time_range = params["time_range"]
        args = {
            "freshness": time_range_map[time_range] if time_range else "All",
            "itemsCount": results_per_page,
            "locale": locale,
            "offset": offset,
            "query": query,
            "spellcheck": True,
        }
        base_path = "/v5/web/search"

    elif swisscows_category == "images":
        args = {
            "itemsCount": results_per_page,
            "locale": locale,
            "offset": offset,
            "query": query,
            "spellcheck": True,
        }
        base_path = "/v5/images/search"

    else:
        # videos: the locale is passed as "region"
        args = {
            "itemsCount": results_per_page,
            "offset": offset,
            "query": query,
            "region": locale,
            "spellcheck": True,
        }
        base_path = "/v2/videos/search"

    nonce, signature = generate_nonce_and_signature(base_path, args)
    params["headers"].update(
        {
            "X-Request-Nonce": nonce,
            "X-Request-Signature": signature,
        }
    )
    params["url"] = f"{base_url}{base_path}?{urlencode(args)}"
def _video_result(result: dict[str, str]) -> LegacyResult:
    """Convert one video item of the API response into a ``videos.html`` result.

    ``url`` and ``description`` are required keys, the title is taken from
    ``title`` or, if that is missing or empty, from ``name``.  All other fields
    are optional and ``None`` when absent.
    """
    date_published = result.get("datePublished")
    published_date = datetime.fromisoformat(result["datePublished"]) if date_published else None

    views = result.get("viewCount")
    view_count = humanize_number(result["viewCount"]) if views else None  # pyright: ignore[reportArgumentType]

    fields = {
        "template": "videos.html",
        "url": result["url"],
        "title": html_to_text(result.get("title") or result["name"]),
        "content": result["description"],
        # either a plain URL or an object with an "url" entry
        "thumbnail": result.get("thumbnailUrl")
        or result.get("thumbnail", {}).get("url"),  # pyright: ignore[reportAttributeAccessIssue]
        "length": result.get("duration"),
        "iframe_src": result.get("embedUrl"),
        "publishedDate": published_date,
        "views": view_count,
    }
    return LegacyResult(fields)
def _decode_payload(token: str) -> dict[str, t.Any]:
    """Return the JSON object stored in the middle part of ``token``.

    The token is built like a JSON web token: three parts separated by ".",
    the data is the URL-safe base64 encoded (unpadded) second part.
    """
    segment = token.split(".")[1]
    # base64 input must be a multiple of four characters long; ``-len % 4``
    # yields 0..3, so an already aligned segment gets no padding at all
    segment += "=" * (-len(segment) % 4)
    return json.loads(base64.urlsafe_b64decode(segment).decode())


def response(resp: "SXNG_Response") -> EngineResults:
    """Parse the JSON response of the API into SearXNG results.

    Items are dispatched on their ``type``:

    - ``WebPage`` -> main result
    - ``VideoCollection`` -> one video result per ``hasPart`` entry, but only
      if this engine is configured for the ``videos`` category
    - ``ImageObject`` -> image result
    - ``video`` -> video result

    Items of any other type are ignored.
    """
    res = EngineResults()
    json_data = resp.json()

    # the payload encoding is only used for general and images,
    # for videos the data gets returned directly as a normal JSON response
    if "payload" in json_data:
        json_data = _decode_payload(json_data["payload"])

    result: dict[str, t.Any]
    for result in json_data["items"]:
        result_type = result["type"]

        if result_type == "WebPage":
            res.add(
                res.types.MainResult(
                    url=result["url"],
                    title=result["name"],
                    content=html_to_text(result["description"]),
                    thumbnail=result.get("thumbnail", {}).get("url"),
                )
            )

        elif swisscows_category == "videos" and result_type == "VideoCollection":
            for video in result["hasPart"]:
                res.add(_video_result(video))

        elif result_type == "ImageObject":
            res.add(
                res.types.LegacyResult(
                    {
                        "template": "images.html",
                        "url": result["url"],
                        "thumbnail_src": result["thumbnail"]["url"],
                        "img_src": result["contentUrl"],
                        "title": result["name"],
                    }
                )
            )

        elif result_type == "video":
            res.add(_video_result(result))

    return res
+83
View File
@@ -0,0 +1,83 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=invalid-name
"""Swisscows news"""
from datetime import datetime
from urllib.parse import urlencode
import typing as t
from searx.utils import html_to_text
from searx.result_types import EngineResults
from searx.engines.swisscows import appropriate_locale
if t.TYPE_CHECKING:
from searx.extended_types import SXNG_Response
from searx.search.processors import OnlineParams
# Engine metadata: the upstream website and its Wikidata item, plus how the
# results are obtained (no official API, no API key, JSON responses).
about = {
"website": "https://swisscows.com",
"wikidata_id": "Q22937452",
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": "JSON",
}
# Engine settings; the values below are the module defaults.
categories = ["news"]
# Sent as "itemsCount" and used to compute "offset" in request().
results_per_page = 20
time_range_support = True
paging = True
# Scheme and host of the API; request() appends the path and query string.
base_url = "https://api.swisscows.com"
# SearXNG time range name -> value of the "freshness" argument.
time_range_map = {"day": "Day", "week": "Week", "month": "Month", "year": "Year"}
# Passed to appropriate_locale() as the list of known regions.
swisscows_regions: list[str] = ["DE"]
"""Regions supported by swisscows News."""
def request(query: str, params: "OnlineParams") -> None:
    """Assemble the URL of the news search and store it in ``params["url"]``.

    Only the language part of the SearXNG locale is handed over to
    :py:obj:`appropriate_locale`; if that returns an empty value, ``params``
    is left untouched.
    """
    language_tag = params["searxng_locale"].split("-", maxsplit=1)[0]
    locale: str = appropriate_locale(language_tag, swisscows_regions, default="de-DE")
    if not locale:
        return

    time_range = params["time_range"]
    freshness = time_range_map[time_range] if time_range else "All"

    args = {
        "query": query,
        "itemsCount": results_per_page,
        "region": locale,
        "language": locale.split("-", maxsplit=1)[0],
        "offset": (params["pageno"] - 1) * results_per_page,
        "freshness": freshness,
        # newest entries first
        "sortOrder": "Desc",
        "sortBy": "Created",
    }
    params["url"] = base_url + "/news/search?" + urlencode(args)
def response(resp: "SXNG_Response") -> EngineResults:
    """Turn every entry of ``items`` in the JSON response into a main result.

    ``uri``, ``title``, ``description`` and ``created`` (ISO formatted date)
    are required keys of an entry, ``og:image`` is optional.
    """
    res = EngineResults()

    item: dict[str, str]
    for item in resp.json()["items"]:  # pyright: ignore[reportAny]
        news = res.types.MainResult(
            url=item["uri"],
            title=html_to_text(item["title"]),
            content=item["description"],
            publishedDate=datetime.fromisoformat(item["created"]),
            thumbnail=item.get("og:image") or "",
        )
        res.add(news)

    return res
+32
View File
@@ -2540,6 +2540,38 @@ engines:
disabled: true
inactive: true
- name: swisscows
engine: swisscows
categories: general
swisscows_category: web
results_per_page: 20
shortcut: sw
disabled: true
inactive: true
- name: swisscows images
engine: swisscows
categories: images
swisscows_category: images
shortcut: swi
disabled: true
inactive: true
- name: swisscows videos
engine: swisscows
categories: videos
swisscows_category: videos
results_per_page: 10
shortcut: swv
disabled: true
inactive: true
- name: swisscows news
engine: swisscows_news
shortcut: swn
disabled: true
inactive: true
- name: wordnik
engine: wordnik
shortcut: wnik