From cbf97fd262f26b3d6e215bfb22dd9bb00981be23 Mon Sep 17 00:00:00 2001
From: Bnyro
Date: Tue, 19 May 2026 18:23:48 +0200
Subject: [PATCH] [feat] engines: add swisscows images engine

The implementation is basically a 1:1 port of the reverse engineered
swisscows JavaScript code. (it's been obfuscated, so I've restructured it and
made the variable names idiomatic instead of obfuscated var names like "a",
"o", "i")

```js
/*
e: "/v5/images/search"
t: {
    itemsCount: "50"
    locale: "de-DE"
    offset: "50"
    query: "test"
    spellcheck: "true"
}
*/
// HASH library used: https://github.com/h2non/jshashes
function generateNonceAndSignature(queryParams, urlPath) {
    // urlPath = "/v5/images/search"
    // sort keys alphabetically and join to query string
    let queryStringSorted = '?' + U().stringify(queryParams, {
        arrayFormat: 'repeat',
        allowDots: !0
    }).split('&').map(e => {
        let[key, value] = e.split('=');
        return [key, decodeURIComponent(value)]
    }).sort((e, t) => e[0].localeCompare(t[0])).map(e => e.join('=')).join('&');

    function caesarShift(str, offset = 13) {
        const alphabet = 'abcdefghijklmnopqrstuvwxyz';
        let result = [];
        for (let a = 0; a < str.length; a++) {
            let c = str[a],
            alphabetIndex = alphabet.indexOf(c.toLowerCase());
            if ( - 1 !== alphabetIndex) {
                alphabetIndex += offset;
                while (alphabetIndex >= alphabet.length) alphabetIndex -= alphabet.length;
                c = c === c.toUpperCase() ? alphabet[alphabetIndex] : alphabet[alphabetIndex].toUpperCase()
            }
            result.push(c)
        }
        return result.join('')
    }

    const r = new (sha256Instance()).SHA256;
    const random = randomString(32);
    const randomShifted = caesarShift(random);
    let to_hash = [urlPath, queryStringSorted, randomShifted].join('');
    let signature = r.b64(to_hash);
    signature = signature.replace(/=/g, '').replace(/\+/g, '-').replace(/\//g, '_');
    return {
        nonce: random,
        signature: signature
    }
}

function randomString(length) {
    let t = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~',
    n = '';
    for (let r = 0; r < length; r++) n += t.charAt(Math.floor(Math.random() * t.length));
    return n
}
```
---
Review notes: request() now signs the unencoded, sorted query string like the
JavaScript above does (it hashes the decodeURIComponent()-ed values), and the
blob id in the "index" line below was not regenerated after these edits.

 searx/engines/swisscows_images.py | 169 ++++++++++++++++++++++++++++++
 searx/settings.yml                |   5 ++
 2 files changed, 174 insertions(+)
 create mode 100644 searx/engines/swisscows_images.py

diff --git a/searx/engines/swisscows_images.py b/searx/engines/swisscows_images.py
new file mode 100644
index 000000000..9293e3c47
--- /dev/null
+++ b/searx/engines/swisscows_images.py
@@ -0,0 +1,169 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# pylint: disable=invalid-name
+"""Swisscows images"""
+
+import json
+
+import random
+import base64
+import codecs
+import hashlib
+
+from urllib.parse import urlencode
+
+import typing as t
+
+from searx.result_types import EngineResults
+
+if t.TYPE_CHECKING:
+    from searx.extended_types import SXNG_Response
+    from searx.search.processors import OnlineParams
+
+
+about = {
+    "website": "https://swisscows.com",
+    "wikidata_id": "Q22937452",
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": "JSON",
+}
+
+
+categories = ["images"]
+paging = True
+results_per_page = 50
+
+base_url = "https://api.swisscows.com"
+
+CAESAR_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+NONCE_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~"
+
+
+def generate_nonce(length: int = 32) -> str:
+    """
+    Generate a random char sequence with the given length.
+    """
+    return "".join([random.choice(NONCE_ALPHABET) for _ in range(length)])
+
+
+def caesar_shift_with_switch_case(s: str, offset: int = 13) -> str:
+    """
+    Caesar shift by :py:obj:`offset` that additionally inverts the casing of all letters
+    (i.e. from lowercase to uppercase and vice versa).
+
+    Only ASCII letters are shifted, every other character is kept unchanged.
+    """
+    out: list[str] = []
+    for c in s:
+        # ``c.upper() in CAESAR_ALPHABET`` would be a substring test that also matches
+        # characters with a multi-letter uppercase form (U+FB05 uppercases to "ST"),
+        # for which ``ord()`` raises a TypeError, so check for ASCII letters explicitly
+        if not (c.isascii() and c.isalpha()):
+            out.append(c)
+            continue
+        alphabet_index = ord(c.upper()) - ord("A")
+        shifted = CAESAR_ALPHABET[(alphabet_index + offset) % len(CAESAR_ALPHABET)]
+        out.append(shifted.lower() if c.isupper() else shifted.upper())
+    return "".join(out)
+
+
+def sha256_hash_b64_url(s: str) -> str:
+    """
+    Calculate the SHA256 hash of :py:obj:`s` and encode it as URL-safe base64 without padding.
+    """
+    hasher = hashlib.sha256()
+    hasher.update(s.encode())
+    hashed_bytes = hasher.digest()
+
+    # hashlib generates a byte digest, but since we need to convert it to base64, we
+    # need to do that by hand
+    hash_base64 = codecs.encode(hashed_bytes, "base64").decode("utf-8").rstrip('\n')
+
+    hash_base64_url_encoded = hash_base64.replace("=", "").replace("+", '-').replace("/", '_')
+    return hash_base64_url_encoded
+
+
+def generate_nonce_and_signature(url_path: str) -> tuple[str, str]:
+    """
+    Generate "X-Request-Nonce" and "X-Request-Signature" which are required for accessing
+    Swisscows images (reverse engineered from their official website).
+
+    :py:obj:`url_path` is the URL path followed by the query string, whose keys are
+    sorted alphabetically and whose values are **not** percent-encoded.
+    """
+    nonce = generate_nonce()
+    nonce_shifted = caesar_shift_with_switch_case(nonce, 13)
+
+    signature = sha256_hash_b64_url(url_path + nonce_shifted)
+    return (nonce, signature)
+
+
+def request(query: str, params: "OnlineParams") -> None:
+    """
+    Set the request URL and the "X-Request-Nonce" / "X-Request-Signature" headers.
+
+    The engine only supports two pages, for any further page the URL is set to
+    ``None``.
+    """
+    # engine only supports 2 pages
+    if params["pageno"] > 2:
+        params["url"] = None
+        return
+
+    args = {
+        "itemsCount": results_per_page,
+        "locale": "en-US",
+        "offset": (params["pageno"] - 1) * results_per_page,
+        "query": query,
+        # the official client sends a lowercase "true", ``urlencode(True)`` gives "True"
+        "spellcheck": "true",
+    }
+    # the keys have to be sorted in alphabetic order,
+    # otherwise the generated signature won't be accepted!
+    sorted_args = sorted(args.items())
+
+    path = "/v5/images/search"
+    # The official client runs ``decodeURIComponent`` on every value before hashing,
+    # i.e. it signs the raw values.  ``urlencode`` rewrites e.g. spaces and umlauts, so
+    # the signature has to be calculated from the unencoded query string.
+    unencoded_query = "&".join(f"{key}={value}" for key, value in sorted_args)
+    nonce, signature = generate_nonce_and_signature(f"{path}?{unencoded_query}")
+
+    params["headers"].update(
+        {
+            "X-Request-Nonce": nonce,
+            "X-Request-Signature": signature,
+        }
+    )
+    params["url"] = f"{base_url}{path}?{urlencode(sorted_args)}"
+
+
+def response(resp: "SXNG_Response"):
+    """
+    Convert the API response into image results.
+
+    The second dot-separated segment of the ``payload`` field is URL-safe base64 encoded
+    JSON that contains the result ``items``.
+    """
+    res = EngineResults()
+
+    payload = resp.json()["payload"].split(".")[1]
+    # re-add the base64 padding: 0 to 3 "=", never a whole extra group of 4
+    decoded = base64.urlsafe_b64decode(payload + '=' * (-len(payload) % 4))
+    json_data = json.loads(decoded.decode())
+
+    for result in json_data["items"]:
+        res.add(
+            res.types.LegacyResult(
+                {
+                    "template": "images.html",
+                    "url": result["url"],
+                    "thumbnail_src": result["thumbnail"]["url"],
+                    "img_src": result["contentUrl"],
+                    "title": result["name"],
+                }
+            )
+        )
+
+    return res
diff --git a/searx/settings.yml b/searx/settings.yml
index e4655fca0..208f94f06 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -2540,6 +2540,11 @@ engines:
     disabled: true
     inactive: true
 
+  - name: swisscows images
+    engine: swisscows_images
+    shortcut: swi
+    disabled: true
+
   - name: wordnik
     engine: wordnik
     shortcut: wnik