mirror of
https://github.com/searxng/searxng.git
synced 2026-06-06 18:07:18 +02:00
[feat] engines: add support for swisscows general
This commit is contained in:
@@ -1,6 +1,6 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
# pylint: disable=invalid-name
|
# pylint: disable=invalid-name
|
||||||
"""Swisscows (images, videos)"""
|
"""Swisscows (general, images, videos)"""
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
import codecs
|
import codecs
|
||||||
@@ -13,7 +13,7 @@ from urllib.parse import urlencode
|
|||||||
|
|
||||||
import typing as t
|
import typing as t
|
||||||
|
|
||||||
from searx.result_types import EngineResults
|
from searx.result_types import EngineResults, LegacyResult
|
||||||
from searx.utils import humanize_number, html_to_text
|
from searx.utils import humanize_number, html_to_text
|
||||||
|
|
||||||
if t.TYPE_CHECKING:
|
if t.TYPE_CHECKING:
|
||||||
@@ -31,16 +31,20 @@ about = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
categories = ["videos"]
|
categories = ["general"]
|
||||||
swisscows_category = "videos" # possible: "videos", "images"
|
swisscows_category = "web" # possible: "web", "videos", "images"
|
||||||
paging = True
|
|
||||||
results_per_page = 50
|
results_per_page = 50
|
||||||
|
|
||||||
|
time_range_support = True
|
||||||
|
paging = True
|
||||||
|
|
||||||
base_url = "https://api.swisscows.com"
|
base_url = "https://api.swisscows.com"
|
||||||
|
|
||||||
CAESAR_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
CAESAR_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||||
NONCE_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~"
|
NONCE_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~"
|
||||||
|
|
||||||
|
time_range_map = {"day": "Day", "week": "Week", "month": "Month", "year": "Year"}
|
||||||
|
|
||||||
|
|
||||||
def generate_nonce(length: int = 32) -> str:
|
def generate_nonce(length: int = 32) -> str:
|
||||||
"""
|
"""
|
||||||
@@ -82,7 +86,7 @@ def sha256_hash_b64_url(s: str) -> str:
|
|||||||
return hash_base64_url_encoded
|
return hash_base64_url_encoded
|
||||||
|
|
||||||
|
|
||||||
def generate_nonce_and_signature(url_path: str) -> tuple[str, str]:
|
def generate_nonce_and_signature(base_path: str, args: dict[str, t.Any]) -> tuple[str, str]:
|
||||||
"""
|
"""
|
||||||
Generate "X-Request-Nonce" and "X-Request-Signature" which are required for accessing
|
Generate "X-Request-Nonce" and "X-Request-Signature" which are required for accessing
|
||||||
Swisscows images (reverse engineered from their official website).
|
Swisscows images (reverse engineered from their official website).
|
||||||
@@ -90,16 +94,30 @@ def generate_nonce_and_signature(url_path: str) -> tuple[str, str]:
|
|||||||
nonce = generate_nonce()
|
nonce = generate_nonce()
|
||||||
nonce_shifted = caesar_shift_with_switch_case(nonce, 13)
|
nonce_shifted = caesar_shift_with_switch_case(nonce, 13)
|
||||||
|
|
||||||
signature = sha256_hash_b64_url(url_path + nonce_shifted)
|
# in the path, all keys must be sorted in alphabetic order,
|
||||||
|
# otherwise the generated signature won't be accepted!
|
||||||
|
# additionally, the values may not be URL encoded, they have to be plain text
|
||||||
|
# hence we don't use urlencode here
|
||||||
|
args_sorted = sorted(args.items(), key=lambda arg: arg[0])
|
||||||
|
query_string = "&".join(f"{key}={value}" for (key, value) in args_sorted)
|
||||||
|
full_path = f"{base_path}?{query_string}"
|
||||||
|
|
||||||
|
signature = sha256_hash_b64_url(full_path + nonce_shifted)
|
||||||
return (nonce, signature)
|
return (nonce, signature)
|
||||||
|
|
||||||
|
|
||||||
|
maximum_page_size = {"web": 20, "images": 50, "videos": 10}
|
||||||
|
|
||||||
|
|
||||||
def init(_):
|
def init(_):
|
||||||
if swisscows_category not in ("videos", "images"):
|
if swisscows_category not in ("web", "images", "videos"):
|
||||||
raise ValueError("illegal swisscows category: %s" % swisscows_category)
|
raise ValueError("illegal swisscows category: %s" % swisscows_category)
|
||||||
|
|
||||||
if swisscows_category == "videos" and results_per_page > 10:
|
if results_per_page > maximum_page_size[swisscows_category]:
|
||||||
raise ValueError("results_per_page for swisscows videos can be at most 10")
|
raise ValueError(
|
||||||
|
"results_per_page for swisscows %s can be at most %d"
|
||||||
|
% (swisscows_category, maximum_page_size[swisscows_category])
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def request(query: str, params: "OnlineParams") -> None:
|
def request(query: str, params: "OnlineParams") -> None:
|
||||||
@@ -108,10 +126,22 @@ def request(query: str, params: "OnlineParams") -> None:
|
|||||||
params["url"] = None
|
params["url"] = None
|
||||||
return
|
return
|
||||||
|
|
||||||
# the keys have to be sorted in alphabetic order,
|
base_path = ""
|
||||||
# otherwise the generated signature won't be accepted!
|
args = dict[str, t.Any]
|
||||||
url_path = ""
|
if swisscows_category == "web":
|
||||||
if swisscows_category == "images":
|
freshness = "All"
|
||||||
|
if params["time_range"]:
|
||||||
|
freshness = time_range_map[params["time_range"]]
|
||||||
|
args = {
|
||||||
|
"freshness": freshness,
|
||||||
|
"itemsCount": results_per_page,
|
||||||
|
"locale": "en-US",
|
||||||
|
"offset": (params["pageno"] - 1) * results_per_page,
|
||||||
|
"query": query,
|
||||||
|
"spellcheck": True,
|
||||||
|
}
|
||||||
|
base_path = "/v5/web/search"
|
||||||
|
elif swisscows_category == "images":
|
||||||
args = {
|
args = {
|
||||||
"itemsCount": results_per_page,
|
"itemsCount": results_per_page,
|
||||||
"locale": "en-US",
|
"locale": "en-US",
|
||||||
@@ -119,7 +149,7 @@ def request(query: str, params: "OnlineParams") -> None:
|
|||||||
"query": query,
|
"query": query,
|
||||||
"spellcheck": True,
|
"spellcheck": True,
|
||||||
}
|
}
|
||||||
url_path = f"/v5/images/search?{urlencode(args)}"
|
base_path = "/v5/images/search"
|
||||||
else:
|
else:
|
||||||
args = {
|
args = {
|
||||||
"itemsCount": results_per_page,
|
"itemsCount": results_per_page,
|
||||||
@@ -128,9 +158,9 @@ def request(query: str, params: "OnlineParams") -> None:
|
|||||||
"region": "en-US",
|
"region": "en-US",
|
||||||
"spellcheck": True,
|
"spellcheck": True,
|
||||||
}
|
}
|
||||||
url_path = f"/v2/videos/search?{urlencode(args)}"
|
base_path = "/v2/videos/search"
|
||||||
|
|
||||||
nonce, signature = generate_nonce_and_signature(url_path)
|
nonce, signature = generate_nonce_and_signature(base_path, args)
|
||||||
|
|
||||||
params["headers"].update(
|
params["headers"].update(
|
||||||
{
|
{
|
||||||
@@ -138,7 +168,31 @@ def request(query: str, params: "OnlineParams") -> None:
|
|||||||
"X-Request-Signature": signature,
|
"X-Request-Signature": signature,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
params["url"] = base_url + url_path
|
params["url"] = f"{base_url}{base_path}?{urlencode(args)}"
|
||||||
|
|
||||||
|
|
||||||
|
def _video_result(result: dict[str, t.Any]) -> LegacyResult:
|
||||||
|
published_date = None
|
||||||
|
if result.get("datePublished"):
|
||||||
|
published_date = datetime.fromisoformat(result["datePublished"])
|
||||||
|
|
||||||
|
view_count = None
|
||||||
|
if result.get("viewCount"):
|
||||||
|
view_count = humanize_number(result["viewCount"])
|
||||||
|
|
||||||
|
return LegacyResult(
|
||||||
|
{
|
||||||
|
"template": "videos.html",
|
||||||
|
"url": result["url"],
|
||||||
|
"title": html_to_text(result.get("title") or result["name"]),
|
||||||
|
"content": result["description"],
|
||||||
|
"thumbnail": result.get("thumbnailUrl") or result.get("thumbnail", {}).get("url"),
|
||||||
|
"length": result.get("duration"),
|
||||||
|
"iframe_src": result.get("embedUrl"),
|
||||||
|
"publishedDate": published_date,
|
||||||
|
"views": view_count,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def response(resp: "SXNG_Response"):
|
def response(resp: "SXNG_Response"):
|
||||||
@@ -146,7 +200,8 @@ def response(resp: "SXNG_Response"):
|
|||||||
|
|
||||||
json_data = resp.json()
|
json_data = resp.json()
|
||||||
|
|
||||||
# only appears to be the case for images, for videos the data doesn't seem to be encoded
|
# the payload encoding is only used for general and images,
|
||||||
|
# for videos the data gets returned directly as a normal JSON response
|
||||||
# payload is encoded as a JSON web token -> 3 parts, separated by "."
|
# payload is encoded as a JSON web token -> 3 parts, separated by "."
|
||||||
# the actual data is in the center of the encoded string
|
# the actual data is in the center of the encoded string
|
||||||
if "payload" in json_data:
|
if "payload" in json_data:
|
||||||
@@ -157,7 +212,19 @@ def response(resp: "SXNG_Response"):
|
|||||||
json_data = json.loads(decoded.decode())
|
json_data = json.loads(decoded.decode())
|
||||||
|
|
||||||
for result in json_data["items"]:
|
for result in json_data["items"]:
|
||||||
if swisscows_category == "images":
|
if result["type"] == "WebPage":
|
||||||
|
res.add(
|
||||||
|
res.types.MainResult(
|
||||||
|
url=result["url"],
|
||||||
|
title=result["name"],
|
||||||
|
content=html_to_text(result["description"]),
|
||||||
|
thumbnail=result.get("thumbnail", {}).get("url"),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
elif result["type"] == "VideoCollection":
|
||||||
|
for video in result["hasPart"]:
|
||||||
|
res.add(_video_result(video))
|
||||||
|
elif result["type"] == "ImageObject":
|
||||||
res.add(
|
res.add(
|
||||||
res.types.LegacyResult(
|
res.types.LegacyResult(
|
||||||
{
|
{
|
||||||
@@ -169,25 +236,7 @@ def response(resp: "SXNG_Response"):
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
else:
|
elif result["type"] == "video":
|
||||||
published_date = None
|
res.add(_video_result(result))
|
||||||
if result["datePublished"]:
|
|
||||||
published_date = datetime.fromisoformat(result["datePublished"])
|
|
||||||
|
|
||||||
res.add(
|
|
||||||
res.types.LegacyResult(
|
|
||||||
{
|
|
||||||
"template": "videos.html",
|
|
||||||
"url": result["url"],
|
|
||||||
"title": html_to_text(result["title"]),
|
|
||||||
"content": result["description"],
|
|
||||||
"thumbnail": result["thumbnailUrl"],
|
|
||||||
"length": result["duration"],
|
|
||||||
"iframe_src": result["embedUrl"],
|
|
||||||
"publishedDate": published_date,
|
|
||||||
"views": humanize_number(result["viewCount"]),
|
|
||||||
}
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
return res
|
return res
|
||||||
+14
-2
@@ -2540,25 +2540,37 @@ engines:
|
|||||||
disabled: true
|
disabled: true
|
||||||
inactive: true
|
inactive: true
|
||||||
|
|
||||||
|
- name: swisscows
|
||||||
|
engine: swisscows
|
||||||
|
categories: general
|
||||||
|
swisscows_category: web
|
||||||
|
results_per_page: 20
|
||||||
|
shortcut: sw
|
||||||
|
disabled: true
|
||||||
|
inactive: true
|
||||||
|
|
||||||
- name: swisscows images
|
- name: swisscows images
|
||||||
engine: swisscows_extra
|
engine: swisscows
|
||||||
categories: images
|
categories: images
|
||||||
swisscows_category: images
|
swisscows_category: images
|
||||||
shortcut: swi
|
shortcut: swi
|
||||||
disabled: true
|
disabled: true
|
||||||
|
inactive: true
|
||||||
|
|
||||||
- name: swisscows videos
|
- name: swisscows videos
|
||||||
engine: swisscows_extra
|
engine: swisscows
|
||||||
categories: videos
|
categories: videos
|
||||||
swisscows_category: videos
|
swisscows_category: videos
|
||||||
results_per_page: 10
|
results_per_page: 10
|
||||||
shortcut: swv
|
shortcut: swv
|
||||||
disabled: true
|
disabled: true
|
||||||
|
inactive: true
|
||||||
|
|
||||||
- name: swisscows news
|
- name: swisscows news
|
||||||
engine: swisscows_news
|
engine: swisscows_news
|
||||||
shortcut: swn
|
shortcut: swn
|
||||||
disabled: true
|
disabled: true
|
||||||
|
inactive: true
|
||||||
|
|
||||||
- name: wordnik
|
- name: wordnik
|
||||||
engine: wordnik
|
engine: wordnik
|
||||||
|
|||||||
Reference in New Issue
Block a user