mirror of
https://github.com/searxng/searxng.git
synced 2026-06-12 21:07:51 +02:00
191 lines
5.8 KiB
Python
191 lines
5.8 KiB
Python
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
"""Kagi_ is a paid, privacy-focused search engine.
|
|
|
|
Using it requires an API key. If you have a Kagi account, you can obtain an API
|
|
key in the `API portal`_.
|
|
|
|
To enable Kagi, add the following to the ``engines`` section of
|
|
``settings.yml``:
|
|
|
|
.. code:: yaml
|
|
|
|
- name: kagi
|
|
engine: kagi
|
|
categories: [general, web]
|
|
shortcut: kg
|
|
api_key: ""
|
|
kagi_categ: search
|
|
|
|
- name: kagi.news
|
|
engine: kagi
|
|
categories: [news, web]
|
|
shortcut: kgn
|
|
api_key: ""
|
|
kagi_categ: news
|
|
|
|
- name: kagi.images
|
|
engine: kagi
|
|
categories: [images, web]
|
|
shortcut: kgi
|
|
paging: false
|
|
api_key: ""
|
|
kagi_categ: images
|
|
|
|
- name: kagi.videos
|
|
engine: kagi
|
|
categories: [videos, web]
|
|
shortcut: kgv
|
|
api_key: ""
|
|
kagi_categ: videos
|
|
|
|
.. _Kagi: https://kagi.com
|
|
.. _Api Portal: https://help.kagi.com/kagi/api/overview.html
|
|
"""
|
|
|
|
|
|
from datetime import datetime, timedelta
|
|
|
|
import typing as t
|
|
import html
|
|
|
|
from searx.extended_types import SXNG_Response
|
|
from searx.result_types import EngineResults
|
|
from searx.utils import parse_duration_string
|
|
|
|
if t.TYPE_CHECKING:
|
|
from searx.search.processors import OnlineParams
|
|
|
|
# Keys accepted by ``time_range_to_days_map`` below.
TimeRangeType = t.Literal["day", "week", "month", "year"]

# Engine metadata.
about = {
    "website": "https://kagi.com",
    "wikidata_id": "Q26000117",
    "official_api_documentation": "https://kagi.com/api/docs/openapi",
    "use_official_api": True,
    "require_api_key": True,
    "results": "JSON",
}

# --- capabilities ---------------------------------------------------------

paging = True
"""All categories except the ``images`` category support paging."""
safesearch = True
time_range_support = True

# --- engine configuration (can be overridden per engine in settings.yml) ---

categories = ["general"]
kagi_categ: t.Literal["search", "images", "news", "videos"] = "search"
"""Search category. Supported values: "search" (general), "images", "news", "videos"."""

base_url = "https://kagi.com"

# ``request()`` looks up the ``safesearch`` request parameter here to get the
# boolean that is sent as ``safe_search``.
safe_search_map = {
    0: False,
    1: True,
    2: True,
}

# ``request()`` turns a time range into a number of days, which it subtracts
# from the current date to build the ``after`` filter.
time_range_to_days_map: dict[TimeRangeType, int] = {
    "day": 1,
    "week": 7,
    "month": 30,
    "year": 365,
}

api_key = ""
"""Kagi API key. Required for using this engine."""
|
|
|
|
|
|
def init(_):
    """Validate the engine configuration.

    The argument is ignored; only the module-level settings ``api_key`` and
    ``kagi_categ`` are checked.

    :raises ValueError: if ``api_key`` is empty, or if ``kagi_categ`` is not
        one of ``search``, ``images``, ``news`` or ``videos``.
    """
    supported_workflows = ("search", "images", "news", "videos")

    # The key is sent as bearer token with every request, so an empty key is
    # rejected up front.
    if not api_key:
        raise ValueError("api_key is required for using kagi")

    if kagi_categ not in supported_workflows:
        raise ValueError(f"Unsupported category: {kagi_categ}")  # pyright: ignore[reportUnreachable]
|
|
|
|
|
|
def request(query: str, params: "OnlineParams"):
    """Prepare the POST request to Kagi's search endpoint.

    Sets ``headers["Authorization"]``, ``url``, ``method`` and ``json`` in
    *params*.  For page numbers above 10 the function returns without
    touching *params*.

    :param query: the search terms, sent as ``query``.
    :param params: request parameters; ``pageno``, ``safesearch``,
        ``searxng_locale`` and (optionally) ``time_range`` are read.
    """
    page_number = params["pageno"]

    # According to the API docs, Kagi supports at maximum page 10
    if page_number > 10:
        return

    params["headers"]["Authorization"] = f"Bearer {api_key}"
    params["url"] = f"{base_url}/api/v1/search"

    search_filters = {}

    if time_range := params.get("time_range"):
        # Kagi expects the minimum date to return results from as argument to `after`
        max_age = timedelta(days=time_range_to_days_map[time_range])
        search_filters["after"] = (datetime.now() - max_age).strftime("%Y-%m-%d")

    # there doesn't seem to be a list of languages anywhere,
    # so we just assume that it supports all languages
    search_filters["region"] = "no_region"
    locale_tag = params["searxng_locale"]
    if locale_tag != "all":
        locale_parts = locale_tag.split("-")
        # only locales that carry more than one subtag select a region: the
        # last subtag, lower-cased
        if len(locale_parts) > 1:
            search_filters["region"] = locale_parts[-1].lower()

    payload: dict[str, t.Any] = {
        "query": query,
        "page": page_number,
        "workflow": kagi_categ,
        "safe_search": safe_search_map[params["safesearch"]],
        "filters": search_filters,
    }

    params["method"] = "POST"
    params["json"] = payload
|
|
|
|
|
|
def response(resp: "SXNG_Response") -> EngineResults:
    """Convert Kagi's JSON response into engine results.

    The result list is read from ``data.<key>``, where ``<key>`` is the
    configured ``kagi_categ`` (with the trailing "s" removed for ``images`` and
    ``videos``).  Entries of ``data.related_search`` are added as suggestions.

    Optional fields (``title``, ``snippet``, ``image``, ``props``, ``time``)
    may be absent or ``null`` without aborting the whole response; only
    ``data`` and each result's ``url`` are mandatory.

    :param resp: HTTP response whose body is the JSON document of the API.
    :returns: the collected results.
    :raises KeyError: if the response has no ``data`` object or a result has
        no ``url``.
    """
    res = EngineResults()

    json_data: dict[str, t.Any] = resp.json()
    data: dict[str, t.Any] = json_data["data"]

    # the JSON key is "image" for "images" and "video" for "videos"
    results_key = kagi_categ[:-1] if kagi_categ in ("images", "videos") else kagi_categ

    # `or []` covers both a missing key and an explicit JSON null
    for result in data.get(results_key) or []:
        published_date: datetime | None = None
        if result.get("time"):
            published_date = datetime.fromisoformat(result["time"])

        # Normalize the optional sub-objects once, so that a missing or null
        # value can't raise further down.
        image: dict[str, t.Any] = result.get("image") or {}
        props: dict[str, t.Any] = result.get("props") or {}

        # html.unescape() only accepts str, hence the fallback to ""
        title = html.unescape(result.get("title") or "")
        snippet = html.unescape(result.get("snippet") or "")

        if kagi_categ in ("search", "news"):
            res.add(
                res.types.MainResult(
                    url=result["url"],
                    title=title,
                    content=snippet,
                    thumbnail=image.get("url") or "",
                    publishedDate=published_date,
                )
            )

        elif kagi_categ == "images":
            width, height = image.get("width"), image.get("height")
            # only report a resolution when both dimensions are known
            resolution = f"{width}x{height}" if width is not None and height is not None else ""

            res.add(
                res.types.Image(
                    url=result["url"],
                    title=title,
                    img_src=image.get("url"),
                    resolution=resolution,
                    thumbnail_src=(props.get("thumbnail") or {}).get("url"),
                )
            )

        elif kagi_categ == "videos":
            length: timedelta | None = None
            if props.get("duration"):
                length = parse_duration_string(props["duration"])

            res.add(
                res.types.LegacyResult(
                    {
                        "template": "videos.html",
                        "url": result["url"],
                        "title": title,
                        "content": snippet,
                        "thumbnail": image.get("url"),
                        "publishedDate": published_date,
                        "author": props.get("creator_name"),
                        "length": length,
                    }
                )
            )

    for suggestion in data.get("related_search") or []:
        res.add(res.types.LegacyResult({"suggestion": suggestion["title"]}))

    return res
|