diff --git a/docs/dev/engines/online/kagi.rst b/docs/dev/engines/online/kagi.rst new file mode 100644 index 000000000..aabcdb67b --- /dev/null +++ b/docs/dev/engines/online/kagi.rst @@ -0,0 +1,9 @@ +.. _kagi engines: + +============ +Kagi Engines +============ + +.. automodule:: searx.engines.kagi + :members: + diff --git a/searx/engines/kagi.py b/searx/engines/kagi.py new file mode 100644 index 000000000..bcf9dbf20 --- /dev/null +++ b/searx/engines/kagi.py @@ -0,0 +1,190 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +"""Kagi_ is a paid, privacy-focused search engine. + +Using it requires an API key. If you have a Kagi account, you can obtain an API +key in the `API portal`_. + +To enable Kagi, add the following to the ``engines`` section of +``settings.yml``: + +.. code:: yaml + + - name: kagi + engine: kagi + categories: [general, web] + shortcut: kg + api_key: "" + kagi_categ: search + + - name: kagi.news + engine: kagi + categories: [news, web] + shortcut: kgn + api_key: "" + kagi_categ: news + + - name: kagi.images + engine: kagi + categories: [images, web] + shortcut: kgi + paging: false + api_key: "" + kagi_categ: images + + - name: kagi.videos + engine: kagi + categories: [videos, web] + shortcut: kgv + api_key: "" + kagi_categ: videos + +.. _Kagi: https://kagi.com +.. 
_Api Portal: https://help.kagi.com/kagi/api/overview.html +""" + + +from datetime import datetime, timedelta + +import typing as t +import html + +from searx.extended_types import SXNG_Response +from searx.result_types import EngineResults +from searx.utils import parse_duration_string + +if t.TYPE_CHECKING: + from searx.search.processors import OnlineParams + +TimeRangeType = t.Literal["day", "week", "month", "year"] +about = { + "website": "https://kagi.com", + "wikidata_id": "Q26000117", + "official_api_documentation": "https://kagi.com/api/docs/openapi", + "use_official_api": True, + "require_api_key": True, + "results": "JSON", +} + +paging = True +"""All categories except the ``images`` category support paging.""" +safesearch = True +time_range_support = True + +categories = ["general"] +kagi_categ: t.Literal["search", "images", "news", "videos"] = "search" +"""Search category. Supported values: "search" (general), "images", "news", "videos".""" + +base_url = "https://kagi.com" + +safe_search_map = {0: False, 1: True, 2: True} +time_range_to_days_map: dict[TimeRangeType, int] = {"day": 1, "week": 7, "month": 30, "year": 365} + +api_key = "" +"""Kagi API key. 
Required for using this engine."""
+
+
+def init(_):
+    """Validate the engine configuration.
+
+    The single positional argument is ignored (hence ``_``).
+
+    :raises ValueError: if ``api_key`` is empty or ``kagi_categ`` is not one
+        of ``search``, ``images``, ``news`` or ``videos``.
+    """
+    if not api_key:
+        raise ValueError("api_key is required for using kagi")
+
+    if kagi_categ not in ("search", "images", "news", "videos"):
+        raise ValueError(f"Unsupported category: {kagi_categ}")  # pyright: ignore[reportUnreachable]
+
+
+def request(query: str, params: "OnlineParams"):
+    """Fill ``params`` in place with a POST request to Kagi's search endpoint.
+
+    Sets the ``Authorization`` header, the URL, the HTTP method and the JSON
+    body.  ``params`` is left untouched when the requested page number is
+    greater than 10.
+    """
+    # According to the API docs, Kagi supports at maximum page 10
+    if params["pageno"] > 10:
+        return
+
+    params["headers"]["Authorization"] = f"Bearer {api_key}"
+    params["url"] = f"{base_url}/api/v1/search"
+
+    filters = {}
+    time_range = params.get("time_range")
+    if time_range:
+        # Kagi expects the minimum date to return results from as argument to `after`
+        time_period = timedelta(days=time_range_to_days_map[time_range])
+        oldest_result_date = datetime.now() - time_period
+        filters["after"] = oldest_result_date.strftime("%Y-%m-%d")
+
+    # there doesn't seem to be a list of languages anywhere,
+    # so we just assume that it supports all languages
+
+    filters["region"] = "no_region"
+    if params["searxng_locale"] != "all":
+        _locale = params["searxng_locale"].split("-")
+        # only locales that carry a second component (e.g. ``de-AT``) select a region
+        if len(_locale) > 1:
+            filters["region"] = _locale[-1].lower()
+
+    args: dict[str, t.Any] = {
+        "query": query,
+        "page": params["pageno"],
+        "workflow": kagi_categ,
+        "safe_search": safe_search_map[params["safesearch"]],
+        "filters": filters,
+    }
+
+    params["method"] = "POST"
+    params["json"] = args
+
+
+def response(resp: "SXNG_Response") -> EngineResults:
+    """Convert Kagi's JSON response into engine results.
+
+    Depending on ``kagi_categ`` every entry becomes a main result
+    (``search`` / ``news``), an image result (``images``) or a legacy result
+    using the ``videos.html`` template (``videos``).  The entries of
+    ``related_search`` are added as suggestions.
+
+    The optional sub-objects ``image`` and ``props`` (and the image title) may
+    be missing or ``null`` without aborting the whole result list.
+    """
+    res = EngineResults()
+
+    json_data: dict[str, t.Any] = resp.json()
+
+    if kagi_categ in ("images", "videos"):
+        # the JSON key is "image" for "images" and "video" for "videos"
+        json_results = json_data["data"][kagi_categ[:-1]]
+    else:
+        json_results = json_data["data"][kagi_categ]
+
+    for result in json_results:
+        published_date: datetime | None = None
+        if result.get("time"):
+            published_date = datetime.fromisoformat(result["time"])
+
+        # ``or {}`` (instead of a ``.get()`` default) also covers keys that
+        # are present but ``null``
+        image: dict[str, t.Any] = result.get("image") or {}
+        props: dict[str, t.Any] = result.get("props") or {}
+
+        if kagi_categ in ("search", "news"):
+            res.add(
+                res.types.MainResult(
+                    url=result["url"],
+                    title=html.unescape(result["title"]),
+                    content=html.unescape(result["snippet"]),
+                    thumbnail=image.get("url") or "",
+                    publishedDate=published_date,
+                )
+            )
+        elif kagi_categ == "images":
+            # only build a resolution string when both dimensions are known
+            resolution = ""
+            if "width" in image and "height" in image:
+                resolution = f"{image['width']}x{image['height']}"
+
+            res.add(
+                res.types.Image(
+                    url=result["url"],
+                    # html.unescape() raises a TypeError on None
+                    title=html.unescape(result.get("title") or ""),
+                    img_src=image.get("url"),
+                    resolution=resolution,
+                    thumbnail_src=(props.get("thumbnail") or {}).get("url"),
+                )
+            )
+        elif kagi_categ == "videos":
+            length: timedelta | None = None
+            if props.get("duration"):
+                length = parse_duration_string(props["duration"])
+
+            res.add(
+                res.types.LegacyResult(
+                    {
+                        "template": "videos.html",
+                        "url": result["url"],
+                        "title": html.unescape(result["title"]),
+                        "content": html.unescape(result["snippet"]),
+                        "thumbnail": image.get("url"),
+                        "publishedDate": published_date,
+                        "author": props.get("creator_name"),
+                        "length": length,
+                    }
+                )
+            )
+
+    for suggestion in json_data["data"].get("related_search", []):
+        res.add(res.types.LegacyResult({"suggestion": suggestion["title"]}))
+
+    return res
diff --git a/searx/settings.yml b/searx/settings.yml index 19b30c781..97573f241 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -1294,6 +1294,35 @@ engines: shortcut: iq disabled: true + # - name: kagi + # engine: kagi + # categories: [general, web] + # shortcut: kg + # api_key: "" # required + # kagi_categ: search + + # - name: kagi.news + # engine: kagi + # categories: [news, web] + # shortcut: kgn + # api_key: "" # required + # kagi_categ: news + + # - name: kagi.images + # engine: kagi + # categories: [images, web] + # paging: false + # shortcut: kgi + # api_key: "" # required + # kagi_categ: images + + # - name: kagi.videos + # engine: kagi + # categories: [videos, web] + # shortcut: kgv + # api_key: "" # required + # kagi_categ: videos + - name: jisho
engine: jisho shortcut: js