diff --git a/docs/dev/engines/online/karmasearch.rst b/docs/dev/engines/online/karmasearch.rst deleted file mode 100644 index d76ea2409..000000000 --- a/docs/dev/engines/online/karmasearch.rst +++ /dev/null @@ -1,8 +0,0 @@ -.. _karmasearch engine: - -=========== -Karmasearch -=========== - -.. automodule:: searx.engines.karmasearch - :members: diff --git a/searx/data/engine_traits.json b/searx/data/engine_traits.json index 7c5b102b6..54e5eaed9 100644 --- a/searx/data/engine_traits.json +++ b/searx/data/engine_traits.json @@ -5740,186 +5740,6 @@ "zu-ZA": "ZA" } }, - "karmasearch": { - "all_locale": null, - "custom": {}, - "data_type": "traits_v1", - "languages": {}, - "regions": { - "da-DK": "da-DK", - "de-AT": "de-AT", - "de-CH": "de-CH", - "de-DE": "de-DE", - "en-AU": "en-AU", - "en-CA": "en-CA", - "en-GB": "en-GB", - "en-ID": "en-ID", - "en-IN": "en-IN", - "en-MY": "en-MY", - "en-NZ": "en-NZ", - "en-PH": "en-PH", - "en-US": "en-US", - "en-ZA": "en-ZA", - "es-AR": "es-AR", - "es-CL": "es-CL", - "es-ES": "es-ES", - "es-MX": "es-MX", - "es-US": "es-US", - "fi-FI": "fi-FI", - "fr-BE": "fr-BE", - "fr-CA": "fr-CA", - "fr-CH": "fr-CH", - "fr-FR": "fr-FR", - "it-IT": "it-IT", - "ja-JP": "ja-JP", - "ko-KR": "ko-KR", - "nl-BE": "nl-BE", - "nl-NL": "nl-NL", - "pl-PL": "pl-PL", - "pt-BR": "pt-BR", - "ru-RU": "ru-RU", - "sv-SE": "sv-SE", - "tr-TR": "tr-TR", - "zh-CN": "zh-CN", - "zh-HK": "zh-HK", - "zh-TW": "zh-TW" - } - }, - "karmasearch images": { - "all_locale": null, - "custom": {}, - "data_type": "traits_v1", - "languages": {}, - "regions": { - "da-DK": "da-DK", - "de-AT": "de-AT", - "de-CH": "de-CH", - "de-DE": "de-DE", - "en-AU": "en-AU", - "en-CA": "en-CA", - "en-GB": "en-GB", - "en-ID": "en-ID", - "en-IN": "en-IN", - "en-MY": "en-MY", - "en-NZ": "en-NZ", - "en-PH": "en-PH", - "en-US": "en-US", - "en-ZA": "en-ZA", - "es-AR": "es-AR", - "es-CL": "es-CL", - "es-ES": "es-ES", - "es-MX": "es-MX", - "es-US": "es-US", - "fi-FI": "fi-FI", - "fr-BE": "fr-BE", - "fr-CA": "fr-CA", - "fr-CH": "fr-CH", - "fr-FR": "fr-FR", - "it-IT": "it-IT", - "ja-JP": "ja-JP", - "ko-KR": "ko-KR", - "nl-BE": "nl-BE", - "nl-NL": "nl-NL", - "pl-PL": "pl-PL", - "pt-BR": "pt-BR", - "ru-RU": "ru-RU", - "sv-SE": "sv-SE", - "tr-TR": "tr-TR", - "zh-CN": "zh-CN", - "zh-HK": "zh-HK", - "zh-TW": "zh-TW" - } - }, - "karmasearch news": { - "all_locale": null, - "custom": {}, - "data_type": "traits_v1", - "languages": {}, - "regions": { - "da-DK": "da-DK", - "de-AT": "de-AT", - "de-CH": "de-CH", - "de-DE": "de-DE", - "en-AU": "en-AU", - "en-CA": "en-CA", - "en-GB": "en-GB", - "en-ID": "en-ID", - "en-IN": "en-IN", - "en-MY": "en-MY", - "en-NZ": "en-NZ", - "en-PH": "en-PH", - "en-US": "en-US", - "en-ZA": "en-ZA", - "es-AR": "es-AR", - "es-CL": "es-CL", - "es-ES": "es-ES", - "es-MX": "es-MX", - "es-US": "es-US", - "fi-FI": "fi-FI", - "fr-BE": "fr-BE", - "fr-CA": "fr-CA", - "fr-CH": "fr-CH", - "fr-FR": "fr-FR", - "it-IT": "it-IT", - "ja-JP": "ja-JP", - "ko-KR": "ko-KR", - "nl-BE": "nl-BE", - "nl-NL": "nl-NL", - "pl-PL": "pl-PL", - "pt-BR": "pt-BR", - "ru-RU": "ru-RU", - "sv-SE": "sv-SE", - "tr-TR": "tr-TR", - "zh-CN": "zh-CN", - "zh-HK": "zh-HK", - "zh-TW": "zh-TW" - } - }, - "karmasearch videos": { - "all_locale": null, - "custom": {}, - "data_type": "traits_v1", - "languages": {}, - "regions": { - "da-DK": "da-DK", - "de-AT": "de-AT", - "de-CH": "de-CH", - "de-DE": "de-DE", - "en-AU": "en-AU", - "en-CA": "en-CA", - "en-GB": "en-GB", - "en-ID": "en-ID", - "en-IN": "en-IN", - "en-MY": "en-MY", - "en-NZ": "en-NZ", - "en-PH": "en-PH", - "en-US": "en-US", - "en-ZA": "en-ZA", - "es-AR": "es-AR", - "es-CL": "es-CL", - "es-ES": "es-ES", - "es-MX": "es-MX", - "es-US": "es-US", - "fi-FI": "fi-FI", - "fr-BE": "fr-BE", - "fr-CA": "fr-CA", - "fr-CH": "fr-CH", - "fr-FR": "fr-FR", - "it-IT": "it-IT", - "ja-JP": "ja-JP", - "ko-KR": "ko-KR", - "nl-BE": "nl-BE", - "nl-NL": "nl-NL", - "pl-PL": "pl-PL", - "pt-BR": "pt-BR", - "ru-RU": "ru-RU", - "sv-SE": "sv-SE", - "tr-TR": "tr-TR", - "zh-CN": "zh-CN", - "zh-HK": "zh-HK", - "zh-TW": "zh-TW" - } - }, "mojeek": { "all_locale": null, "custom": { diff --git a/searx/engines/karmasearch.py b/searx/engines/karmasearch.py deleted file mode 100644 index a019ec885..000000000 --- a/searx/engines/karmasearch.py +++ /dev/null @@ -1,205 +0,0 @@ -# SPDX-License-Identifier: AGPL-3.0-or-later -"""Karmasearch uses Brave's index, so the results should be the same as Brave's. - -However, the advantages of this engine are: - -- it has less strict rate-limits -- it has a JSON API, so it's less likely to break -""" - -from datetime import datetime -from urllib.parse import urlencode -import typing as t - -from dateutil import parser - -from searx.enginelib.traits import EngineTraits - -from searx.utils import html_to_text -from searx.result_types import EngineResults, MainResult -from searx.result_types._base import LegacyResult - - -if t.TYPE_CHECKING: - from searx.extended_types import SXNG_Response - from searx.search.processors import OnlineParams - -about = { - "website": "https://karmasearch.org", - "official_api_documentation": None, - "use_official_api": False, - "require_api_key": False, - "results": "JSON", -} - -base_url = "https://api.karmasearch.org" -categories = ["web", "general"] -search_type = "web" # supported: web, images, videos, news - -# all types except "images" support pagination -paging = True -safesearch = True -time_range_support = True - -safe_search_map = {0: "off", 1: "moderate", 2: "strict"} -time_range_map = {"day": "Day", "week": "Week", "month": "Month", "year": "Year"} - - -def init(_): - if search_type not in ("web", "images", "videos", "news"): - raise ValueError(f"invalid search type: {search_type}") - - -def request(query: str, params: "OnlineParams") -> None: - engine_region: str = traits.get_region(params["searxng_locale"]) or "en-US" - - args: dict[str, str | int] = { - "searchTerm": query, - "adultFilter": safe_search_map[params["safesearch"]], - "pageNumber": params["pageno"], - "country": engine_region.split("-")[-1], - "userLanguage": "en", # UI language: en, es or fr / no effect on search results - "market": engine_region, - } - if params["time_range"]: - args["freshness"] = time_range_map[params["time_range"]] - - # Needed to circumvent Cloudflare bot protection - params['headers']['Referer'] = "https://karmasearch.org" - - params["url"] = f"{base_url}/search/{search_type}?{urlencode(args)}" - - -def _parse_date(date_string: str) -> datetime | None: - try: - return parser.parse(date_string) - except parser.ParserError: - return None - - -def _parse_general(result: dict[str, str]): - return MainResult( - url=result["url"], - title=result["title"], - content=html_to_text(result["description"]), - thumbnail=result.get("thumbnail", ""), - ) - - -def _parse_news(result: dict[str, str]) -> LegacyResult: - return LegacyResult( - { - "url": result["url"], - "title": result["title"], - "content": html_to_text(result["description"]), - "thumbnail": result.get("thumbnail"), - "publishedDate": _parse_date(result.get("age", "")), - } - ) - - -def _parse_videos(result: dict[str, t.Any]) -> LegacyResult: - return LegacyResult( - { - "template": "videos.html", - "url": result["url"], - "title": result["title"], - "content": html_to_text(result["description"]), - "thumbnail": result.get("thumbnail"), - "publishedDate": _parse_date(result.get("age", "")), - "length": result.get("video", {}).get("duration"), - } - ) - - -def _parse_images(result: dict[str, t.Any]) -> LegacyResult: - return LegacyResult( - { - "template": "images.html", - "url": result["url"], - "title": result["title"], - "content": "", - "img_src": result.get("properties", {}).get("url"), - "thumbnail_src": result.get("thumbnail", {}).get("src"), - } - ) - - -def response(resp: "SXNG_Response") -> EngineResults: - res = EngineResults() - - json_resp: dict[str, t.Any] = resp.json() - if not isinstance(json_resp, dict): - return res # pyright: ignore[reportUnreachable] - - for result in json_resp["results"]: - # hide sponsored results - if result.get("sponsored", False): - continue - - if "videos" in result: - for videos_result in result["videos"]: - res.add(_parse_videos(videos_result)) - continue - - if "news" in result: - for news_result in result["news"]: - res.add(_parse_news(news_result)) - continue - - if search_type == "news": - res.add(_parse_news(result)) - elif search_type == "videos": - res.add(_parse_videos(result)) - elif search_type == "images": - res.add(_parse_images(result)) - else: - res.add(_parse_general(result)) - - return res - - -def fetch_traits(engine_traits: EngineTraits): - """Fetch :ref:`languages ` and :ref:`regions ` from Brave.""" - - # pylint: disable=import-outside-toplevel, too-many-branches - - from lxml import html - import babel - - from searx.locales import region_tag - from searx.network import get # see https://github.com/searxng/searxng/issues/762 - - # from searx.engines.xpath import extract_text - from searx.utils import gen_useragent - - headers = { - "Accept-Encoding": "gzip, deflate", - "Cache-Control": "no-cache", - "DNT": "1", - "Connection": "keep-alive", - "Accept-Language": "en,en-US;q=0.7,en;q=0.3", - "User-Agent": gen_useragent(), - } - - resp = get("https://karmasearch.org/settings", headers=headers, timeout=5) - if not resp.ok: - raise RuntimeError("Response from Brave languages is not OK.") - - dom = html.fromstring(resp.text) - for option in dom.xpath("//select[@name='country']/option"): - country_tag: str = option.get("value", "") - try: - sxng_tag = region_tag(babel.Locale.parse(country_tag, sep="-")) - except babel.UnknownLocaleError: - # silently ignore unknown languages - continue - # print("%-20s: %s <-- %s" % (extract_text(option), country_tag, sxng_tag)) - - conflict = engine_traits.regions.get(sxng_tag) - if conflict: - if conflict != country_tag: - print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, country_tag)) - continue - engine_traits.regions[sxng_tag] = country_tag diff --git a/searx/settings.yml b/searx/settings.yml index 342a33bdb..28e2ddd40 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -1228,35 +1228,6 @@ engines: timeout: 3.0 disabled: true - - name: karmasearch - engine: karmasearch - categories: [general, web] - search_type: web - shortcut: ka - inactive: true - - - name: karmasearch images - engine: karmasearch - categories: [images, web] - search_type: images - shortcut: kai - paging: false - inactive: true - - - name: karmasearch videos - engine: karmasearch - categories: [videos, web] - search_type: videos - shortcut: kav - inactive: true - - - name: karmasearch news - engine: karmasearch - categories: [news, web] - search_type: news - shortcut: kan - inactive: true - - name: kickass engine: kickass base_url: