# SPDX-License-Identifier: AGPL-3.0-or-later
"""Tusksearch_ is an American search engine that claims to fight censorship.
Its search results are (at least partially) from Brave.

.. _Tusksearch: https://tusksearch.com/about
"""

from json import loads
import random
import typing as t
from urllib.parse import urlencode
from dateutil import parser

from searx.exceptions import SearxEngineAPIException
from searx.network import get
from searx.utils import html_to_text
from searx.result_types import EngineResults

if t.TYPE_CHECKING:
    from searx.extended_types import SXNG_Response
    from searx.search.processors import OnlineParams

about = {
    "website": "https://tusksearch.com",
    "wikidata_id": None,
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": "JSON",
}

paging = True
categories = ["general"]

tusk_categ = "web"
"""Category to search in. Can be either "web", "images", "videos" or "news"."""

api_url = "https://api.tusksearch.com"


def init(_):
    """Validate the engine configuration.

    :raises ValueError: if :py:obj:`tusk_categ` is not one of ``"web"``,
        ``"images"``, ``"videos"`` or ``"news"``.
    """
    if tusk_categ not in ("web", "images", "videos", "news"):
        raise ValueError(f"invalid search type: {tusk_categ}")


def _byte_array_to_ascii(text: list[int]) -> str:
    """Converts a byte array (e.g. ``[83, 101, 97, 114, 88, 78, 71]``) to the
    ASCII string representation (e.g. ``"SearXNG"``).
    """
    return "".join(chr(code) for code in text)


def _obtain_x_sid() -> tuple[str, str]:
    """Fetch a fresh session ID header (name and value) for one API request.

    The session ID ("sid") is encoded as a byte array in ``embed.js``. It is
    only valid for exactly one request, so we can't cache it.

    The header key is usually called ``x-sid-{UUIDv4}``, and the value is
    usually a plain UUIDv4 (but a different one than in the header key).

    :returns: tuple of ``(header name, header value)``.
    :raises SearxEngineAPIException: if ``embed.js`` can't be fetched or its
        payload can't be decoded.
    """
    resp = get(f"{api_url}/revcontent/embed.js")
    if not resp.ok:
        raise SearxEngineAPIException("failed to obtain request x-sid token")

    try:
        # data is prefixed by 'var x=' (6 characters), the remainder is
        # parsed as JSON
        data_array = loads(resp.text[6:])
        # the header name is stored at index 3, its value at index 4
        x_sid_header = _byte_array_to_ascii(data_array[3])
        x_sid_value = _byte_array_to_ascii(data_array[4])
    except (ValueError, TypeError, LookupError) as exc:
        # report a malformed payload the same way as a failed download
        # instead of leaking a raw JSONDecodeError / IndexError
        raise SearxEngineAPIException("failed to decode request x-sid token") from exc

    return x_sid_header, x_sid_value


def request(query: str, params: "OnlineParams") -> None:
    """Build the API request for ``query`` and store it in ``params``.

    Sets ``params["url"]`` to ``None`` when the requested page is not
    available for the configured category; otherwise sets ``params["url"]``
    and ``params["headers"]``.
    """
    pageno = params["pageno"]

    # images don't support pagination, news and videos only support two pages
    if (tusk_categ == "images" and pageno > 1) or (tusk_categ in ("news", "videos") and pageno > 2):
        params["url"] = None
        return

    args = {
        "q": query,
        "p": pageno,
        "l": "center",  # political direction: "left", "center" or "right"
    }
    if tusk_categ == "images":
        params["url"] = f"{api_url}/Search/Image?{urlencode(args)}"
    else:
        # web response also contains news and videos
        params["url"] = f"{api_url}/Search/Web?{urlencode(args)}"

    x_sid_header, x_sid_value = _obtain_x_sid()
    params["headers"] = {
        x_sid_header: x_sid_value,
        # required - we send a random longitude and latitude instead of the actual user location
        'x-lon': str(random.random() * 90),
        'x-lat': str(random.random() * 90),
    }


def _section_results(json_resp: dict, section: str) -> list:
    """Return the result list of ``section`` ("web", "news" or "videos"), or
    an empty list if the section or its results are missing or null."""
    return (json_resp.get(section) or {}).get("results") or []


def _thumbnail_src(result: dict) -> str:
    """Return the thumbnail URL of ``result``, or ``""`` if there is none."""
    return (result.get("thumbnail") or {}).get("src") or ""


def _parse_age(result: dict):
    """Parse the ``age`` field of ``result`` into a datetime.

    Returns ``None`` if the field is missing, not a string, or can't be
    parsed - a bad date must not drop the whole result.
    """
    age = result.get("age")
    if not isinstance(age, str) or not age:
        return None
    try:
        return parser.parse(age)
    except (parser.ParserError, OverflowError):
        return None


def response(resp: "SXNG_Response") -> EngineResults:
    """Convert the JSON API response into results for the configured
    category (:py:obj:`tusk_categ`)."""
    res = EngineResults()
    json_resp = resp.json()["results"]

    if tusk_categ == "web":
        for result in _section_results(json_resp, "web"):
            res.add(
                res.types.MainResult(
                    url=result["url"],
                    title=html_to_text(result["title"]),
                    content=html_to_text(result["description"]),
                    thumbnail=_thumbnail_src(result),
                )
            )

    elif tusk_categ == "news":
        for result in _section_results(json_resp, "news"):
            res.add(
                res.types.MainResult(
                    url=result["url"],
                    title=html_to_text(result["title"]),
                    content=html_to_text(result["description"]),
                    thumbnail=_thumbnail_src(result),
                    publishedDate=_parse_age(result),
                )
            )

    elif tusk_categ == "videos":
        for result in _section_results(json_resp, "videos"):
            res.add(
                res.types.LegacyResult(
                    template="videos.html",
                    url=result["url"],
                    title=html_to_text(result["title"]),
                    content=html_to_text(result["description"]),
                    thumbnail=_thumbnail_src(result),
                    publishedDate=_parse_age(result),
                    # the "video" object may be missing or null
                    length=(result.get("video") or {}).get("duration"),
                )
            )

    elif tusk_categ == "images":
        # for images, "results" is directly the list of image results
        for result in json_resp:
            res.add(
                res.types.Image(
                    url=result["url"],
                    title=html_to_text(result["title"]),
                    img_src=result["properties"]["url"],
                    thumbnail_src=result["thumbnail"]["src"],
                )
            )

    return res