From 097ab64c70d0197726e1eda5930c0cf8f37711c1 Mon Sep 17 00:00:00 2001 From: Bnyro Date: Mon, 22 Jun 2026 07:32:23 +0200 Subject: [PATCH] [del] aol: remove engine (eol) (#6299) --- docs/dev/engines/online/aol.rst | 8 -- searx/engines/aol.py | 210 -------------------------------- searx/settings.yml | 21 ---- 3 files changed, 239 deletions(-) delete mode 100644 docs/dev/engines/online/aol.rst delete mode 100644 searx/engines/aol.py diff --git a/docs/dev/engines/online/aol.rst b/docs/dev/engines/online/aol.rst deleted file mode 100644 index 5a6ea7a63..000000000 --- a/docs/dev/engines/online/aol.rst +++ /dev/null @@ -1,8 +0,0 @@ -.. _aol engine: - -=== -AOL -=== - -.. automodule:: searx.engines.aol - :members: diff --git a/searx/engines/aol.py b/searx/engines/aol.py deleted file mode 100644 index 4a3b06c26..000000000 --- a/searx/engines/aol.py +++ /dev/null @@ -1,210 +0,0 @@ -# SPDX-License-Identifier: AGPL-3.0-or-later -"""AOL supports WEB, image, and video search. Internally, it uses the Bing -index. - -AOL doesn't seem to support setting the language via request parameters, instead -the results are based on the URL. For example, there is - -- `search.aol.com `_ for English results -- `suche.aol.de `_ for German results - -However, AOL offers its services only in a few regions: - -- en-US: search.aol.com -- de-DE: suche.aol.de -- fr-FR: recherche.aol.fr -- en-GB: search.aol.co.uk -- en-CA: search.aol.ca - -In order to still offer sufficient support for language and region, the `search -keywords`_ known from Bing, ``language`` and ``loc`` (region), are added to the -search term (AOL is basically just a proxy for Bing). - -.. _search keywords: - https://support.microsoft.com/en-us/topic/advanced-search-keywords-ea595928-5d63-4a0b-9c6b-0b769865e78a - -""" - -from urllib.parse import urlencode, unquote_plus -import typing as t - -from lxml import html -from dateutil import parser - -from searx.result_types import EngineResults -from searx.utils import eval_xpath_list, eval_xpath, extract_text - -if t.TYPE_CHECKING: - from searx.extended_types import SXNG_Response - from searx.search.processors import OnlineParams - -about = { - "website": "https://www.aol.com", - "wikidata_id": "Q27585", - "official_api_documentation": None, - "use_official_api": False, - "require_api_key": False, - "results": "HTML", -} - -categories = ["general"] -search_type = "search" # supported: search, image, video - -paging = True -safesearch = True -time_range_support = True -results_per_page = 10 - - -base_url = "https://search.aol.com" -time_range_map = {"day": "1d", "week": "1w", "month": "1m", "year": "1y"} -safesearch_map = {0: "p", 1: "r", 2: "i"} - -enable_http2 = False - - -def init(_): - if search_type not in ("search", "image", "video"): - raise ValueError(f"unsupported search type {search_type}") - - -def request(query: str, params: "OnlineParams") -> None: - - language, region = (params["searxng_locale"].split("-") + [None])[:2] - if language and language != "all": - query = f"{query} language:{language}" - if region: - query = f"{query} loc:{region}" - - args: dict[str, str | int | None] = { - "q": query, - "b": params["pageno"] * results_per_page + 1, # page is 1-indexed - "pz": results_per_page, - } - - if params["time_range"]: - args["fr2"] = "time" - args["age"] = params["time_range"] - else: - args["fr2"] = "sb-top-search" - - params["cookies"]["sB"] = f"vm={safesearch_map[params['safesearch']]}" - params["url"] = f"{base_url}/aol/{search_type}?{urlencode(args)}" - logger.debug(params) - - -def _deobfuscate_url(obfuscated_url: str) -> str | None: - # URL looks like "https://search.aol.com/click/_ylt=AwjFSDjd;_ylu=JfsdjDFd/RV=2/RE=1774058166/RO=10/RU=https%3a%2f%2fen.wikipedia.org%2fwiki%2fTree/RK=0/RS=BP2CqeMLjscg4n8cTmuddlEQA2I-" # pylint: disable=line-too-long - if not obfuscated_url: - return None - - for part in obfuscated_url.split("/"): - if part.startswith("RU="): - return unquote_plus(part[3:]) - # pattern for de-obfuscating URL not found, fall back to Yahoo's tracking link - return obfuscated_url - - -def _general_results(doc: html.HtmlElement) -> EngineResults: - res = EngineResults() - - for result in eval_xpath_list(doc, "//div[@id='web']//ol/li[not(contains(@class, 'first'))]"): - obfuscated_url = extract_text(eval_xpath(result, ".//h3/a/@href")) - if not obfuscated_url: - continue - - url = _deobfuscate_url(obfuscated_url) - if not url: - continue - - res.add( - res.types.MainResult( - url=url, - title=extract_text(eval_xpath(result, ".//h3/a")) or "", - content=extract_text(eval_xpath(result, ".//div[contains(@class, 'compText')]")) or "", - thumbnail=extract_text(eval_xpath(result, ".//a[contains(@class, 'thm')]/img/@data-src")) or "", - ) - ) - return res - - -def _video_results(doc: html.HtmlElement) -> EngineResults: - res = EngineResults() - - for result in eval_xpath_list(doc, "//div[contains(@class, 'results')]//ol/li"): - obfuscated_url = extract_text(eval_xpath(result, ".//a/@href")) - if not obfuscated_url: - continue - - url = _deobfuscate_url(obfuscated_url) - if not url: - continue - - published_date_raw = extract_text(eval_xpath(result, ".//div[contains(@class, 'v-age')]")) - try: - published_date = parser.parse(published_date_raw or "") - except parser.ParserError: - published_date = None - - res.add( - res.types.LegacyResult( - { - "template": "videos.html", - "url": url, - "title": extract_text(eval_xpath(result, ".//h3")), - "content": extract_text(eval_xpath(result, ".//div[contains(@class, 'compText')]")), - "thumbnail": extract_text(eval_xpath(result, ".//img[contains(@class, 'thm')]/@src")), - "length": extract_text(eval_xpath(result, ".//span[contains(@class, 'v-time')]")), - "publishedDate": published_date, - } - ) - ) - - return res - - -def _image_results(doc: html.HtmlElement) -> EngineResults: - res = EngineResults() - - for result in eval_xpath_list(doc, "//section[@id='results']//ul/li"): - obfuscated_url = extract_text(eval_xpath(result, "./a/@href")) - if not obfuscated_url: - continue - - url = _deobfuscate_url(obfuscated_url) - if not url: - continue - - res.add( - res.types.LegacyResult( - { - "template": "images.html", - # results don't have an extra URL, only the image source - "url": url, - "title": extract_text(eval_xpath(result, ".//a/@aria-label")), - "thumbnail_src": extract_text(eval_xpath(result, ".//img/@src")), - "img_src": url, - } - ) - ) - - return res - - -def response(resp: "SXNG_Response") -> EngineResults: - doc = html.fromstring(resp.text) - - match search_type: - case "search": - results = _general_results(doc) - case "image": - results = _image_results(doc) - case "video": - results = _video_results(doc) - case _: - raise ValueError("unsupported search type") - - for suggestion in eval_xpath_list(doc, ".//ol[contains(@class, 'searchRightBottom')]//table//a"): - results.add(results.types.LegacyResult({"suggestion": extract_text(suggestion)})) - - return results diff --git a/searx/settings.yml b/searx/settings.yml index 2c4b23be1..a3c6470b8 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -444,27 +444,6 @@ engines: shortcut: conda disabled: true - - name: aol - engine: aol - search_type: search - categories: [general] - shortcut: aol - disabled: true - - - name: aol images - engine: aol - search_type: image - categories: [images] - shortcut: aoli - disabled: true - - - name: aol videos - engine: aol - search_type: video - categories: [videos] - shortcut: aolv - disabled: true - - name: arch linux wiki engine: archlinux shortcut: al