mirror of
https://github.com/searxng/searxng.git
synced 2026-06-22 17:48:33 +02:00
[del] aol: remove engine (eol) (#6299)
This commit is contained in:
@@ -1,8 +0,0 @@
|
|||||||
.. _aol engine:
|
|
||||||
|
|
||||||
===
|
|
||||||
AOL
|
|
||||||
===
|
|
||||||
|
|
||||||
.. automodule:: searx.engines.aol
|
|
||||||
:members:
|
|
||||||
@@ -1,210 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
"""AOL supports WEB, image, and video search. Internally, it uses the Bing
|
|
||||||
index.
|
|
||||||
|
|
||||||
AOL doesn't seem to support setting the language via request parameters, instead
|
|
||||||
the results are based on the URL. For example, there is
|
|
||||||
|
|
||||||
- `search.aol.com <https://search.aol.com>`_ for English results
|
|
||||||
- `suche.aol.de <https://suche.aol.de>`_ for German results
|
|
||||||
|
|
||||||
However, AOL offers its services only in a few regions:
|
|
||||||
|
|
||||||
- en-US: search.aol.com
|
|
||||||
- de-DE: suche.aol.de
|
|
||||||
- fr-FR: recherche.aol.fr
|
|
||||||
- en-GB: search.aol.co.uk
|
|
||||||
- en-CA: search.aol.ca
|
|
||||||
|
|
||||||
In order to still offer sufficient support for language and region, the `search
|
|
||||||
keywords`_ known from Bing, ``language`` and ``loc`` (region), are added to the
|
|
||||||
search term (AOL is basically just a proxy for Bing).
|
|
||||||
|
|
||||||
.. _search keywords:
|
|
||||||
https://support.microsoft.com/en-us/topic/advanced-search-keywords-ea595928-5d63-4a0b-9c6b-0b769865e78a
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
from urllib.parse import urlencode, unquote_plus
|
|
||||||
import typing as t
|
|
||||||
|
|
||||||
from lxml import html
|
|
||||||
from dateutil import parser
|
|
||||||
|
|
||||||
from searx.result_types import EngineResults
|
|
||||||
from searx.utils import eval_xpath_list, eval_xpath, extract_text
|
|
||||||
|
|
||||||
if t.TYPE_CHECKING:
|
|
||||||
from searx.extended_types import SXNG_Response
|
|
||||||
from searx.search.processors import OnlineParams
|
|
||||||
|
|
||||||
about = {
|
|
||||||
"website": "https://www.aol.com",
|
|
||||||
"wikidata_id": "Q27585",
|
|
||||||
"official_api_documentation": None,
|
|
||||||
"use_official_api": False,
|
|
||||||
"require_api_key": False,
|
|
||||||
"results": "HTML",
|
|
||||||
}
|
|
||||||
|
|
||||||
categories = ["general"]
|
|
||||||
search_type = "search" # supported: search, image, video
|
|
||||||
|
|
||||||
paging = True
|
|
||||||
safesearch = True
|
|
||||||
time_range_support = True
|
|
||||||
results_per_page = 10
|
|
||||||
|
|
||||||
|
|
||||||
base_url = "https://search.aol.com"
|
|
||||||
time_range_map = {"day": "1d", "week": "1w", "month": "1m", "year": "1y"}
|
|
||||||
safesearch_map = {0: "p", 1: "r", 2: "i"}
|
|
||||||
|
|
||||||
enable_http2 = False
|
|
||||||
|
|
||||||
|
|
||||||
def init(_):
|
|
||||||
if search_type not in ("search", "image", "video"):
|
|
||||||
raise ValueError(f"unsupported search type {search_type}")
|
|
||||||
|
|
||||||
|
|
||||||
def request(query: str, params: "OnlineParams") -> None:
|
|
||||||
|
|
||||||
language, region = (params["searxng_locale"].split("-") + [None])[:2]
|
|
||||||
if language and language != "all":
|
|
||||||
query = f"{query} language:{language}"
|
|
||||||
if region:
|
|
||||||
query = f"{query} loc:{region}"
|
|
||||||
|
|
||||||
args: dict[str, str | int | None] = {
|
|
||||||
"q": query,
|
|
||||||
"b": params["pageno"] * results_per_page + 1, # page is 1-indexed
|
|
||||||
"pz": results_per_page,
|
|
||||||
}
|
|
||||||
|
|
||||||
if params["time_range"]:
|
|
||||||
args["fr2"] = "time"
|
|
||||||
args["age"] = params["time_range"]
|
|
||||||
else:
|
|
||||||
args["fr2"] = "sb-top-search"
|
|
||||||
|
|
||||||
params["cookies"]["sB"] = f"vm={safesearch_map[params['safesearch']]}"
|
|
||||||
params["url"] = f"{base_url}/aol/{search_type}?{urlencode(args)}"
|
|
||||||
logger.debug(params)
|
|
||||||
|
|
||||||
|
|
||||||
def _deobfuscate_url(obfuscated_url: str) -> str | None:
|
|
||||||
# URL looks like "https://search.aol.com/click/_ylt=AwjFSDjd;_ylu=JfsdjDFd/RV=2/RE=1774058166/RO=10/RU=https%3a%2f%2fen.wikipedia.org%2fwiki%2fTree/RK=0/RS=BP2CqeMLjscg4n8cTmuddlEQA2I-" # pylint: disable=line-too-long
|
|
||||||
if not obfuscated_url:
|
|
||||||
return None
|
|
||||||
|
|
||||||
for part in obfuscated_url.split("/"):
|
|
||||||
if part.startswith("RU="):
|
|
||||||
return unquote_plus(part[3:])
|
|
||||||
# pattern for de-obfuscating URL not found, fall back to Yahoo's tracking link
|
|
||||||
return obfuscated_url
|
|
||||||
|
|
||||||
|
|
||||||
def _general_results(doc: html.HtmlElement) -> EngineResults:
|
|
||||||
res = EngineResults()
|
|
||||||
|
|
||||||
for result in eval_xpath_list(doc, "//div[@id='web']//ol/li[not(contains(@class, 'first'))]"):
|
|
||||||
obfuscated_url = extract_text(eval_xpath(result, ".//h3/a/@href"))
|
|
||||||
if not obfuscated_url:
|
|
||||||
continue
|
|
||||||
|
|
||||||
url = _deobfuscate_url(obfuscated_url)
|
|
||||||
if not url:
|
|
||||||
continue
|
|
||||||
|
|
||||||
res.add(
|
|
||||||
res.types.MainResult(
|
|
||||||
url=url,
|
|
||||||
title=extract_text(eval_xpath(result, ".//h3/a")) or "",
|
|
||||||
content=extract_text(eval_xpath(result, ".//div[contains(@class, 'compText')]")) or "",
|
|
||||||
thumbnail=extract_text(eval_xpath(result, ".//a[contains(@class, 'thm')]/img/@data-src")) or "",
|
|
||||||
)
|
|
||||||
)
|
|
||||||
return res
|
|
||||||
|
|
||||||
|
|
||||||
def _video_results(doc: html.HtmlElement) -> EngineResults:
|
|
||||||
res = EngineResults()
|
|
||||||
|
|
||||||
for result in eval_xpath_list(doc, "//div[contains(@class, 'results')]//ol/li"):
|
|
||||||
obfuscated_url = extract_text(eval_xpath(result, ".//a/@href"))
|
|
||||||
if not obfuscated_url:
|
|
||||||
continue
|
|
||||||
|
|
||||||
url = _deobfuscate_url(obfuscated_url)
|
|
||||||
if not url:
|
|
||||||
continue
|
|
||||||
|
|
||||||
published_date_raw = extract_text(eval_xpath(result, ".//div[contains(@class, 'v-age')]"))
|
|
||||||
try:
|
|
||||||
published_date = parser.parse(published_date_raw or "")
|
|
||||||
except parser.ParserError:
|
|
||||||
published_date = None
|
|
||||||
|
|
||||||
res.add(
|
|
||||||
res.types.LegacyResult(
|
|
||||||
{
|
|
||||||
"template": "videos.html",
|
|
||||||
"url": url,
|
|
||||||
"title": extract_text(eval_xpath(result, ".//h3")),
|
|
||||||
"content": extract_text(eval_xpath(result, ".//div[contains(@class, 'compText')]")),
|
|
||||||
"thumbnail": extract_text(eval_xpath(result, ".//img[contains(@class, 'thm')]/@src")),
|
|
||||||
"length": extract_text(eval_xpath(result, ".//span[contains(@class, 'v-time')]")),
|
|
||||||
"publishedDate": published_date,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
return res
|
|
||||||
|
|
||||||
|
|
||||||
def _image_results(doc: html.HtmlElement) -> EngineResults:
|
|
||||||
res = EngineResults()
|
|
||||||
|
|
||||||
for result in eval_xpath_list(doc, "//section[@id='results']//ul/li"):
|
|
||||||
obfuscated_url = extract_text(eval_xpath(result, "./a/@href"))
|
|
||||||
if not obfuscated_url:
|
|
||||||
continue
|
|
||||||
|
|
||||||
url = _deobfuscate_url(obfuscated_url)
|
|
||||||
if not url:
|
|
||||||
continue
|
|
||||||
|
|
||||||
res.add(
|
|
||||||
res.types.LegacyResult(
|
|
||||||
{
|
|
||||||
"template": "images.html",
|
|
||||||
# results don't have an extra URL, only the image source
|
|
||||||
"url": url,
|
|
||||||
"title": extract_text(eval_xpath(result, ".//a/@aria-label")),
|
|
||||||
"thumbnail_src": extract_text(eval_xpath(result, ".//img/@src")),
|
|
||||||
"img_src": url,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
return res
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp: "SXNG_Response") -> EngineResults:
|
|
||||||
doc = html.fromstring(resp.text)
|
|
||||||
|
|
||||||
match search_type:
|
|
||||||
case "search":
|
|
||||||
results = _general_results(doc)
|
|
||||||
case "image":
|
|
||||||
results = _image_results(doc)
|
|
||||||
case "video":
|
|
||||||
results = _video_results(doc)
|
|
||||||
case _:
|
|
||||||
raise ValueError("unsupported search type")
|
|
||||||
|
|
||||||
for suggestion in eval_xpath_list(doc, ".//ol[contains(@class, 'searchRightBottom')]//table//a"):
|
|
||||||
results.add(results.types.LegacyResult({"suggestion": extract_text(suggestion)}))
|
|
||||||
|
|
||||||
return results
|
|
||||||
@@ -444,27 +444,6 @@ engines:
|
|||||||
shortcut: conda
|
shortcut: conda
|
||||||
disabled: true
|
disabled: true
|
||||||
|
|
||||||
- name: aol
|
|
||||||
engine: aol
|
|
||||||
search_type: search
|
|
||||||
categories: [general]
|
|
||||||
shortcut: aol
|
|
||||||
disabled: true
|
|
||||||
|
|
||||||
- name: aol images
|
|
||||||
engine: aol
|
|
||||||
search_type: image
|
|
||||||
categories: [images]
|
|
||||||
shortcut: aoli
|
|
||||||
disabled: true
|
|
||||||
|
|
||||||
- name: aol videos
|
|
||||||
engine: aol
|
|
||||||
search_type: video
|
|
||||||
categories: [videos]
|
|
||||||
shortcut: aolv
|
|
||||||
disabled: true
|
|
||||||
|
|
||||||
- name: arch linux wiki
|
- name: arch linux wiki
|
||||||
engine: archlinux
|
engine: archlinux
|
||||||
shortcut: al
|
shortcut: al
|
||||||
|
|||||||
Reference in New Issue
Block a user