mirror of
https://github.com/searxng/searxng.git
synced 2026-05-07 18:03:51 +02:00
[fix] annas archive: rotate between available backup domains
- closes https://github.com/searxng/searxng/issues/5633
This commit is contained in:
@@ -34,6 +34,9 @@ Implementations
|
|||||||
===============
|
===============
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import random
|
||||||
|
|
||||||
import typing as t
|
import typing as t
|
||||||
|
|
||||||
from urllib.parse import urlencode
|
from urllib.parse import urlencode
|
||||||
@@ -66,7 +69,9 @@ categories = ["files", "books"]
|
|||||||
paging: bool = True
|
paging: bool = True
|
||||||
|
|
||||||
# search-url
|
# search-url
|
||||||
base_url: str = "https://annas-archive.org"
|
base_url: list[str] | str = []
|
||||||
|
"""List of Anna's archive domains or a single domain (as string)."""
|
||||||
|
|
||||||
aa_content: str = ""
|
aa_content: str = ""
|
||||||
"""Anna's search form field **Content** / possible values::
|
"""Anna's search form field **Content** / possible values::
|
||||||
|
|
||||||
@@ -98,6 +103,9 @@ def setup(engine_settings: dict[str, t.Any]) -> bool: # pylint: disable=unused-
|
|||||||
"""Check of engine's settings."""
|
"""Check of engine's settings."""
|
||||||
traits = EngineTraits(**ENGINE_TRAITS["annas archive"])
|
traits = EngineTraits(**ENGINE_TRAITS["annas archive"])
|
||||||
|
|
||||||
|
if not base_url:
|
||||||
|
raise ValueError("missing required config `base_url`")
|
||||||
|
|
||||||
if aa_content and aa_content not in traits.custom["content"]:
|
if aa_content and aa_content not in traits.custom["content"]:
|
||||||
raise ValueError(f"invalid setting content: {aa_content}")
|
raise ValueError(f"invalid setting content: {aa_content}")
|
||||||
|
|
||||||
@@ -110,6 +118,13 @@ def setup(engine_settings: dict[str, t.Any]) -> bool: # pylint: disable=unused-
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def get_base_url_choice() -> str:
    """Return one of the configured Anna's Archive base URLs.

    The module-level ``base_url`` setting is either a single domain (string)
    or a list of domains.  For a list, one entry is picked at random so that
    requests rotate between the available backup domains.

    The returned URL never ends with a slash: callers append paths that start
    with ``/`` (e.g. ``/search``), and a trailing slash in the configuration
    would otherwise produce ``//search``.

    :raises ValueError: if no (non-empty) base URL is configured.
    """
    if isinstance(base_url, (list, tuple)):
        # Ignore empty entries (e.g. a blank YAML list item).
        candidates = [url for url in base_url if url]
        if not candidates:
            # Same message as the settings check, instead of the opaque
            # IndexError that random.choice raises on an empty sequence.
            raise ValueError("missing required config `base_url`")
        choice = random.choice(candidates)
    else:
        if not base_url:
            raise ValueError("missing required config `base_url`")
        choice = base_url

    return choice.rstrip("/")
|
||||||
|
|
||||||
|
|
||||||
def request(query: str, params: "OnlineParams") -> None:
|
def request(query: str, params: "OnlineParams") -> None:
|
||||||
lang = traits.get_language(params["searxng_locale"], traits.all_locale)
|
lang = traits.get_language(params["searxng_locale"], traits.all_locale)
|
||||||
args = {
|
args = {
|
||||||
@@ -122,7 +137,9 @@ def request(query: str, params: "OnlineParams") -> None:
|
|||||||
}
|
}
|
||||||
# filter out None and empty values
|
# filter out None and empty values
|
||||||
filtered_args = dict((k, v) for k, v in args.items() if v)
|
filtered_args = dict((k, v) for k, v in args.items() if v)
|
||||||
params["url"] = f"{base_url}/search?{urlencode(filtered_args)}"
|
|
||||||
|
params["base_url"] = get_base_url_choice()
|
||||||
|
params["url"] = f"{params['base_url']}/search?{urlencode(filtered_args)}"
|
||||||
|
|
||||||
|
|
||||||
def response(resp: "SXNG_Response") -> EngineResults:
|
def response(resp: "SXNG_Response") -> EngineResults:
|
||||||
@@ -136,16 +153,16 @@ def response(resp: "SXNG_Response") -> EngineResults:
|
|||||||
|
|
||||||
for item in eval_xpath_list(dom, "//main//div[contains(@class, 'js-aarecord-list-outer')]/div"):
|
for item in eval_xpath_list(dom, "//main//div[contains(@class, 'js-aarecord-list-outer')]/div"):
|
||||||
try:
|
try:
|
||||||
kwargs: dict[str, t.Any] = _get_result(item)
|
kwargs: dict[str, t.Any] = _get_result(item, resp.search_params["base_url"])
|
||||||
except SearxEngineXPathException:
|
except SearxEngineXPathException:
|
||||||
continue
|
continue
|
||||||
res.add(res.types.Paper(**kwargs))
|
res.add(res.types.Paper(**kwargs))
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
|
||||||
def _get_result(item: ElementBase) -> dict[str, t.Any]:
|
def _get_result(item: ElementBase, base_url_choice) -> dict[str, t.Any]:
|
||||||
return {
|
return {
|
||||||
"url": base_url + eval_xpath_getindex(item, "./a/@href", 0),
|
"url": base_url_choice + eval_xpath_getindex(item, "./a/@href", 0),
|
||||||
"title": extract_text(eval_xpath(item, "./div//a[starts-with(@href, '/md5')]")),
|
"title": extract_text(eval_xpath(item, "./div//a[starts-with(@href, '/md5')]")),
|
||||||
"authors": [extract_text(eval_xpath_getindex(item, ".//a[starts-with(@href, '/search')]", 0))],
|
"authors": [extract_text(eval_xpath_getindex(item, ".//a[starts-with(@href, '/search')]", 0))],
|
||||||
"publisher": extract_text(
|
"publisher": extract_text(
|
||||||
@@ -169,7 +186,7 @@ def fetch_traits(engine_traits: EngineTraits):
|
|||||||
engine_traits.custom["ext"] = []
|
engine_traits.custom["ext"] = []
|
||||||
engine_traits.custom["sort"] = []
|
engine_traits.custom["sort"] = []
|
||||||
|
|
||||||
resp = get(base_url + "/search")
|
resp = get(get_base_url_choice() + "/search")
|
||||||
if not resp.ok:
|
if not resp.ok:
|
||||||
raise RuntimeError("Response from Anna's search page is not OK.")
|
raise RuntimeError("Response from Anna's search page is not OK.")
|
||||||
dom = html.fromstring(resp.text)
|
dom = html.fromstring(resp.text)
|
||||||
|
|||||||
@@ -403,6 +403,9 @@ engines:
|
|||||||
|
|
||||||
- name: annas archive
|
- name: annas archive
|
||||||
engine: annas_archive
|
engine: annas_archive
|
||||||
|
base_url:
|
||||||
|
- https://annas-archive.li
|
||||||
|
- https://annas-archive.pm
|
||||||
disabled: true
|
disabled: true
|
||||||
shortcut: aa
|
shortcut: aa
|
||||||
timeout: 5
|
timeout: 5
|
||||||
|
|||||||
Reference in New Issue
Block a user