mirror of
https://github.com/searxng/searxng.git
synced 2026-05-26 12:50:14 +02:00
efc305b7f9
[mod] normalize variable name for the max number of results per request In the past, we have used different names for the variable that specifies the maximum number of hits in the outgoing request. - ``page_size`` - ``number_of_results`` - ``nb_per_page`` Since *page_size* is the most accurate term and is also used in the XPath engines, all other engines are adjusted accordingly within this patch .. documentation adjusted accordingly. Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
162 lines
4.4 KiB
Python
162 lines
4.4 KiB
Python
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||
"""CORE_ (COnnecting REpositories) provides a comprehensive bibliographic
|
||
database of the world’s scholarly literature, collecting and indexing
|
||
research from repositories and journals.
|
||
|
||
.. _CORE: https://core.ac.uk/about
|
||
|
||
.. note::
|
||
|
||
The CORE engine requires an :py:obj:`API key <api_key>`.
|
||
|
||
.. _core engine config:
|
||
|
||
Configuration
|
||
=============
|
||
|
||
The engine has the following additional settings:
|
||
|
||
- :py:obj:`api_key`
|
||
|
||
.. code:: yaml
|
||
|
||
- name: core.ac.uk
|
||
api_key: "..."
|
||
inactive: false
|
||
|
||
Implementations
|
||
===============
|
||
|
||
"""
|
||
|
||
import typing as t
|
||
|
||
from datetime import datetime
|
||
from urllib.parse import urlencode
|
||
|
||
from searx.result_types import EngineResults
|
||
|
||
if t.TYPE_CHECKING:
|
||
from searx.extended_types import SXNG_Response
|
||
from searx.search.processors import OnlineParams
|
||
|
||
|
||
# Engine metadata: where the service lives and how its API is consumed.
about = {
    "website": "https://core.ac.uk",
    "wikidata_id": "Q22661180",
    "official_api_documentation": "https://api.core.ac.uk/docs/v3",
    "use_official_api": True,
    "require_api_key": True,
    "results": "JSON",
}

api_key = ""
"""For an API key register at https://core.ac.uk/services/api and insert
the API key in the engine :ref:`core engine config`."""

# Endpoint of the works search (API v3); the query string is appended to it.
base_url = "https://api.core.ac.uk/v3/search/works/"

categories = ["science", "scientific publications"]

# Paging is supported; ``page_size`` is the number of hits asked for per
# request and also the step used to compute the offset of a page.
page_size = 10
paging = True
|
||
|
||
|
||
def setup(engine_settings: dict[str, t.Any]) -> bool:
|
||
"""Initialization of the CORE_ engine, checks whether the :py:obj:`api_key`
|
||
is set, otherwise the engine is inactive.
|
||
"""
|
||
|
||
key: str = engine_settings.get("api_key", "")
|
||
if key and key not in ("unset", "unknown", "..."):
|
||
return True
|
||
logger.error("CORE's API key is not set or invalid.")
|
||
return False
|
||
|
||
|
||
def request(query: str, params: "OnlineParams") -> None:
    """Prepare the outgoing request for the works search of CORE's API v3.

    Writes two entries into *params*:

    - ``url``: :py:obj:`base_url` plus the URL-encoded arguments ``q``,
      ``offset``, ``limit`` and ``sort``.  The offset is derived from
      ``params["pageno"]`` (first page is ``1``) and :py:obj:`page_size`.
    - ``headers``: an ``Authorization`` header carrying the
      :py:obj:`api_key` as bearer token.
    """
    # number of hits to skip, i.e. the hits of all preceding pages
    first_hit = (params["pageno"] - 1) * page_size

    args = urlencode(
        {
            "q": query,
            "offset": first_hit,
            "limit": page_size,
            "sort": "relevance",
        }
    )

    params["url"] = f"{base_url}?{args}"
    params["headers"] = {"Authorization": f"Bearer {api_key}"}
|
||
|
||
|
||
def _first_url(value: t.Any) -> str:
    """Reduce *value* to a single URL string.

    *value* may be a string (returned unchanged) or a list / tuple, in which
    case its first non-empty string item is returned.  For anything else
    (``None``, empty containers, ..) an empty string is returned.
    """
    if isinstance(value, str):
        return value
    if isinstance(value, (list, tuple)):
        return next((item for item in value if isinstance(item, str) and item), "")
    return ""


def _result_url(result: dict[str, t.Any]) -> str:
    """Return the URL a result item should link to (empty string if none).

    Order of preference: DOI link, CORE's page of the work (``id``),
    ``downloadUrl`` and finally ``sourceFulltextUrls``.
    """
    doi = result.get("doi")
    if doi:
        return f"https://doi.org/{doi}"
    if result.get("id"):
        return "https://core.ac.uk/works/" + str(result["id"])
    return _first_url(result.get("downloadUrl")) or _first_url(result.get("sourceFulltextUrls"))


def _published_date(result: dict[str, t.Any]) -> datetime | None:
    """Parse ``publishedDate`` of a result item, falling back to
    ``depositedDate``.  Returns ``None`` if neither holds a ISO 8601 date.
    """
    for field in ("publishedDate", "depositedDate"):
        raw_date = result.get(field)
        if not raw_date:
            continue
        try:
            # fromisoformat() of older Python versions does not accept the
            # "Z" suffix, so it is rewritten to an explicit UTC offset
            return datetime.fromisoformat(raw_date.replace("Z", "+00:00"))
        except (ValueError, AttributeError):
            # best effort: a malformed date must not drop the whole result,
            # try the next field instead
            continue
    return None


def response(resp: "SXNG_Response") -> EngineResults:
    """Convert the JSON response of the works search into ``Paper`` results.

    Items of the ``results`` list without a title or without any usable URL
    (see :py:obj:`_result_url`) are skipped.  Fields the API delivers as
    ``null`` are treated like missing fields.
    """
    res = EngineResults()
    json_data = resp.json()

    for result in json_data.get("results") or []:

        title = result.get("title")
        if not title:
            continue

        url = _result_url(result)
        if not url:
            continue

        journals = [j.get("title") for j in result.get("journals") or [] if j.get("title")]

        # a set, the same author is listed only once
        authors: set[str] = set()
        for author in result.get("authors") or []:
            name: str | None = author.get("name")
            if name:
                authors.add(name)

        res.add(
            res.types.Paper(
                title=title,
                url=url,
                content=result.get("fullText", "") or "",
                tags=result.get("fieldOfStudy") or [],
                publishedDate=_published_date(result),
                type=result.get("documentType", "") or "",
                authors=authors,
                editor=", ".join(result.get("contributors") or []),
                # some publisher names are wrapped in single quotes
                publisher=(result.get("publisher") or "").strip("'"),
                journal=", ".join(journals),
                doi=result.get("doi"),
                pdf_url=_first_url(result.get("downloadUrl")) or _first_url(result.get("sourceFulltextUrls")),
            )
        )

    return res
|