From d8f74af3d1a844aeb7a6cfde73792b199fb97aa3 Mon Sep 17 00:00:00 2001 From: Bnyro Date: Wed, 20 May 2026 22:53:15 +0200 Subject: [PATCH] [mod] engine 500px: calc cursor instead of relying on pageInfo (#6091) --- searx/engines/500px.py | 47 ++++++------------------------------------ 1 file changed, 6 insertions(+), 41 deletions(-) diff --git a/searx/engines/500px.py b/searx/engines/500px.py index f5f8a63c9..99596c144 100644 --- a/searx/engines/500px.py +++ b/searx/engines/500px.py @@ -11,11 +11,11 @@ through our exclusive distribution partners. import typing as t +import codecs import random import string from searx.result_types import EngineResults -from searx.enginelib import EngineCache if t.TYPE_CHECKING: from searx.extended_types import SXNG_Response @@ -46,27 +46,6 @@ The default was taken from the WEB UI, where the GraphQL query sets the value to """ -def page_hash(pageno: int, query: str): - return f"" + CACHE.secret_hash(query) - - -CACHE: EngineCache -"""Persistent (SQLite) key/value cache that deletes its values after ``expire`` -seconds. - -For introspection (in the developer environment) use:: - - $ ./manage dev.env - (dev.env)$ python -m searx.enginelib cache status - ... 
- [eng_500px] 2026-05-18 18:52:38 6da7...76a3f7 --> (str:8) cG9zLTM5 - [eng_500px] 2026-05-18 18:52:43 6da7...76a3f7 --> (str:8) cG9zLTc5 - -In the output from the example above, we see cached *cursor* for follow up -pages, the query term is a hash value and the date shows the expire date and -time.""" - - SXNG_query = """query PhotoSearchPaginationContainerQuery( $first: Int, $cursor: String, $search: String!, $sort: PhotoSort, $filters: [PhotoSearchFilter!], $nlp: Boolean ) { @@ -96,31 +75,22 @@ fragment SXNG_query on Query { } cursor } - pageInfo { - endCursor - hasNextPage - } } } """ -def setup(engine_settings: dict[str, t.Any]) -> bool: - global CACHE, SXNG_query # pylint: disable=global-statement - CACHE = EngineCache(str(engine_settings.get("name"))) +def setup(_) -> bool: + global SXNG_query # pylint: disable=global-statement rand_str: str = "".join(random.choice(string.ascii_letters) for _ in range(5)) SXNG_query = SXNG_query.replace("SXNG_query", "PhotoSearchPaginationContainer_query_1" + rand_str) return True def request(query: str, params: "OnlineParams") -> None: - - cursor: str | None = None - if params["pageno"] > 1: - cursor = CACHE.get(page_hash(pageno=params["pageno"], query=query)) - if not cursor: - params["url"] = None - return + # cursor is the base64 encoding of the string "pos-<offset>", e.g. "pos-29" -> "cG9zLTI5" + offset = ((params["pageno"] - 1) * results_per_page) - 1 + cursor = codecs.encode(f"pos-{offset}".encode("utf-8"), "base64").decode("utf-8") params["url"] = f"{api_url}/graphql" params["method"] = "POST" @@ -164,9 +134,4 @@ def response(resp: "SXNG_Response"): ) ) - page_info: dict[str, str] = json_data["pageInfo"] # pyright: ignore[reportAny] - if page_info["hasNextPage"]: - key = page_hash(pageno=resp.search_params["pageno"] + 1, query=resp.search_params["query"]) - CACHE.set(key=key, value=page_info["endCursor"], expire=3600) - return res