[mod] engine 500px: calc cursor instead of relying on pageInfo (#6091)

This commit is contained in:
Bnyro
2026-05-20 22:53:15 +02:00
committed by Markus Heiser
parent 24b1a1b6a8
commit d8f74af3d1
+6 -41
View File
@@ -11,11 +11,11 @@ through our exclusive distribution partners.
import typing as t import typing as t
import codecs
import random import random
import string import string
from searx.result_types import EngineResults from searx.result_types import EngineResults
from searx.enginelib import EngineCache
if t.TYPE_CHECKING: if t.TYPE_CHECKING:
from searx.extended_types import SXNG_Response from searx.extended_types import SXNG_Response
@@ -46,27 +46,6 @@ The default was taken from the WEB UI, where the GraphQL query sets the value to
""" """
def page_hash(pageno: int, query: str):
return f"<pageno:{pageno} ({results_per_page})>" + CACHE.secret_hash(query)
CACHE: EngineCache
"""Persistent (SQLite) key/value cache that deletes its values after ``expire``
seconds.
For introspection (in the developer environment) use::
$ ./manage dev.env
(dev.env)$ python -m searx.enginelib cache status
...
[eng_500px] 2026-05-18 18:52:38 <pageno:2 (40)>6da7...76a3f7 --> (str:8) cG9zLTM5
[eng_500px] 2026-05-18 18:52:43 <pageno:3 (40)>6da7...76a3f7 --> (str:8) cG9zLTc5
In the output from the example above, we see the cached *cursors* for follow-up
pages; the query term is a hash value and the date shows the expiry date and
time."""
SXNG_query = """query PhotoSearchPaginationContainerQuery( SXNG_query = """query PhotoSearchPaginationContainerQuery(
$first: Int, $cursor: String, $search: String!, $sort: PhotoSort, $filters: [PhotoSearchFilter!], $nlp: Boolean $first: Int, $cursor: String, $search: String!, $sort: PhotoSort, $filters: [PhotoSearchFilter!], $nlp: Boolean
) { ) {
@@ -96,31 +75,22 @@ fragment SXNG_query on Query {
} }
cursor cursor
} }
pageInfo {
endCursor
hasNextPage
}
} }
} }
""" """
def setup(engine_settings: dict[str, t.Any]) -> bool: def setup(_) -> bool:
global CACHE, SXNG_query # pylint: disable=global-statement global SXNG_query # pylint: disable=global-statement
CACHE = EngineCache(str(engine_settings.get("name")))
rand_str: str = "".join(random.choice(string.ascii_letters) for _ in range(5)) rand_str: str = "".join(random.choice(string.ascii_letters) for _ in range(5))
SXNG_query = SXNG_query.replace("SXNG_query", "PhotoSearchPaginationContainer_query_1" + rand_str) SXNG_query = SXNG_query.replace("SXNG_query", "PhotoSearchPaginationContainer_query_1" + rand_str)
return True return True
def request(query: str, params: "OnlineParams") -> None: def request(query: str, params: "OnlineParams") -> None:
# cursor is the base64 encoding of the string "pos-<offset-1>", e.g. "pos-29" -> "cG9zLTI5"
cursor: str | None = None offset = ((params["pageno"] - 1) * results_per_page) - 1
if params["pageno"] > 1: cursor = codecs.encode(f"pos-{offset}".encode("utf-8"), "base64").decode("utf-8")
cursor = CACHE.get(page_hash(pageno=params["pageno"], query=query))
if not cursor:
params["url"] = None
return
params["url"] = f"{api_url}/graphql" params["url"] = f"{api_url}/graphql"
params["method"] = "POST" params["method"] = "POST"
@@ -164,9 +134,4 @@ def response(resp: "SXNG_Response"):
) )
) )
page_info: dict[str, str] = json_data["pageInfo"] # pyright: ignore[reportAny]
if page_info["hasNextPage"]:
key = page_hash(pageno=resp.search_params["pageno"] + 1, query=resp.search_params["query"])
CACHE.set(key=key, value=page_info["endCursor"], expire=3600)
return res return res