From 24b1a1b6a80e430af5cb5951b0a90cf2bad21198 Mon Sep 17 00:00:00 2001 From: Bnyro Date: Sun, 17 May 2026 15:14:31 +0200 Subject: [PATCH] [feat] engines: add 500px.com engine (#6091) --- docs/dev/engines/online/500px.rst | 8 ++ searx/enginelib/__init__.py | 3 +- searx/engines/500px.py | 172 ++++++++++++++++++++++++++++++ searx/settings.yml | 6 ++ 4 files changed, 188 insertions(+), 1 deletion(-) create mode 100644 docs/dev/engines/online/500px.rst create mode 100644 searx/engines/500px.py diff --git a/docs/dev/engines/online/500px.rst b/docs/dev/engines/online/500px.rst new file mode 100644 index 000000000..7dd0b4d70 --- /dev/null +++ b/docs/dev/engines/online/500px.rst @@ -0,0 +1,8 @@ +.. _500px engine: + +===== +500px +===== + +.. automodule:: searx.engines.500px + :members: diff --git a/searx/enginelib/__init__.py b/searx/enginelib/__init__.py index 8eba15894..44f6654f8 100644 --- a/searx/enginelib/__init__.py +++ b/searx/enginelib/__init__.py @@ -160,7 +160,8 @@ class EngineCache: def __init__(self, engine_name: str, expire: int | None = None): self.expire: int = expire or ENGINES_CACHE.cfg.MAXHOLD_TIME _valid = "-_." + string.ascii_letters + string.digits - self.table_name: str = "".join([c if c in _valid else "_" for c in engine_name]) + # engine_name is a table and SQL table names must start with a letter + self.table_name: str = "eng_" + "".join([c if c in _valid else "_" for c in engine_name]) def set(self, key: str, value: t.Any, expire: int | None = None) -> bool: return ENGINES_CACHE.set( diff --git a/searx/engines/500px.py b/searx/engines/500px.py new file mode 100644 index 000000000..f5f8a63c9 --- /dev/null +++ b/searx/engines/500px.py @@ -0,0 +1,172 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +# pylint: disable=invalid-name +"""500px_ is a online network for photographers with millions of members +worldwide. Photographers come to 500px to discover and share incredible photos, +gain meaningful exposure, compete in photo contests, and license their photos +through our exclusive distribution partners. + +.. _500px: https://500px.com + +""" + +import typing as t + +import random +import string + +from searx.result_types import EngineResults +from searx.enginelib import EngineCache + +if t.TYPE_CHECKING: + from searx.extended_types import SXNG_Response + from searx.search.processors import OnlineParams + + +# about +about = { + "website": "https://500px.com", + "wikidata_id": "Q354894", + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": "JSON", +} + +base_url = "https://500px.com" +api_url = "https://api.500px.com" + +categories = ["images"] + +paging = True +results_per_page = 30 +"""Number of results to return in the request. + +The default was taken from the WEB UI, where the GraphQL query sets the value to +*static*: ``first: 30``. +""" + + +def page_hash(pageno: int, query: str): + return f"" + CACHE.secret_hash(query) + + +CACHE: EngineCache +"""Persistent (SQLite) key/value cache that deletes its values after ``expire`` +seconds. + +For introspection (in the developer environment) use:: + + $ ./manage dev.env + (dev.env)$ python -m searx.enginelib cache status + ... + [eng_500px] 2026-05-18 18:52:38 6da7...76a3f7 --> (str:8) cG9zLTM5 + [eng_500px] 2026-05-18 18:52:43 6da7...76a3f7 --> (str:8) cG9zLTc5 + +In the output from the example above, we see cached *cursor* for follow up +pages, the query term is a hash value and the date shows the expire date and +time.""" + + +SXNG_query = """query PhotoSearchPaginationContainerQuery( + $first: Int, $cursor: String, $search: String!, $sort: PhotoSort, $filters: [PhotoSearchFilter!], $nlp: Boolean +) { + ...SXNG_query +} + +fragment SXNG_query on Query { + photoSearch(sort: $sort, first: $first, after: $cursor, search: $search, filters: $filters, nlp: $nlp) { + edges { + node { + id + canonicalPath + name + description + width + height + photographer: uploader { + displayName + } + images(sizes: [35, 33]) { + size + url + jpegUrl + webpUrl + id + } + } + cursor + } + pageInfo { + endCursor + hasNextPage + } + } +} +""" + + +def setup(engine_settings: dict[str, t.Any]) -> bool: + global CACHE, SXNG_query # pylint: disable=global-statement + CACHE = EngineCache(str(engine_settings.get("name"))) + rand_str: str = "".join(random.choice(string.ascii_letters) for _ in range(5)) + SXNG_query = SXNG_query.replace("SXNG_query", "PhotoSearchPaginationContainer_query_1" + rand_str) + return True + + +def request(query: str, params: "OnlineParams") -> None: + + cursor: str | None = None + if params["pageno"] > 1: + cursor = CACHE.get(page_hash(pageno=params["pageno"], query=query)) + if not cursor: + params["url"] = None + return + + params["url"] = f"{api_url}/graphql" + params["method"] = "POST" + params["json"] = { + "operationName": "PhotoSearchPaginationContainerQuery", + "variables": { + "first": results_per_page, + "cursor": cursor, + "search": query, + "sort": "RELEVANCE", + "filters": [], + "nlp": False, + }, + "query": SXNG_query, + } + + +def response(resp: "SXNG_Response"): + res = EngineResults() + json_data = resp.json()["data"]["photoSearch"] + + for edge in json_data["edges"]: + node = edge["node"] # pyright: ignore[reportAny] + if not node["images"]: + continue + images: list[dict[str, str]] = sorted(node["images"], key=lambda i: i["size"]) + thumbnail_src = images[0]["url"] + img_src = images[-1]["url"] + res.add( + res.types.LegacyResult( + { + "template": "images.html", + "url": base_url + node["canonicalPath"], + "thumbnail_src": thumbnail_src, + "img_src": img_src, + "title": node["name"], + "content": node["description"], + "author": node["photographer"]["displayName"], + "resolution": f"{node['width']}x{node['height']}", + } + ) + ) + + page_info: dict[str, str] = json_data["pageInfo"] # pyright: ignore[reportAny] + if page_info["hasNextPage"]: + key = page_hash(pageno=resp.search_params["pageno"] + 1, query=resp.search_params["query"]) + CACHE.set(key=key, value=page_info["endCursor"], expire=3600) + + return res diff --git a/searx/settings.yml b/searx/settings.yml index 316d9167d..d6adcc3f1 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -310,6 +310,12 @@ engines: shortcut: 360sov disabled: true + - name: 500px + engine: 500px + shortcut: "500" + disabled: true + timeout: 5 + - name: 9gag engine: 9gag shortcut: 9g