From 28ef4f7447debd6f988963c80b3ad15046c65908 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Mon, 25 May 2026 18:12:40 +0200 Subject: [PATCH] [mod] hardening of the Result.filter_urls() method (#6117) Exceptions raised while executing the callback must be caught, logged at the ERROR level and otherwise ignored. To test, apply this patch to provoke a ValueError exception:: diff --git a/searx/data/tracker_patterns.py b/searx/data/tracker_patterns.py index ed4415bce..695ed05d2 100644 --- a/searx/data/tracker_patterns.py +++ b/searx/data/tracker_patterns.py @@ -114,6 +114,7 @@ class TrackerPatternsDB: Returns bool ``True`` to use URL unchanged (``False`` to ignore URL). If URL should be modified, the returned string is the new URL to use. """ + raise ValueError("test callback exceptions") new_url = url parsed_new_url = urlparse(url=new_url) Start a `make run` instance, query for example `amazon`, and have a look at the ERROR log: ERROR searx.result_types: filter_urls (field 'url'): ignore ValueError('test callback exceptions') from callback searx/data/tracker_patterns.py:117 Signed-off-by: Markus Heiser --- searx/result_types/_base.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/searx/result_types/_base.py b/searx/result_types/_base.py index 3624d64ec..a08675d5f 100644 --- a/searx/result_types/_base.py +++ b/searx/result_types/_base.py @@ -19,6 +19,7 @@ __all__ = ["Result"] import typing as t +import types import re import urllib.parse @@ -29,7 +30,9 @@ from collections.abc import Callable import msgspec -from searx import logger as log +from searx import logger + +log = logger.getChild("result_types") WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U) UNSET = object() @@ -125,8 +128,20 @@ def _filter_urls( if not url_src: continue - new_url = filter_func(result, field_name, url_src) - # log.debug("filter_urls: filter_func(result, %s) '%s' -> '%s'", field_name, field_value, new_url) + try: + new_url = filter_func(result, 
field_name, url_src) + except Exception as exc: # pylint: disable=broad-exception-caught + # pylint: disable=no-member + _tb: types.TracebackType = exc.__traceback__.tb_next.tb_next # type: ignore + log.error( + "filter_urls (field '%s'): ignore %s from callback %s:%s", + field_name, + repr(exc), + _tb.tb_frame.f_code.co_filename, + _tb.tb_lineno, + ) + continue + if isinstance(new_url, bool): if new_url: # log.debug("filter_urls: unchanged field %s URL %s", field_name, field_value)