[mod] hardening of the Result.filter_urls() method (#6117)

Exceptions raised while executing the callback must be caught, logged at the
ERROR level, and otherwise ignored.

To test, apply this patch to provoke a ValueError exception::

    diff --git a/searx/data/tracker_patterns.py b/searx/data/tracker_patterns.py
    index ed4415bce..695ed05d2 100644
    --- a/searx/data/tracker_patterns.py
    +++ b/searx/data/tracker_patterns.py
    @@ -114,6 +114,7 @@ class TrackerPatternsDB:
             Returns bool ``True`` to use URL unchanged (``False`` to ignore URL).
             If URL should be modified, the returned string is the new URL to use.
             """
    +        raise ValueError("test callback exceptions")

             new_url = url
             parsed_new_url = urlparse(url=new_url)

Start an instance with `make run`, query for example `amazon`, and have a look
at the ERROR log::

    ERROR   searx.result_types: filter_urls (field 'url'): ignore ValueError('test callback exceptions') from callback searx/data/tracker_patterns.py:117

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser
2026-05-25 18:12:40 +02:00
committed by GitHub
parent cb4b70ac50
commit 28ef4f7447
+18 -3
View File
@@ -19,6 +19,7 @@
__all__ = ["Result"]
import typing as t
import types
import re
import urllib.parse
@@ -29,7 +30,9 @@ from collections.abc import Callable
import msgspec
from searx import logger as log
from searx import logger
log = logger.getChild("result_types")
WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
UNSET = object()
@@ -125,8 +128,20 @@ def _filter_urls(
if not url_src:
continue
new_url = filter_func(result, field_name, url_src)
# log.debug("filter_urls: filter_func(result, %s) '%s' -> '%s'", field_name, field_value, new_url)
try:
new_url = filter_func(result, field_name, url_src)
except Exception as exc: # pylint: disable=broad-exception-caught
# pylint: disable=no-member
_tb: types.TracebackType = exc.__traceback__.tb_next.tb_next # type: ignore
log.error(
"filter_urls (field '%s'): ignore %s from callback %s:%s",
field_name,
repr(exc),
_tb.tb_frame.f_code.co_filename,
_tb.tb_lineno,
)
continue
if isinstance(new_url, bool):
if new_url:
# log.debug("filter_urls: unchanged field %s URL %s", field_name, field_value)