[mod] drop SearXNG's checker (#5767)

To date, there is no analysis for the checker that could be evaluated in any
meaningful way.

- https://github.com/searxng/searxng/issues/3407
- https://github.com/searxng/searxng/pull/3312

The checker would need to be completely redesigned, but even then, its
usefulness and the maintenance required for it would be disproportionate.

TBH: In its current form, it is useless and only consumes resources and
causes the engines to be blocked, because these tests (query terms) come
from *hundreds* of instances and could be interpreted as bot attacks.

Related issues: [search.checker](https://github.com/searxng/searxng/issues?q=label%3A%22search.checker%22)

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser
2026-02-22 14:44:21 +01:00
committed by GitHub
parent 5054e69844
commit 191818b865
19 changed files with 8 additions and 1037 deletions
-7
View File
@@ -17,7 +17,6 @@ help:
@echo 'install - developer install of SearxNG into virtualenv'
@echo 'uninstall - uninstall developer installation'
@echo 'clean - clean up working tree'
@echo 'search.checker - check search engines'
@echo 'test - run shell & CI tests'
@echo 'test.shell - test shell scripts'
@echo 'ci.test - run CI tests'
@@ -39,12 +38,6 @@ clean: py.clean docs.clean node.clean nvm.clean go.clean test.clean
$(Q)find . -name '*~' -exec rm -f {} +
$(Q)find . -name '*.bak' -exec rm -f {} +
PHONY += search.checker search.checker.%
search.checker: install
$(Q)./manage pyenv.cmd searxng-checker -v
search.checker.%: install
$(Q)./manage pyenv.cmd searxng-checker -v "$(subst _, ,$(patsubst search.checker.%,%,$@))"
PHONY += test ci.test test.shell
test: test.yamllint test.black test.pyright_modified test.pylint test.unit test.robot test.rst test.shell test.shfmt
-32
View File
@@ -291,38 +291,6 @@ Pylint_ is known as one of the best source-code, bug and quality checker for the
Python programming language. The pylint profile used in the SearXNG project is
found in project's root folder :origin:`.pylintrc`.
.. _make search.checker:
``make search.checker.{engine name}``
=====================================
To check all engines::
make search.checker
To check an engine with whitespace in its name, like *google news*, replace the space
with an underscore::
make search.checker.google_news
To see HTTP requests and more use SEARXNG_DEBUG::
make SEARXNG_DEBUG=1 search.checker.google_news
.. _3xx: https://en.wikipedia.org/wiki/List_of_HTTP_status_codes#3xx_redirection
To filter out HTTP redirects (3xx_)::
make SEARXNG_DEBUG=1 search.checker.google_news | grep -A1 "HTTP/1.1\" 3[0-9][0-9]"
...
Engine google news Checking
https://news.google.com:443 "GET /search?q=life&hl=en&lr=lang_en&ie=utf8&oe=utf8&ceid=US%3Aen&gl=US HTTP/1.1" 302 0
https://news.google.com:443 "GET /search?q=life&hl=en-US&lr=lang_en&ie=utf8&oe=utf8&ceid=US:en&gl=US HTTP/1.1" 200 None
--
https://news.google.com:443 "GET /search?q=computer&hl=en&lr=lang_en&ie=utf8&oe=utf8&ceid=US%3Aen&gl=US HTTP/1.1" 302 0
https://news.google.com:443 "GET /search?q=computer&hl=en-US&lr=lang_en&ie=utf8&oe=utf8&ceid=US:en&gl=US HTTP/1.1" 200 None
--
.. _make themes:
``make themes.*``
+1 -1
View File
@@ -60,7 +60,7 @@ paging = False
time_range_support = False
# Google-News results are always *SafeSearch*. Option 'safesearch' is set to
# False here, otherwise checker will report safesearch-errors::
# False here.
#
# safesearch : results are identical for safesearch=0 and safesearch=2
safesearch = True
+1 -10
View File
@@ -139,26 +139,18 @@ def get_engine_errors(engline_name_list):
return result
def get_reliabilities(engline_name_list, checker_results):
def get_reliabilities(engline_name_list):
reliabilities = {}
engine_errors = get_engine_errors(engline_name_list)
for engine_name in engline_name_list:
checker_result = checker_results.get(engine_name, {})
checker_success = checker_result.get('success', True)
errors = engine_errors.get(engine_name) or []
sent_count = counter('engine', engine_name, 'search', 'count', 'sent')
if sent_count == 0:
# no request
reliability = None
elif checker_success and not errors:
reliability = 100
elif 'simple' in checker_result.get('errors', {}):
# the basic (simple) test doesn't work: the engine is broken according to the checker
# even if there is no exception
reliability = 0
else:
# pylint: disable=consider-using-generator
reliability = 100 - sum([error['percentage'] for error in errors if not error.get('secondary')])
@@ -167,7 +159,6 @@ def get_reliabilities(engline_name_list, checker_results):
'reliability': reliability,
'sent_count': sent_count,
'errors': errors,
'checker': checker_result.get('errors', {}),
}
return reliabilities
-4
View File
@@ -20,7 +20,6 @@ from searx.external_bang import get_bang_url
from searx.metrics import initialize as initialize_metrics, counter_inc
from searx.network import initialize as initialize_network, check_network_configuration
from searx.results import ResultContainer
from searx.search.checker import initialize as initialize_checker
from searx.search.processors import PROCESSORS
from searx.search.processors.abstract import RequestParams
@@ -33,7 +32,6 @@ logger = logger.getChild('search')
def initialize(
settings_engines: list[dict[str, t.Any]] = None, # pyright: ignore[reportArgumentType]
enable_checker: bool = False,
check_network: bool = False,
enable_metrics: bool = True,
):
@@ -44,8 +42,6 @@ def initialize(
check_network_configuration()
initialize_metrics([engine['name'] for engine in settings_engines], enable_metrics)
PROCESSORS.init(settings_engines)
if enable_checker:
initialize_checker()
class Search:
-7
View File
@@ -1,7 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring
from .impl import Checker
from .background import initialize, get_result
__all__ = ('Checker', 'initialize', 'get_result')
-118
View File
@@ -1,118 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring
import sys
import io
import os
import argparse
import logging
import searx.search
import searx.search.checker
from searx.search import PROCESSORS
from searx.engines import engine_shortcuts
# configure logging
root = logging.getLogger()
handler = logging.StreamHandler(sys.stdout)
for h in root.handlers:
root.removeHandler(h)
root.addHandler(handler)
# color only for a valid terminal
if sys.stdout.isatty() and os.environ.get('TERM') not in ['dumb', 'unknown']:
RESET_SEQ = "\033[0m"
COLOR_SEQ = "\033[1;%dm"
BOLD_SEQ = "\033[1m"
BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = map(lambda i: COLOR_SEQ % (30 + i), range(8))
else:
RESET_SEQ = ""
COLOR_SEQ = ""
BOLD_SEQ = ""
BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = "", "", "", "", "", "", "", ""
# equivalent of 'python -u' (unbuffered stdout, stderr)
stdout = io.TextIOWrapper(
# pylint: disable=consider-using-with
open(sys.stdout.fileno(), 'wb', 0),
write_through=True,
)
stderr = io.TextIOWrapper(
# pylint: disable=consider-using-with
open(sys.stderr.fileno(), 'wb', 0),
write_through=True,
)
# iterator of processors
def iter_processor(engine_name_list):
if len(engine_name_list) > 0:
for name in engine_name_list:
name = engine_shortcuts.get(name, name)
processor = PROCESSORS.get(name)
if processor is not None:
yield name, processor
else:
stdout.write(f'{BOLD_SEQ}Engine {name:30}{RESET_SEQ}{RED}Engine does not exist{RESET_SEQ}\n')
else:
for name, processor in searx.search.PROCESSORS.items():
yield name, processor
# actual check & display
def run(engine_name_list, verbose):
searx.search.initialize()
name_checker_list = []
for name, processor in iter_processor(engine_name_list):
stdout.write(f'{BOLD_SEQ}Engine {name:30}{RESET_SEQ}Checking\n')
if not sys.stdout.isatty():
stderr.write(f'{BOLD_SEQ}Engine {name:30}{RESET_SEQ}Checking\n')
checker = searx.search.checker.Checker(processor)
checker.run()
name_checker_list.append((name, checker))
stdout.write(f'\n== {BOLD_SEQ}Results{RESET_SEQ} ' + '=' * 70 + '\n')
for name, checker in name_checker_list:
if checker.test_results.successful:
stdout.write(f'{BOLD_SEQ}Engine {name:30}{RESET_SEQ}{GREEN}OK{RESET_SEQ}\n')
if verbose:
stdout.write(f' {"found languages":15}: {" ".join(sorted(list(checker.test_results.languages)))}\n')
else:
stdout.write(f'{BOLD_SEQ}Engine {name:30}{RESET_SEQ}{RESET_SEQ}{RED}Error{RESET_SEQ}')
if not verbose:
errors = [test_name + ': ' + error for test_name, error in checker.test_results]
stdout.write(f'{RED}Error {str(errors)}{RESET_SEQ}\n')
else:
stdout.write('\n')
stdout.write(f' {"found languages":15}: {" ".join(sorted(list(checker.test_results.languages)))}\n')
for test_name, logs in checker.test_results.logs.items():
for log in logs:
log = map(lambda l: l if isinstance(l, str) else repr(l), log)
stdout.write(f' {test_name:15}: {RED}{" ".join(log)}{RESET_SEQ}\n')
# call by setup.py
def main():
parser = argparse.ArgumentParser(description='Check SearXNG engines.')
parser.add_argument(
'engine_name_list',
metavar='engine name',
type=str,
nargs='*',
help='engines name or shortcut list. Empty for all engines.',
)
parser.add_argument(
'--verbose',
'-v',
action='store_true',
dest='verbose',
help='Display details about the test results',
default=False,
)
args = parser.parse_args()
run(args.engine_name_list, args.verbose)
if __name__ == '__main__':
main()
-168
View File
@@ -1,168 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring, cyclic-import
import json
import time
import threading
import os
import signal
from typing import Any, Dict, List, Literal, Optional, Tuple, TypedDict, Union
import valkey.exceptions
from searx import logger, settings, sxng_debug
from searx.valkeydb import client as get_valkey_client
from searx.exceptions import SearxSettingsException
from searx.search.processors import PROCESSORS
from searx.search.checker import Checker
from searx.search.checker.scheduler import scheduler_function
VALKEY_RESULT_KEY = 'SearXNG_checker_result'
VALKEY_LOCK_KEY = 'SearXNG_checker_lock'
CheckerResult = Union['CheckerOk', 'CheckerErr', 'CheckerOther']
class CheckerOk(TypedDict):
"""Checking the engines succeeded"""
status: Literal['ok']
engines: Dict[str, 'EngineResult']
timestamp: int
class CheckerErr(TypedDict):
"""Checking the engines failed"""
status: Literal['error']
timestamp: int
class CheckerOther(TypedDict):
"""The status is unknown or disabled"""
status: Literal['unknown', 'disabled']
EngineResult = Union['EngineOk', 'EngineErr']
class EngineOk(TypedDict):
"""Checking the engine succeeded"""
success: Literal[True]
class EngineErr(TypedDict):
"""Checking the engine failed"""
success: Literal[False]
errors: Dict[str, List[str]]
def _get_interval(every: Any, error_msg: str) -> Tuple[int, int]:
if isinstance(every, int):
return (every, every)
if (
not isinstance(every, (tuple, list))
or len(every) != 2 # type: ignore
or not isinstance(every[0], int)
or not isinstance(every[1], int)
):
raise SearxSettingsException(error_msg, None)
return (every[0], every[1])
def get_result() -> CheckerResult:
client = get_valkey_client()
if client is None:
# without Valkey, the checker is disabled
return {'status': 'disabled'}
serialized_result: Optional[bytes] = client.get(VALKEY_RESULT_KEY)
if serialized_result is None:
# the Valkey key does not exist
return {'status': 'unknown'}
return json.loads(serialized_result)
def _set_result(result: CheckerResult):
client = get_valkey_client()
if client is None:
# without Valkey, the function does nothing
return
client.set(VALKEY_RESULT_KEY, json.dumps(result))
def _timestamp():
return int(time.time() / 3600) * 3600
def run():
try:
# use a Valkey lock to make sure there is no checker running at the same time
# (this should not happen, this is a safety measure)
with get_valkey_client().lock(VALKEY_LOCK_KEY, blocking_timeout=60, timeout=3600):
logger.info('Starting checker')
result: CheckerOk = {'status': 'ok', 'engines': {}, 'timestamp': _timestamp()}
for name, processor in PROCESSORS.items():
logger.debug('Checking %s engine', name)
checker = Checker(processor)
checker.run()
if checker.test_results.successful:
result['engines'][name] = {'success': True}
else:
result['engines'][name] = {'success': False, 'errors': checker.test_results.errors}
_set_result(result)
logger.info('Check done')
except valkey.exceptions.LockError:
_set_result({'status': 'error', 'timestamp': _timestamp()})
logger.exception('Error while running the checker')
except Exception: # pylint: disable=broad-except
_set_result({'status': 'error', 'timestamp': _timestamp()})
logger.exception('Error while running the checker')
def _signal_handler(_signum: int, _frame: Any):
t = threading.Thread(target=run)
t.daemon = True
t.start()
def initialize():
if hasattr(signal, 'SIGUSR1'):
# Windows doesn't support SIGUSR1
logger.info('Send SIGUSR1 signal to pid %i to start the checker', os.getpid())
signal.signal(signal.SIGUSR1, _signal_handler)
# special case when debug is activate
if sxng_debug and settings['checker']['off_when_debug']:
logger.info('debug mode: checker is disabled')
return
# check value of checker.scheduling.every now
scheduling = settings['checker']['scheduling']
if scheduling is None or not scheduling:
logger.info('Checker scheduler is disabled')
return
# make sure there is a Valkey connection
if get_valkey_client() is None:
logger.error('The checker requires Valkey')
return
# start the background scheduler
every_range = _get_interval(scheduling.get('every', (300, 1800)), 'checker.scheduling.every is not a int or list')
start_after_range = _get_interval(
scheduling.get('start_after', (300, 1800)), 'checker.scheduling.start_after is not a int or list'
)
t = threading.Thread(
target=scheduler_function,
args=(start_after_range[0], start_after_range[1], every_range[0], every_range[1], run),
name='checker_scheduler',
)
t.daemon = True
t.start()
-441
View File
@@ -1,441 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring, invalid-name
import gc
import typing
import types
import functools
import itertools
from time import time
from timeit import default_timer
from urllib.parse import urlparse
import re
import httpx
from searx import network, logger
from searx.utils import gen_useragent, detect_language
from searx.results import ResultContainer
from searx.search.models import SearchQuery, EngineRef
from searx.search.processors import EngineProcessor
from searx.metrics import counter_inc
logger = logger.getChild('searx.search.checker')
HTML_TAGS = [
# fmt: off
'embed', 'iframe', 'object', 'param', 'picture', 'source', 'svg', 'math', 'canvas', 'noscript', 'script',
'del', 'ins', 'area', 'audio', 'img', 'map', 'track', 'video', 'a', 'abbr', 'b', 'bdi', 'bdo', 'br', 'cite',
'code', 'data', 'dfn', 'em', 'i', 'kdb', 'mark', 'q', 'rb', 'rp', 'rt', 'rtc', 'ruby', 's', 'samp', 'small',
'span', 'strong', 'sub', 'sup', 'time', 'u', 'var', 'wbr', 'style', 'blockquote', 'dd', 'div', 'dl', 'dt',
'figcaption', 'figure', 'hr', 'li', 'ol', 'p', 'pre', 'ul', 'button', 'datalist', 'fieldset', 'form', 'input',
'label', 'legend', 'meter', 'optgroup', 'option', 'output', 'progress', 'select', 'textarea', 'applet',
'frame', 'frameset'
# fmt: on
]
def get_check_no_html():
rep = ['<' + tag + r'[^\>]*>' for tag in HTML_TAGS]
rep += ['</' + tag + '>' for tag in HTML_TAGS]
pattern = re.compile('|'.join(rep))
def f(text):
return pattern.search(text.lower()) is None
return f
_check_no_html = get_check_no_html()
def _is_url(url):
try:
result = urlparse(url)
except ValueError:
return False
if result.scheme not in ('http', 'https'):
return False
return True
@functools.lru_cache(maxsize=8192)
def _download_and_check_if_image(image_url: str) -> bool:
"""Download an URL and check if the Content-Type starts with "image/"
This function should not be called directly: use _is_url_image
otherwise the cache of functools.lru_cache contains data: URL which might be huge.
"""
retry = 2
while retry > 0:
a = time()
try:
# use "image_proxy" (avoid HTTP/2)
network.set_context_network_name('image_proxy')
r, stream = network.stream(
'GET',
image_url,
timeout=10.0,
allow_redirects=True,
headers={
'User-Agent': gen_useragent(),
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Language': 'en-US;q=0.5,en;q=0.3',
'DNT': '1',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1',
'Sec-GPC': '1',
'Cache-Control': 'max-age=0',
},
)
r.close()
if r.status_code == 200:
is_image = r.headers.get('content-type', '').startswith('image/')
else:
is_image = False
del r
del stream
return is_image
except httpx.TimeoutException:
logger.error('Timeout for %s: %i', image_url, int(time() - a))
retry -= 1
except httpx.HTTPError:
logger.exception('Exception for %s', image_url)
return False
return False
def _is_url_image(image_url) -> bool:
"""Normalize image_url"""
if not isinstance(image_url, str):
return False
if image_url.startswith('//'):
image_url = 'https:' + image_url
if image_url.startswith('data:'):
return image_url.startswith('data:image/')
if not _is_url(image_url):
return False
return _download_and_check_if_image(image_url)
def _search_query_to_dict(search_query: SearchQuery) -> typing.Dict[str, typing.Any]:
return {
'query': search_query.query,
'lang': search_query.lang,
'pageno': search_query.pageno,
'safesearch': search_query.safesearch,
'time_range': search_query.time_range,
}
def _search_query_diff(
sq1: SearchQuery, sq2: SearchQuery
) -> typing.Tuple[typing.Dict[str, typing.Any], typing.Dict[str, typing.Any]]:
param1 = _search_query_to_dict(sq1)
param2 = _search_query_to_dict(sq2)
common = {}
diff = {}
for k, value1 in param1.items():
value2 = param2[k]
if value1 == value2:
common[k] = value1
else:
diff[k] = (value1, value2)
return (common, diff)
class TestResults: # pylint: disable=missing-class-docstring
__slots__ = 'errors', 'logs', 'languages'
def __init__(self):
self.errors: typing.Dict[str, typing.List[str]] = {}
self.logs: typing.Dict[str, typing.List[typing.Any]] = {}
self.languages: typing.Set[str] = set()
def add_error(self, test, message, *args):
# message to self.errors
errors_for_test = self.errors.setdefault(test, [])
if message not in errors_for_test:
errors_for_test.append(message)
# (message, *args) to self.logs
logs_for_test = self.logs.setdefault(test, [])
if (message, *args) not in logs_for_test:
logs_for_test.append((message, *args))
def add_language(self, language):
self.languages.add(language)
@property
def successful(self):
return len(self.errors) == 0
def __iter__(self):
for test_name, errors in self.errors.items():
for error in sorted(errors):
yield (test_name, error)
class ResultContainerTests: # pylint: disable=missing-class-docstring
__slots__ = 'test_name', 'search_query', 'result_container', 'languages', 'stop_test', 'test_results'
def __init__(
self, test_results: TestResults, test_name: str, search_query: SearchQuery, result_container: ResultContainer
):
self.test_name = test_name
self.search_query = search_query
self.result_container = result_container
self.languages: typing.Set[str] = set()
self.test_results = test_results
self.stop_test = False
@property
def result_urls(self):
results = self.result_container.get_ordered_results()
return [result['url'] for result in results if 'url' in result]
def _record_error(self, message: str, *args) -> None:
sq = _search_query_to_dict(self.search_query)
sqstr = ' '.join(['{}={!r}'.format(k, v) for k, v in sq.items()])
self.test_results.add_error(self.test_name, message, *args, '(' + sqstr + ')')
def _add_language(self, text: str) -> typing.Optional[str]:
langStr = detect_language(text)
if langStr:
self.languages.add(langStr)
self.test_results.add_language(langStr)
def _check_result(self, result):
if not _check_no_html(result.get('title', '')):
self._record_error('HTML in title', repr(result.get('title', '')))
if not _check_no_html(result.get('content', '')):
self._record_error('HTML in content', repr(result.get('content', '')))
if result.get('url') is None:
self._record_error('url is None')
self._add_language(result.get('title', ''))
self._add_language(result.get('content', ''))
template = result.get('template', 'default.html')
if template == 'default.html':
return
if template == 'code.html':
return
if template == 'torrent.html':
return
if template == 'map.html':
return
if template == 'images.html':
thumbnail_src = result.get('thumbnail_src')
if thumbnail_src is not None:
if not _is_url_image(thumbnail_src):
self._record_error('thumbnail_src URL is invalid', thumbnail_src)
elif not _is_url_image(result.get('img_src')):
self._record_error('img_src URL is invalid', result.get('img_src'))
if template == 'videos.html' and not _is_url_image(result.get('thumbnail')):
self._record_error('thumbnail URL is invalid', result.get('img_src'))
def _check_results(self, results: list):
for result in results:
self._check_result(result)
def _check_answers(self, answers):
for answer in answers:
if not _check_no_html(answer):
self._record_error('HTML in answer', answer)
def _check_infoboxes(self, infoboxes):
for infobox in infoboxes:
if not _check_no_html(infobox.get('content', '')):
self._record_error('HTML in infobox content', infobox.get('content', ''))
self._add_language(infobox.get('content', ''))
for attribute in infobox.get('attributes', {}):
if not _check_no_html(attribute.get('value', '')):
self._record_error('HTML in infobox attribute value', attribute.get('value', ''))
def check_basic(self):
if len(self.result_container.unresponsive_engines) > 0:
for message in self.result_container.unresponsive_engines:
self._record_error(message[1] + ' ' + (message[2] or ''))
self.stop_test = True
return
results = self.result_container.get_ordered_results()
if len(results) > 0:
self._check_results(results)
if len(self.result_container.answers) > 0:
self._check_answers(self.result_container.answers)
if len(self.result_container.infoboxes) > 0:
self._check_infoboxes(self.result_container.infoboxes)
def has_infobox(self):
"""Check the ResultContainer has at least one infobox"""
if len(self.result_container.infoboxes) == 0:
self._record_error('No infobox')
def has_answer(self):
"""Check the ResultContainer has at least one answer"""
if len(self.result_container.answers) == 0:
self._record_error('No answer')
def has_language(self, lang):
"""Check at least one title or content of the results is written in the `lang`.
Detected using pycld3, may be not accurate"""
if lang not in self.languages:
self._record_error(lang + ' not found')
def not_empty(self):
"""Check the ResultContainer has at least one answer or infobox or result"""
result_types = set()
results = self.result_container.get_ordered_results()
if len(results) > 0:
result_types.add('results')
if len(self.result_container.answers) > 0:
result_types.add('answers')
if len(self.result_container.infoboxes) > 0:
result_types.add('infoboxes')
if len(result_types) == 0:
self._record_error('No result')
def one_title_contains(self, title: str):
"""Check one of the title contains `title` (case insensitive comparison)"""
title = title.lower()
for result in self.result_container.get_ordered_results():
if title in result['title'].lower():
return
self._record_error(('{!r} not found in the title'.format(title)))
class CheckerTests: # pylint: disable=missing-class-docstring, too-few-public-methods
__slots__ = 'test_results', 'test_name', 'result_container_tests_list'
def __init__(
self, test_results: TestResults, test_name: str, result_container_tests_list: typing.List[ResultContainerTests]
):
self.test_results = test_results
self.test_name = test_name
self.result_container_tests_list = result_container_tests_list
def unique_results(self):
"""Check the results of each ResultContainer is unique"""
urls_list = [rct.result_urls for rct in self.result_container_tests_list]
if len(urls_list[0]) > 0:
# results on the first page
for i, urls_i in enumerate(urls_list):
for j, urls_j in enumerate(urls_list):
if i < j and urls_i == urls_j:
common, diff = _search_query_diff(
self.result_container_tests_list[i].search_query,
self.result_container_tests_list[j].search_query,
)
common_str = ' '.join(['{}={!r}'.format(k, v) for k, v in common.items()])
diff1_str = ', '.join(['{}={!r}'.format(k, v1) for (k, (v1, v2)) in diff.items()])
diff2_str = ', '.join(['{}={!r}'.format(k, v2) for (k, (v1, v2)) in diff.items()])
self.test_results.add_error(
self.test_name,
'results are identical for {} and {} ({})'.format(diff1_str, diff2_str, common_str),
)
class Checker: # pylint: disable=missing-class-docstring
__slots__ = 'processor', 'tests', 'test_results'
def __init__(self, processor: EngineProcessor):
self.processor = processor
self.tests = self.processor.get_tests()
self.test_results = TestResults()
@property
def engineref_list(self):
engine_name = self.processor.engine_name
engine_category = self.processor.engine.categories[0]
return [EngineRef(engine_name, engine_category)]
@staticmethod
def search_query_matrix_iterator(engineref_list, matrix):
p = []
for name, values in matrix.items():
if isinstance(values, (tuple, list)):
l = [(name, value) for value in values]
else:
l = [(name, values)]
p.append(l)
for kwargs in itertools.product(*p):
kwargs = dict(kwargs)
query = kwargs['query']
params = dict(kwargs)
del params['query']
yield SearchQuery(query, engineref_list, **params)
def call_test(self, obj, test_description):
if isinstance(test_description, (tuple, list)):
method, args = test_description[0], test_description[1:]
else:
method = test_description
args = ()
if isinstance(method, str) and hasattr(obj, method):
getattr(obj, method)(*args)
elif isinstance(method, types.FunctionType):
method(*args)
else:
self.test_results.add_error(
obj.test_name,
'method {!r} ({}) not found for {}'.format(method, method.__class__.__name__, obj.__class__.__name__),
)
def call_tests(self, obj, test_descriptions):
for test_description in test_descriptions:
self.call_test(obj, test_description)
def search(self, search_query: SearchQuery) -> ResultContainer:
result_container = ResultContainer()
engineref_category = search_query.engineref_list[0].category
params = self.processor.get_params(search_query, engineref_category)
if params is not None:
counter_inc('engine', search_query.engineref_list[0].name, 'search', 'count', 'sent')
self.processor.search(search_query.query, params, result_container, default_timer(), 5)
return result_container
def get_result_container_tests(self, test_name: str, search_query: SearchQuery) -> ResultContainerTests:
result_container = self.search(search_query)
result_container_check = ResultContainerTests(self.test_results, test_name, search_query, result_container)
result_container_check.check_basic()
return result_container_check
def run_test(self, test_name):
test_parameters = self.tests[test_name]
search_query_list = list(Checker.search_query_matrix_iterator(self.engineref_list, test_parameters['matrix']))
rct_list = [self.get_result_container_tests(test_name, search_query) for search_query in search_query_list]
stop_test = False
if 'result_container' in test_parameters:
for rct in rct_list:
stop_test = stop_test or rct.stop_test
if not rct.stop_test:
self.call_tests(rct, test_parameters['result_container'])
if not stop_test:
if 'test' in test_parameters:
checker_tests = CheckerTests(self.test_results, test_name, rct_list)
self.call_tests(checker_tests, test_parameters['test'])
def run(self):
for test_name in self.tests:
self.run_test(test_name)
# clear cache
_download_and_check_if_image.cache_clear()
# force a garbage collector
gc.collect()
-36
View File
@@ -1,36 +0,0 @@
-- SPDX-License-Identifier: AGPL-3.0-or-later
--
-- This script is not a string in scheduler.py, so editors can provide syntax highlighting.
-- The Valkey KEY is defined here and not in Python on purpose:
-- only this LUA script can read and update this key to avoid lock and concurrency issues.
local valkey_key = 'SearXNG_checker_next_call_ts'
local now = redis.call('TIME')[1]
local start_after_from = ARGV[1]
local start_after_to = ARGV[2]
local every_from = ARGV[3]
local every_to = ARGV[4]
local next_call_ts = redis.call('GET', valkey_key)
if (next_call_ts == false or next_call_ts == nil) then
-- the scheduler has never run on this Valkey instance, so:
-- 1/ the scheduler does not run now
-- 2/ the next call is a random time between start_after_from and start_after_to
local initial_delay = math.random(start_after_from, start_after_to)
redis.call('SET', valkey_key, now + initial_delay)
return { false, initial_delay }
end
-- next_call_ts is defined
-- --> if now is lower than next_call_ts then we don't run the embedded checker
-- --> if now is higher then we update next_call_ts and ask to run the embedded checker now.
local call_now = next_call_ts <= now
if call_now then
-- the checker runs now, define the timestamp of the next call:
-- this is a random delay between every_from and every_to
local periodic_delay = math.random(every_from, every_to)
next_call_ts = redis.call('INCRBY', valkey_key, periodic_delay)
end
return { call_now, next_call_ts - now }
-58
View File
@@ -1,58 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring
"""Lame scheduler which use Valkey as a source of truth:
* the Valkey key SearXNG_checker_next_call_ts contains the next time the embedded checker should run.
* to avoid lock, a unique Valkey script reads and updates the Valkey key SearXNG_checker_next_call_ts.
* this Valkey script returns a list of two elements:
* the first one is a boolean. If True, the embedded checker must run now in this worker.
* the second element is the delay in second to wait before the next call to the Valkey script.
This scheduler is deliberately not generic: if more features are required, a dedicated scheduler must be used
(= a better scheduler should not run inside the web workers)
"""
import logging
import time
from pathlib import Path
from typing import Callable
from searx.valkeydb import client as get_valkey_client
from searx.valkeylib import lua_script_storage
logger = logging.getLogger('searx.search.checker')
SCHEDULER_LUA = Path(__file__).parent / "scheduler.lua"
def scheduler_function(start_after_from: int, start_after_to: int, every_from: int, every_to: int, callback: Callable):
"""Run the checker periodically. The function never returns.
Parameters:
* start_after_from and start_after_to: when to call "callback" for the first on the Valkey instance
* every_from and every_to: after the first call, how often to call "callback"
There is no issue:
* to call this function is multiple workers
* to kill workers at any time as long there is one at least one worker
"""
scheduler_now_script = SCHEDULER_LUA.open().read()
while True:
# ask the Valkey script what to do
# the script says
# * if the checker must run now.
# * how to long to way before calling the script again (it can be call earlier, but not later).
script = lua_script_storage(get_valkey_client(), scheduler_now_script)
call_now, wait_time = script(args=[start_after_from, start_after_to, every_from, every_to])
# does the worker run the checker now?
if call_now:
# run the checker
try:
callback()
except Exception: # pylint: disable=broad-except
logger.exception("Error calling the embedded checker")
# only worker display the wait_time
logger.info("Next call to the checker in %s seconds", wait_time)
# wait until the next call
time.sleep(wait_time)
-79
View File
@@ -277,53 +277,6 @@ plugins:
# '(.*\.)?youtu\.be$': 'yt.example.com'
#
checker:
# disable checker when in debug mode
off_when_debug: true
# use "scheduling: {}" to disable scheduling
# scheduling: interval or int
# to activate the scheduler:
# * uncomment "scheduling" section
# * add "cache2 = name=searxngcache,items=2000,blocks=2000,blocksize=4096,bitmap=1"
# to your uwsgi.ini
# scheduling:
# start_after: [300, 1800] # delay to start the first run of the checker
# every: [86400, 90000] # how often the checker runs
# additional tests: only for the YAML anchors (see the engines section)
#
additional_tests:
rosebud: &test_rosebud
matrix:
query: rosebud
lang: en
result_container:
- not_empty
- ['one_title_contains', 'citizen kane']
test:
- unique_results
android: &test_android
matrix:
query: ['android']
lang: ['en', 'de', 'fr', 'zh-CN']
result_container:
- not_empty
- ['one_title_contains', 'google']
test:
- unique_results
# tests: only for the YAML anchors (see the engines section)
tests:
infobox: &tests_infobox
infobox:
matrix:
query: ["linux", "new york", "bbc"]
result_container:
- has_infobox
categories_as_tabs:
general:
@@ -746,7 +699,6 @@ engines:
shortcut: ddd
weight: 2
disabled: true
tests: *tests_infobox
# cloudflare protected
# - name: digbt
@@ -820,7 +772,6 @@ engines:
weight: 2
# add "list" to the array to get results in the results list
display_type: ["infobox"]
tests: *tests_infobox
categories: [general]
- name: duckduckgo
@@ -1053,32 +1004,18 @@ engines:
- name: google
engine: google
shortcut: go
# additional_tests:
# android: *test_android
- name: google images
engine: google_images
shortcut: goi
# additional_tests:
# android: *test_android
# dali:
# matrix:
# query: ['Dali Christ']
# lang: ['en', 'de', 'fr', 'zh-CN']
# result_container:
# - ['one_title_contains', 'Salvador']
- name: google news
engine: google_news
shortcut: gon
# additional_tests:
# android: *test_android
- name: google videos
engine: google_videos
shortcut: gov
# additional_tests:
# android: *test_android
- name: google scholar
engine: google_scholar
@@ -1784,8 +1721,6 @@ engines:
shortcut: qw
categories: [general, web]
disabled: true
additional_tests:
rosebud: *test_rosebud
- name: qwant news
qwant_categ: news
@@ -2021,8 +1956,6 @@ engines:
shortcut: sp
startpage_categ: web
categories: [general, web]
additional_tests:
rosebud: *test_rosebud
- name: startpage news
engine: startpage
@@ -2245,8 +2178,6 @@ engines:
base_url: "https://{language}.wikiquote.org/"
search_type: text
disabled: true
additional_tests:
rosebud: *test_rosebud
about:
website: https://www.wikiquote.org/
wikidata_id: Q369
@@ -2273,16 +2204,6 @@ engines:
about:
website: https://species.wikimedia.org/
wikidata_id: Q13679
tests:
wikispecies:
matrix:
query: "Campbell, L.I. et al. 2011: MicroRNAs"
lang: en
result_container:
- not_empty
- ['one_title_contains', 'Tardigrada']
test:
- unique_results
- name: wiktionary
engine: mediawiki
-4
View File
@@ -267,10 +267,6 @@ SCHEMA: dict[str, t.Any] = {
'networks': {},
},
'plugins': SettingsValue(dict, {}),
'checker': {
'off_when_debug': SettingsValue(bool, True, None),
'scheduling': SettingsValue((None, dict), None, None),
},
'categories_as_tabs': SettingsValue(dict, CATEGORIES_AS_TABS),
'engines': SettingsValue(list, []),
'doi_resolvers': {},
-4
View File
@@ -54,10 +54,6 @@ or manually by executing the searx/webapp.py file? -->
{{' '}}* Function: `{{ error.function }}`
{{' '}}* Code: `{{ error.code }}`
{{'\n'-}}
{%- endfor -%}
{%- for test_name, results in engine_reliability.checker.items() -%}
{%- if loop.first %}Checker{% endif -%}
{{-'\n '}}* {{ test_name }}: {% for result in results%}`{{ result }}`,{% endfor -%}
{%- endfor -%}
</textarea>
<input type="checkbox" id="step1">
+2 -6
View File
@@ -64,7 +64,7 @@
{%- if search_engine.enable_http -%}
<p>{{- icon_big('exclamation-sign', 'No HTTPS') -}}{{- _('No HTTPS')-}}</p>
{% endif -%}
{%- if reliabilities.get(search_engine.name, {}).errors or reliabilities.get(search_engine.name, {}).checker -%}
{%- if reliabilities.get(search_engine.name, {}).errors -%}
<a href="{{ url_for('stats', engine=search_engine.name|e) }}" {{- ' ' -}}
title="{{ _('View error logs and submit a bug report') }}">
{{- _('View error logs and submit a bug report') -}}
@@ -109,7 +109,6 @@
{%- macro engine_reliability(engine_name) -%}
{%- set r = reliabilities.get(engine_name, {}).get('reliability', None) -%}
{%- set checker_result = reliabilities.get(engine_name, {}).get('checker', []) -%}
{%- set errors = reliabilities.get(engine_name, {}).get('errors', []) -%}
{%- if r != None -%}
{%- if r <= 50 -%}
@@ -124,7 +123,7 @@
{% else %}
{%- set r = '' -%}
{%- endif -%}
{%- if checker_result or errors -%}
{%- if errors -%}
<td class="{{ label }} column-reliability">{{- '' -}}
<a href="{{ url_for('stats', engine=engine_name|e) }}">{{- '' -}}
<span>
@@ -132,9 +131,6 @@
</span>{{- '' -}}
</a>{{- '' -}}
<div class="engine-tooltip" role="tooltip" id="{{engine_name}}_reliability">
{%- if checker_result -%}
<p>{{ _("Failed checker test(s): ") }} {{ ', '.join(checker_result) }}</p>
{%- endif -%}
{%- if errors -%}<p>{{ _('Errors:') }}</p>{%- endif -%}
{%- for error in errors -%}
<p>{{ error }}</p>{{- '' -}}
-17
View File
@@ -124,23 +124,6 @@
{% endif %}
{% endfor %}
{% endfor %}
{% if engine_reliabilities[selected_engine_name].checker %}
<h3>{{ _('Checker') }}</h3>
<table>
<tr>
<th scope="col" class="failed-test">{{ _('Failed test') }}</th>
<th scope="col">{{ _('Comment(s)') }}</th>
</tr>
{% for test_name, results in engine_reliabilities[selected_engine_name].checker.items() %}
<tr>
<td>{{ test_name }}</td>
<td>
{% for r in results %}<p>{{ r }}</p>{% endfor %}
</td>
</tr>
{% endfor %}
</table>
{% endif %}
{{ new_issue(selected_engine_name, engine_reliabilities[selected_engine_name]) }}
</div>
{% endif %}
+3 -42
View File
@@ -118,7 +118,6 @@ from searx.valkeydb import initialize as valkey_initialize
from searx.sxng_locales import sxng_locales
import searx.search
from searx.network import stream as http_stream, set_context_network_name
from searx.search.checker import get_result as checker_get_result
logger = logger.getChild('webapp')
@@ -929,23 +928,11 @@ def preferences():
# reliabilities
reliabilities = {}
engine_errors = get_engine_errors(filtered_engines)
checker_results = checker_get_result()
checker_results = (
checker_results['engines'] if checker_results['status'] == 'ok' and 'engines' in checker_results else {}
)
for _, e in filtered_engines.items():
checker_result = checker_results.get(e.name, {})
checker_success = checker_result.get('success', True)
errors = engine_errors.get(e.name) or []
if counter('engine', e.name, 'search', 'count', 'sent') == 0:
# no request
reliability = None
elif checker_success and not errors:
reliability = 100
elif 'simple' in checker_result.get('errors', {}):
# the basic (simple) test doesn't work: the engine is broken according to the checker
# even if there is no exception
reliability = 0
else:
# pylint: disable=consider-using-generator
reliability = 100 - sum([error['percentage'] for error in errors if not error.get('secondary')])
@@ -953,10 +940,7 @@ def preferences():
reliabilities[e.name] = {
'reliability': reliability,
'errors': [],
'checker': checker_results.get(e.name, {}).get('errors', {}).keys(),
}
# keep the order of the list checker_results[e.name]['errors'] and deduplicate.
# the first element has the highest percentage rate.
reliabilities_errors = []
for error in errors:
error_user_text = None
@@ -977,13 +961,6 @@ def preferences():
)
safesearch = e.safesearch
time_range_support = e.time_range_support
for checker_test_name in checker_results.get(e.name, {}).get('errors', {}):
if supports_selected_language and checker_test_name.startswith('lang_'):
supports_selected_language = '?'
elif safesearch and checker_test_name == 'safesearch':
safesearch = '?'
elif time_range_support and checker_test_name == 'time_range':
time_range_support = '?'
supports[e.name] = {
'supports_selected_language': supports_selected_language,
'safesearch': safesearch,
@@ -1133,13 +1110,8 @@ def stats():
else:
filtered_engines = [selected_engine_name]
checker_results = checker_get_result()
checker_results = (
checker_results['engines'] if checker_results['status'] == 'ok' and 'engines' in checker_results else {}
)
engine_stats = get_engines_stats(filtered_engines)
engine_reliabilities = get_reliabilities(filtered_engines, checker_results)
engine_reliabilities = get_reliabilities(filtered_engines)
if sort_order not in STATS_SORT_PARAMETERS:
sort_order = 'name'
@@ -1194,12 +1166,6 @@ def stats_errors():
return jsonify(result)
@app.route('/stats/checker', methods=['GET'])
def stats_checker():
result = checker_get_result()
return jsonify(result)
@app.route('/metrics')
def stats_open_metrics():
password = settings['general'].get("open_metrics")
@@ -1212,13 +1178,8 @@ def stats_open_metrics():
filtered_engines = dict(filter(lambda kv: sxng_request.preferences.validate_token(kv[1]), engines.items()))
checker_results = checker_get_result()
checker_results = (
checker_results['engines'] if checker_results['status'] == 'ok' and 'engines' in checker_results else {}
)
engine_stats = get_engines_stats(filtered_engines)
engine_reliabilities = get_reliabilities(filtered_engines, checker_results)
engine_reliabilities = get_reliabilities(filtered_engines)
metrics_text = openmetrics(engine_stats, engine_reliabilities)
return Response(metrics_text, mimetype='text/plain')
@@ -1394,7 +1355,7 @@ def init():
searx.plugins.initialize(app)
metrics: bool = get_setting("general.enable_metrics") # type: ignore
searx.search.initialize(enable_checker=True, check_network=True, enable_metrics=metrics)
searx.search.initialize(check_network=True, enable_metrics=metrics)
limiter.initialize(app, settings)
favicons.init()
+1 -2
View File
@@ -39,7 +39,7 @@ setup(
],
project_urls={"Code": GIT_URL, "Issue tracker": get_setting('brand.issue_url')},
entry_points={
'console_scripts': ['searxng-run = searx.webapp:run', 'searxng-checker = searx.search.checker.__main__:main']
'console_scripts': ['searxng-run = searx.webapp:run']
},
packages=find_packages(
include=[
@@ -54,7 +54,6 @@ setup(
'settings.yml',
'*.toml',
'*.msg',
'search/checker/scheduler.lua',
'data/*.json',
'data/*.txt',
'data/*.ftz',
-1
View File
@@ -82,7 +82,6 @@ class SearxTestCase(aiounittest.AsyncTestCase):
# - initialize searx.network, searx.metrics, searx.processors and searx.search.checker
searx.search.initialize(
enable_checker=True,
check_network=True,
enable_metrics=searx.get_setting("general.enable_metrics"), # type: ignore
)