From 26801e92af55cfef264fc34f0b84473e5bfc94cd Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Wed, 10 Jun 2026 15:48:49 +0200 Subject: [PATCH] [fix] sqlitedb: create DB Schema (DDL) during app initialization (hardening) (#6187) The initialization of the DB schema ("base schema") has so far been done on demand, which causes race conditions with competing threads and processes. The DDL statements for creating the "base schema" are now executed as part of the initialization of the app. Further improvements were made to harden the database applications: - Wikidata & Radio-Browser engine perform their initialization only once (so far the initialization was carried out in each thread/process). - If multiple processes try to set DB's WAL mode when opening the DB at the same time, this usually leads to another race condition, which is now also caught. Related: - https://github.com/searxng/searxng/issues/6181#issuecomment-4586705 Closes: #6181 Signed-off-by: Markus Heiser --- searx/cache.py | 13 ++++++++----- searx/engines/__init__.py | 5 ++++- searx/engines/radio_browser.py | 15 ++++++++++++++- searx/engines/wikidata.py | 15 ++++++++++++++- searx/search/processors/abstract.py | 5 ++++- searx/sqlitedb.py | 23 ++++++++++++++++------- searx/webapp.py | 15 ++++++++++++++- 7 files changed, 74 insertions(+), 17 deletions(-) diff --git a/searx/cache.py b/searx/cache.py index f7c256078..e6b6541d9 100644 --- a/searx/cache.py +++ b/searx/cache.py @@ -444,12 +444,10 @@ class ExpireCacheSQLite(sqlitedb.SQLiteAppl, ExpireCache): def get(self, key: str, default: typing.Any = None, ctx: str | None = None) -> typing.Any: """Get value of ``key`` from table given by argument ``ctx``. If ``ctx`` argument is ``None`` (the default), a table name is generated - from the :py:obj:`ExpireCacheCfg.name`. If ``key`` not exists (in - table), the ``default`` value is returned. - + from the :py:obj:`ExpireCacheCfg.name`. 
If ``key`` does not exist in + the table or the table does not exist, the ``default`` value is returned. """ table = ctx - self.maintenance() if not table: table = self.normalize_name(self.cfg.name) @@ -457,6 +455,9 @@ class ExpireCacheSQLite(sqlitedb.SQLiteAppl, ExpireCache): if table not in self.table_names: return default + # Before values are taken from the table, a maintenance interval may + # need to be carried out. + self.maintenance() sql = f"SELECT value FROM {table} WHERE key = ?" row = self.DB.execute(sql, (key,)).fetchone() if row is None: @@ -469,12 +470,14 @@ class ExpireCacheSQLite(sqlitedb.SQLiteAppl, ExpireCache): If ``ctx`` argument is ``None`` (the default), a table name is generated from the :py:obj:`ExpireCacheCfg.name`.""" table = ctx - self.maintenance() if not table: table = self.normalize_name(self.cfg.name) if table in self.table_names: + # Before values are taken from the table, a maintenance interval may + # need to be carried out. + self.maintenance() for row in self.DB.execute(f"SELECT key, value FROM {table}"): yield row[0], self.deserialize(row[1]) diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index 1b1574b8b..e906b780b 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -12,6 +12,7 @@ import typing as t import sys import copy +import os from os.path import realpath, dirname import types @@ -278,6 +279,8 @@ def load_engines(engine_list: list[dict[str, t.Any]]): else: # if an engine can't be loaded (if for example the engine is missing # tor or some other requirements) its set to inactive! 
- logger.error("loading engine %s failed: set engine to inactive!", engine_data.get("name", "???")) + logger.error( + f"(PID {os.getpid()}) loading engine %s failed: set engine to inactive!", engine_data.get("name", "???") + ) engine_data["inactive"] = True return engines diff --git a/searx/engines/radio_browser.py b/searx/engines/radio_browser.py index 8714b7104..d79963c2e 100644 --- a/searx/engines/radio_browser.py +++ b/searx/engines/radio_browser.py @@ -6,6 +6,7 @@ """ +import os import random import socket from urllib.parse import urlencode @@ -59,7 +60,19 @@ seconds.""" def init(_): global CACHE # pylint: disable=global-statement CACHE = EngineCache("radio_browser") - server_list() + + # In an environment with competing processes, the initial loading of the + # cache is required only once. + eng_state: str | None = CACHE.get("eng_state") + if not eng_state or not eng_state.startswith("STATE:"): + CACHE.set("eng_state", f"STATE: being initialized by PID {os.getpid()}") + try: + server_list() + except Exception: + CACHE.set("eng_state", f"ERROR: initialization by PID {os.getpid()} failed.") + raise + else: + logger.debug(eng_state) def server_list() -> list[str]: diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py index 3902f79e0..85f747d93 100644 --- a/searx/engines/wikidata.py +++ b/searx/engines/wikidata.py @@ -7,6 +7,7 @@ Some implementations are shared from :ref:`wikipedia engine`. import typing as t +import os from hashlib import md5 from urllib.parse import urlencode, unquote from json import loads @@ -827,7 +828,19 @@ def debug_explain_wikidata_query(query: str, method: str = "GET"): def init(_): global CACHE # pylint: disable=global-statement CACHE = EngineCache("wikidata") - init_wikidata_properties() + + # In an environment with competing processes, the initial loading of the + # cache is required only once. 
+ eng_state: str | None = CACHE.get("eng_state") + if not eng_state or not eng_state.startswith("STATE:"): + CACHE.set("eng_state", f"STATE: being initialized by PID {os.getpid()}") + try: + init_wikidata_properties() + except Exception: + CACHE.set("eng_state", f"ERROR: initialization by PID {os.getpid()} failed.") + raise + else: + logger.debug(eng_state) def init_wikidata_properties(): diff --git a/searx/search/processors/abstract.py b/searx/search/processors/abstract.py index cc19077b2..a504cdcba 100644 --- a/searx/search/processors/abstract.py +++ b/searx/search/processors/abstract.py @@ -3,6 +3,7 @@ import typing as t +import os import logging import threading from abc import abstractmethod, ABC @@ -154,7 +155,9 @@ class EngineProcessor(ABC): try: init_ok = self.engine.init(eng_setting) except Exception: # pylint: disable=broad-except - logger.exception("Init method of engine %s failed due to an exception.", self.engine.name) + logger.exception( + f"(PID {os.getpid()}) Init method of engine %s failed due to an exception.", self.engine.name + ) init_ok = False # In older engines, None is returned from the init method, which is # equivalent to indicating that the initialization was successful. diff --git a/searx/sqlitedb.py b/searx/sqlitedb.py index a4f498339..da8147166 100644 --- a/searx/sqlitedb.py +++ b/searx/sqlitedb.py @@ -121,8 +121,8 @@ class SQLiteAppl(abc.ABC): .. 
_WAL: https://sqlite.org/wal.html """ - SQLITE_CONNECT_ARGS: dict[str,str|int|bool|None] = { - # "timeout": 5.0, + SQLITE_CONNECT_ARGS: dict[str, str | float | int | bool | None] = { + "timeout": 3.0, # default is 5sec # "detect_types": 0, "check_same_thread": bool(SQLITE_THREADING_MODE != "serialized"), "cached_statements": 0, # https://github.com/python/cpython/issues/118172 @@ -195,6 +195,7 @@ class SQLiteAppl(abc.ABC): self.db_url: str = db_url self.properties: SQLiteProperties = SQLiteProperties(db_url) self._init_done: bool = False + self._DB: sqlite3.Connection | None = None self._compatibility() # atexit.register(self.tear_down) @@ -209,7 +210,7 @@ class SQLiteAppl(abc.ABC): def _compatibility(self): if self.SQLITE_THREADING_MODE == "serialized": - self._DB: sqlite3.Connection | None = None + self._DB = None else: msg = ( f"SQLite library is compiled with {self.SQLITE_THREADING_MODE} mode," @@ -228,7 +229,13 @@ class SQLiteAppl(abc.ABC): def _connect(self) -> sqlite3.Connection: conn = sqlite3.Connection(self.db_url, **self.SQLITE_CONNECT_ARGS) # type: ignore - conn.execute(f"PRAGMA journal_mode={self.SQLITE_JOURNAL_MODE}") + try: + with conn: + conn.execute(f"PRAGMA journal_mode={self.SQLITE_JOURNAL_MODE}") + except sqlite3.OperationalError: + # when database is locked, the journal_mode is already set by + # different but concurrent process (no need to set it once more) + pass self.register_functions(conn) return conn @@ -312,7 +319,8 @@ class SQLiteAppl(abc.ABC): # Since more than one instance of SQLiteAppl share the same DB # connection, we need to make sure that each SQLiteAppl instance has run # its init method at least once. 
- self.init(conn) + with conn: + self.init(conn) return conn @@ -330,7 +338,8 @@ class SQLiteAppl(abc.ABC): self._init_done = True logger.debug("init DB: %s", self.db_url) - self.properties.init(conn) + with conn: + self.properties.init(conn) ver = self.properties("DB_SCHEMA") if ver is None: @@ -409,7 +418,7 @@ CREATE TABLE IF NOT EXISTS properties ( self._init_done = True logger.debug("init properties of DB: %s", self.db_url) res = conn.execute(self.SQL_TABLE_EXISTS) - if res.fetchone() is None: # DB schema needs to be be created + if res.fetchone() is None: # DB schema needs to be created self.create_schema(conn) return True diff --git a/searx/webapp.py b/searx/webapp.py index d83ab149a..55ef84e8d 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -1348,6 +1348,8 @@ def run(): def init(): + # pylint: disable=import-outside-toplevel + if searx.sxng_debug or app.debug: app.debug = True searx.sxng_debug = True @@ -1358,6 +1360,18 @@ def init(): logger.error("server.secret_key is not changed. Please use something else instead of ultrasecretkey.") sys.exit(1) + # init database schema first / DB schema is created with the first connect + from searx.data import get_cache + from searx.enginelib import ENGINES_CACHE + + conn = get_cache().connect() + conn.close() + conn = ENGINES_CACHE.connect() + conn.close() + + favicons.init() + + # init application locales_initialize() valkey_initialize() searx.plugins.initialize(app) @@ -1366,7 +1380,6 @@ def init(): searx.search.initialize(check_network=True, enable_metrics=metrics) limiter.initialize(app, settings) - favicons.init() def static_headers(headers: Headers, _path: str, _url: str) -> None: