[fix] sqlitedb: create DB Schema (DDL) during app initialization (hardening) (#6187)

The initialization of the DB schema ("base schema") has so far been done on
demand, which causes race conditions with competing threads and processes.

The DDL statements for creating the "base schema" are now executed as part of
the initialization of the app.

Further improvements were made to harden the database applications:

- The Wikidata & Radio-Browser engines perform their initialization only once
  (so far the initialization was carried out in each thread/process).

- If multiple processes try to set the DB's WAL mode while opening the DB at the
  same time, this usually leads to another race condition (the database is
  locked), which is now also caught.

Related:

- https://github.com/searxng/searxng/issues/6181#issuecomment-4586705

Closes: #6181

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser
2026-06-10 15:48:49 +02:00
committed by GitHub
parent f3fab143be
commit 26801e92af
7 changed files with 74 additions and 17 deletions
+8 -5
View File
@@ -444,12 +444,10 @@ class ExpireCacheSQLite(sqlitedb.SQLiteAppl, ExpireCache):
def get(self, key: str, default: typing.Any = None, ctx: str | None = None) -> typing.Any: def get(self, key: str, default: typing.Any = None, ctx: str | None = None) -> typing.Any:
"""Get value of ``key`` from table given by argument ``ctx``. If """Get value of ``key`` from table given by argument ``ctx``. If
``ctx`` argument is ``None`` (the default), a table name is generated ``ctx`` argument is ``None`` (the default), a table name is generated
from the :py:obj:`ExpireCacheCfg.name`. If ``key`` not exists (in from the :py:obj:`ExpireCacheCfg.name`. If ``key`` not exists in
table), the ``default`` value is returned. the table or the table not exists, the ``default`` value is returned.
""" """
table = ctx table = ctx
self.maintenance()
if not table: if not table:
table = self.normalize_name(self.cfg.name) table = self.normalize_name(self.cfg.name)
@@ -457,6 +455,9 @@ class ExpireCacheSQLite(sqlitedb.SQLiteAppl, ExpireCache):
if table not in self.table_names: if table not in self.table_names:
return default return default
# Before values are taken from the table, a maintenance interval may
# need to be carried out.
self.maintenance()
sql = f"SELECT value FROM {table} WHERE key = ?" sql = f"SELECT value FROM {table} WHERE key = ?"
row = self.DB.execute(sql, (key,)).fetchone() row = self.DB.execute(sql, (key,)).fetchone()
if row is None: if row is None:
@@ -469,12 +470,14 @@ class ExpireCacheSQLite(sqlitedb.SQLiteAppl, ExpireCache):
If ``ctx`` argument is ``None`` (the default), a table name is If ``ctx`` argument is ``None`` (the default), a table name is
generated from the :py:obj:`ExpireCacheCfg.name`.""" generated from the :py:obj:`ExpireCacheCfg.name`."""
table = ctx table = ctx
self.maintenance()
if not table: if not table:
table = self.normalize_name(self.cfg.name) table = self.normalize_name(self.cfg.name)
if table in self.table_names: if table in self.table_names:
# Before values are taken from the table, a maintenance interval may
# need to be carried out.
self.maintenance()
for row in self.DB.execute(f"SELECT key, value FROM {table}"): for row in self.DB.execute(f"SELECT key, value FROM {table}"):
yield row[0], self.deserialize(row[1]) yield row[0], self.deserialize(row[1])
+4 -1
View File
@@ -12,6 +12,7 @@ import typing as t
import sys import sys
import copy import copy
import os
from os.path import realpath, dirname from os.path import realpath, dirname
import types import types
@@ -278,6 +279,8 @@ def load_engines(engine_list: list[dict[str, t.Any]]):
else: else:
# if an engine can't be loaded (if for example the engine is missing # if an engine can't be loaded (if for example the engine is missing
# tor or some other requirements) its set to inactive! # tor or some other requirements) its set to inactive!
logger.error("loading engine %s failed: set engine to inactive!", engine_data.get("name", "???")) logger.error(
f"(PID {os.getpid()}) loading engine %s failed: set engine to inactive!", engine_data.get("name", "???")
)
engine_data["inactive"] = True engine_data["inactive"] = True
return engines return engines
+13
View File
@@ -6,6 +6,7 @@
""" """
import os
import random import random
import socket import socket
from urllib.parse import urlencode from urllib.parse import urlencode
@@ -59,7 +60,19 @@ seconds."""
def init(_): def init(_):
global CACHE # pylint: disable=global-statement global CACHE # pylint: disable=global-statement
CACHE = EngineCache("radio_browser") CACHE = EngineCache("radio_browser")
# In an environment with competing processes, the initial loading of the
# cache is required only once.
eng_state: str | None = CACHE.get("eng_state")
if not eng_state or not eng_state.startswith("STATE:"):
CACHE.set("eng_state", f"STATE: being initialized by PID {os.getpid()}")
try:
server_list() server_list()
except Exception:
CACHE.set("eng_state", f"ERROR: initialization by PID {os.getpid()} failed.")
raise
else:
logger.debug(eng_state)
def server_list() -> list[str]: def server_list() -> list[str]:
+13
View File
@@ -7,6 +7,7 @@ Some implementations are shared from :ref:`wikipedia engine`.
import typing as t import typing as t
import os
from hashlib import md5 from hashlib import md5
from urllib.parse import urlencode, unquote from urllib.parse import urlencode, unquote
from json import loads from json import loads
@@ -827,7 +828,19 @@ def debug_explain_wikidata_query(query: str, method: str = "GET"):
def init(_): def init(_):
global CACHE # pylint: disable=global-statement global CACHE # pylint: disable=global-statement
CACHE = EngineCache("wikidata") CACHE = EngineCache("wikidata")
# In an environment with competing processes, the initial loading of the
# cache is required only once.
eng_state: str | None = CACHE.get("eng_state")
if not eng_state or not eng_state.startswith("STATE:"):
CACHE.set("eng_state", f"STATE: being initialized by PID {os.getpid()}")
try:
init_wikidata_properties() init_wikidata_properties()
except Exception:
CACHE.set("eng_state", f"ERROR: initialization by PID {os.getpid()} failed.")
raise
else:
logger.debug(eng_state)
def init_wikidata_properties(): def init_wikidata_properties():
+4 -1
View File
@@ -3,6 +3,7 @@
import typing as t import typing as t
import os
import logging import logging
import threading import threading
from abc import abstractmethod, ABC from abc import abstractmethod, ABC
@@ -154,7 +155,9 @@ class EngineProcessor(ABC):
try: try:
init_ok = self.engine.init(eng_setting) init_ok = self.engine.init(eng_setting)
except Exception: # pylint: disable=broad-except except Exception: # pylint: disable=broad-except
logger.exception("Init method of engine %s failed due to an exception.", self.engine.name) logger.exception(
f"(PID {os.getpid()}) Init method of engine %s failed due to an exception.", self.engine.name
)
init_ok = False init_ok = False
# In older engines, None is returned from the init method, which is # In older engines, None is returned from the init method, which is
# equivalent to indicating that the initialization was successful. # equivalent to indicating that the initialization was successful.
+13 -4
View File
@@ -121,8 +121,8 @@ class SQLiteAppl(abc.ABC):
.. _WAL: https://sqlite.org/wal.html .. _WAL: https://sqlite.org/wal.html
""" """
SQLITE_CONNECT_ARGS: dict[str,str|int|bool|None] = { SQLITE_CONNECT_ARGS: dict[str, str | float | int | bool | None] = {
# "timeout": 5.0, "timeout": 3.0, # default is 5sec
# "detect_types": 0, # "detect_types": 0,
"check_same_thread": bool(SQLITE_THREADING_MODE != "serialized"), "check_same_thread": bool(SQLITE_THREADING_MODE != "serialized"),
"cached_statements": 0, # https://github.com/python/cpython/issues/118172 "cached_statements": 0, # https://github.com/python/cpython/issues/118172
@@ -195,6 +195,7 @@ class SQLiteAppl(abc.ABC):
self.db_url: str = db_url self.db_url: str = db_url
self.properties: SQLiteProperties = SQLiteProperties(db_url) self.properties: SQLiteProperties = SQLiteProperties(db_url)
self._init_done: bool = False self._init_done: bool = False
self._DB: sqlite3.Connection | None = None
self._compatibility() self._compatibility()
# atexit.register(self.tear_down) # atexit.register(self.tear_down)
@@ -209,7 +210,7 @@ class SQLiteAppl(abc.ABC):
def _compatibility(self): def _compatibility(self):
if self.SQLITE_THREADING_MODE == "serialized": if self.SQLITE_THREADING_MODE == "serialized":
self._DB: sqlite3.Connection | None = None self._DB = None
else: else:
msg = ( msg = (
f"SQLite library is compiled with {self.SQLITE_THREADING_MODE} mode," f"SQLite library is compiled with {self.SQLITE_THREADING_MODE} mode,"
@@ -228,7 +229,13 @@ class SQLiteAppl(abc.ABC):
def _connect(self) -> sqlite3.Connection: def _connect(self) -> sqlite3.Connection:
conn = sqlite3.Connection(self.db_url, **self.SQLITE_CONNECT_ARGS) # type: ignore conn = sqlite3.Connection(self.db_url, **self.SQLITE_CONNECT_ARGS) # type: ignore
try:
with conn:
conn.execute(f"PRAGMA journal_mode={self.SQLITE_JOURNAL_MODE}") conn.execute(f"PRAGMA journal_mode={self.SQLITE_JOURNAL_MODE}")
except sqlite3.OperationalError:
# when database is locked, the journal_mode is already set by
# different but concurrent process (no need to set it once more)
pass
self.register_functions(conn) self.register_functions(conn)
return conn return conn
@@ -312,6 +319,7 @@ class SQLiteAppl(abc.ABC):
# Since more than one instance of SQLiteAppl share the same DB # Since more than one instance of SQLiteAppl share the same DB
# connection, we need to make sure that each SQLiteAppl instance has run # connection, we need to make sure that each SQLiteAppl instance has run
# its init method at least once. # its init method at least once.
with conn:
self.init(conn) self.init(conn)
return conn return conn
@@ -330,6 +338,7 @@ class SQLiteAppl(abc.ABC):
self._init_done = True self._init_done = True
logger.debug("init DB: %s", self.db_url) logger.debug("init DB: %s", self.db_url)
with conn:
self.properties.init(conn) self.properties.init(conn)
ver = self.properties("DB_SCHEMA") ver = self.properties("DB_SCHEMA")
@@ -409,7 +418,7 @@ CREATE TABLE IF NOT EXISTS properties (
self._init_done = True self._init_done = True
logger.debug("init properties of DB: %s", self.db_url) logger.debug("init properties of DB: %s", self.db_url)
res = conn.execute(self.SQL_TABLE_EXISTS) res = conn.execute(self.SQL_TABLE_EXISTS)
if res.fetchone() is None: # DB schema needs to be be created if res.fetchone() is None: # DB schema needs to be created
self.create_schema(conn) self.create_schema(conn)
return True return True
+14 -1
View File
@@ -1348,6 +1348,8 @@ def run():
def init(): def init():
# pylint: disable=import-outside-toplevel
if searx.sxng_debug or app.debug: if searx.sxng_debug or app.debug:
app.debug = True app.debug = True
searx.sxng_debug = True searx.sxng_debug = True
@@ -1358,6 +1360,18 @@ def init():
logger.error("server.secret_key is not changed. Please use something else instead of ultrasecretkey.") logger.error("server.secret_key is not changed. Please use something else instead of ultrasecretkey.")
sys.exit(1) sys.exit(1)
# init database schema first / DB schema is created with the first connect
from searx.data import get_cache
from searx.enginelib import ENGINES_CACHE
conn = get_cache().connect()
conn.close()
conn = ENGINES_CACHE.connect()
conn.close()
favicons.init()
# init application
locales_initialize() locales_initialize()
valkey_initialize() valkey_initialize()
searx.plugins.initialize(app) searx.plugins.initialize(app)
@@ -1366,7 +1380,6 @@ def init():
searx.search.initialize(check_network=True, enable_metrics=metrics) searx.search.initialize(check_network=True, enable_metrics=metrics)
limiter.initialize(app, settings) limiter.initialize(app, settings)
favicons.init()
def static_headers(headers: Headers, _path: str, _url: str) -> None: def static_headers(headers: Headers, _path: str, _url: str) -> None: