mirror of
https://github.com/searxng/searxng.git
synced 2026-06-11 12:27:52 +02:00
[fix] sqlitedb: create DB Schema (DDL) during app initialization (hardening) (#6187)
The initialization of the DB schema ("base schema") has so far been done on
demand, which causes race conditions with competing threads and processes.
The DDL statements for creating the "base schema" are now executed as part of
the initialization of the app.
Further improvements were made to harden the database applications:
- The Wikidata & Radio-Browser engines now perform their initialization only
once (previously the initialization was carried out in each thread/process).
- If multiple processes try to set the DB's WAL mode when opening the DB at the
same time, this usually leads to another race condition (the DB is locked); the resulting error is now also caught.
Related:
- https://github.com/searxng/searxng/issues/6181#issuecomment-4586705
Closes: #6181
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
+8
-5
@@ -444,12 +444,10 @@ class ExpireCacheSQLite(sqlitedb.SQLiteAppl, ExpireCache):
|
||||
def get(self, key: str, default: typing.Any = None, ctx: str | None = None) -> typing.Any:
|
||||
"""Get value of ``key`` from table given by argument ``ctx``. If
|
||||
``ctx`` argument is ``None`` (the default), a table name is generated
|
||||
from the :py:obj:`ExpireCacheCfg.name`. If ``key`` not exists (in
|
||||
table), the ``default`` value is returned.
|
||||
|
||||
from the :py:obj:`ExpireCacheCfg.name`. If ``key`` does not exist in
|
||||
the table or the table does not exist, the ``default`` value is returned.
|
||||
"""
|
||||
table = ctx
|
||||
self.maintenance()
|
||||
|
||||
if not table:
|
||||
table = self.normalize_name(self.cfg.name)
|
||||
@@ -457,6 +455,9 @@ class ExpireCacheSQLite(sqlitedb.SQLiteAppl, ExpireCache):
|
||||
if table not in self.table_names:
|
||||
return default
|
||||
|
||||
# Before values are taken from the table, a maintenance interval may
|
||||
# need to be carried out.
|
||||
self.maintenance()
|
||||
sql = f"SELECT value FROM {table} WHERE key = ?"
|
||||
row = self.DB.execute(sql, (key,)).fetchone()
|
||||
if row is None:
|
||||
@@ -469,12 +470,14 @@ class ExpireCacheSQLite(sqlitedb.SQLiteAppl, ExpireCache):
|
||||
If ``ctx`` argument is ``None`` (the default), a table name is
|
||||
generated from the :py:obj:`ExpireCacheCfg.name`."""
|
||||
table = ctx
|
||||
self.maintenance()
|
||||
|
||||
if not table:
|
||||
table = self.normalize_name(self.cfg.name)
|
||||
|
||||
if table in self.table_names:
|
||||
# Before values are taken from the table, a maintenance interval may
|
||||
# need to be carried out.
|
||||
self.maintenance()
|
||||
for row in self.DB.execute(f"SELECT key, value FROM {table}"):
|
||||
yield row[0], self.deserialize(row[1])
|
||||
|
||||
|
||||
@@ -12,6 +12,7 @@ import typing as t
|
||||
|
||||
import sys
|
||||
import copy
|
||||
import os
|
||||
from os.path import realpath, dirname
|
||||
|
||||
import types
|
||||
@@ -278,6 +279,8 @@ def load_engines(engine_list: list[dict[str, t.Any]]):
|
||||
else:
|
||||
# if an engine can't be loaded (if for example the engine is missing
|
||||
# tor or some other requirements) its set to inactive!
|
||||
logger.error("loading engine %s failed: set engine to inactive!", engine_data.get("name", "???"))
|
||||
logger.error(
|
||||
f"(PID {os.getpid()}) loading engine %s failed: set engine to inactive!", engine_data.get("name", "???")
|
||||
)
|
||||
engine_data["inactive"] = True
|
||||
return engines
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
|
||||
"""
|
||||
|
||||
import os
|
||||
import random
|
||||
import socket
|
||||
from urllib.parse import urlencode
|
||||
@@ -59,7 +60,19 @@ seconds."""
|
||||
def init(_):
|
||||
global CACHE # pylint: disable=global-statement
|
||||
CACHE = EngineCache("radio_browser")
|
||||
server_list()
|
||||
|
||||
# In an environment with competing processes, the initial loading of the
|
||||
# cache is required only once.
|
||||
eng_state: str | None = CACHE.get("eng_state")
|
||||
if not eng_state or not eng_state.startswith("STATE:"):
|
||||
CACHE.set("eng_state", f"STATE: being initialized by PID {os.getpid()}")
|
||||
try:
|
||||
server_list()
|
||||
except Exception:
|
||||
CACHE.set("eng_state", f"ERROR: initialization by PID {os.getpid()} failed.")
|
||||
raise
|
||||
else:
|
||||
logger.debug(eng_state)
|
||||
|
||||
|
||||
def server_list() -> list[str]:
|
||||
|
||||
@@ -7,6 +7,7 @@ Some implementations are shared from :ref:`wikipedia engine`.
|
||||
|
||||
import typing as t
|
||||
|
||||
import os
|
||||
from hashlib import md5
|
||||
from urllib.parse import urlencode, unquote
|
||||
from json import loads
|
||||
@@ -827,7 +828,19 @@ def debug_explain_wikidata_query(query: str, method: str = "GET"):
|
||||
def init(_):
|
||||
global CACHE # pylint: disable=global-statement
|
||||
CACHE = EngineCache("wikidata")
|
||||
init_wikidata_properties()
|
||||
|
||||
# In an environment with competing processes, the initial loading of the
|
||||
# cache is required only once.
|
||||
eng_state: str | None = CACHE.get("eng_state")
|
||||
if not eng_state or not eng_state.startswith("STATE:"):
|
||||
CACHE.set("eng_state", f"STATE: being initialized by PID {os.getpid()}")
|
||||
try:
|
||||
init_wikidata_properties()
|
||||
except Exception:
|
||||
CACHE.set("eng_state", f"ERROR: initialization by PID {os.getpid()} failed.")
|
||||
raise
|
||||
else:
|
||||
logger.debug(eng_state)
|
||||
|
||||
|
||||
def init_wikidata_properties():
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
|
||||
import typing as t
|
||||
|
||||
import os
|
||||
import logging
|
||||
import threading
|
||||
from abc import abstractmethod, ABC
|
||||
@@ -154,7 +155,9 @@ class EngineProcessor(ABC):
|
||||
try:
|
||||
init_ok = self.engine.init(eng_setting)
|
||||
except Exception: # pylint: disable=broad-except
|
||||
logger.exception("Init method of engine %s failed due to an exception.", self.engine.name)
|
||||
logger.exception(
|
||||
f"(PID {os.getpid()}) Init method of engine %s failed due to an exception.", self.engine.name
|
||||
)
|
||||
init_ok = False
|
||||
# In older engines, None is returned from the init method, which is
|
||||
# equivalent to indicating that the initialization was successful.
|
||||
|
||||
+16
-7
@@ -121,8 +121,8 @@ class SQLiteAppl(abc.ABC):
|
||||
|
||||
.. _WAL: https://sqlite.org/wal.html
|
||||
"""
|
||||
SQLITE_CONNECT_ARGS: dict[str,str|int|bool|None] = {
|
||||
# "timeout": 5.0,
|
||||
SQLITE_CONNECT_ARGS: dict[str, str | float | int | bool | None] = {
|
||||
"timeout": 3.0, # default is 5sec
|
||||
# "detect_types": 0,
|
||||
"check_same_thread": bool(SQLITE_THREADING_MODE != "serialized"),
|
||||
"cached_statements": 0, # https://github.com/python/cpython/issues/118172
|
||||
@@ -195,6 +195,7 @@ class SQLiteAppl(abc.ABC):
|
||||
self.db_url: str = db_url
|
||||
self.properties: SQLiteProperties = SQLiteProperties(db_url)
|
||||
self._init_done: bool = False
|
||||
self._DB: sqlite3.Connection | None = None
|
||||
self._compatibility()
|
||||
# atexit.register(self.tear_down)
|
||||
|
||||
@@ -209,7 +210,7 @@ class SQLiteAppl(abc.ABC):
|
||||
def _compatibility(self):
|
||||
|
||||
if self.SQLITE_THREADING_MODE == "serialized":
|
||||
self._DB: sqlite3.Connection | None = None
|
||||
self._DB = None
|
||||
else:
|
||||
msg = (
|
||||
f"SQLite library is compiled with {self.SQLITE_THREADING_MODE} mode,"
|
||||
@@ -228,7 +229,13 @@ class SQLiteAppl(abc.ABC):
|
||||
|
||||
def _connect(self) -> sqlite3.Connection:
|
||||
conn = sqlite3.Connection(self.db_url, **self.SQLITE_CONNECT_ARGS) # type: ignore
|
||||
conn.execute(f"PRAGMA journal_mode={self.SQLITE_JOURNAL_MODE}")
|
||||
try:
|
||||
with conn:
|
||||
conn.execute(f"PRAGMA journal_mode={self.SQLITE_JOURNAL_MODE}")
|
||||
except sqlite3.OperationalError:
|
||||
# when database is locked, the journal_mode is already set by
|
||||
# different but concurrent process (no need to set it once more)
|
||||
pass
|
||||
self.register_functions(conn)
|
||||
return conn
|
||||
|
||||
@@ -312,7 +319,8 @@ class SQLiteAppl(abc.ABC):
|
||||
# Since more than one instance of SQLiteAppl share the same DB
|
||||
# connection, we need to make sure that each SQLiteAppl instance has run
|
||||
# its init method at least once.
|
||||
self.init(conn)
|
||||
with conn:
|
||||
self.init(conn)
|
||||
|
||||
return conn
|
||||
|
||||
@@ -330,7 +338,8 @@ class SQLiteAppl(abc.ABC):
|
||||
self._init_done = True
|
||||
|
||||
logger.debug("init DB: %s", self.db_url)
|
||||
self.properties.init(conn)
|
||||
with conn:
|
||||
self.properties.init(conn)
|
||||
|
||||
ver = self.properties("DB_SCHEMA")
|
||||
if ver is None:
|
||||
@@ -409,7 +418,7 @@ CREATE TABLE IF NOT EXISTS properties (
|
||||
self._init_done = True
|
||||
logger.debug("init properties of DB: %s", self.db_url)
|
||||
res = conn.execute(self.SQL_TABLE_EXISTS)
|
||||
if res.fetchone() is None: # DB schema needs to be be created
|
||||
if res.fetchone() is None: # DB schema needs to be created
|
||||
self.create_schema(conn)
|
||||
return True
|
||||
|
||||
|
||||
+14
-1
@@ -1348,6 +1348,8 @@ def run():
|
||||
|
||||
def init():
|
||||
|
||||
# pylint: disable=import-outside-toplevel
|
||||
|
||||
if searx.sxng_debug or app.debug:
|
||||
app.debug = True
|
||||
searx.sxng_debug = True
|
||||
@@ -1358,6 +1360,18 @@ def init():
|
||||
logger.error("server.secret_key is not changed. Please use something else instead of ultrasecretkey.")
|
||||
sys.exit(1)
|
||||
|
||||
# init database schema first / DB schema is created with the first connect
|
||||
from searx.data import get_cache
|
||||
from searx.enginelib import ENGINES_CACHE
|
||||
|
||||
conn = get_cache().connect()
|
||||
conn.close()
|
||||
conn = ENGINES_CACHE.connect()
|
||||
conn.close()
|
||||
|
||||
favicons.init()
|
||||
|
||||
# init application
|
||||
locales_initialize()
|
||||
valkey_initialize()
|
||||
searx.plugins.initialize(app)
|
||||
@@ -1366,7 +1380,6 @@ def init():
|
||||
searx.search.initialize(check_network=True, enable_metrics=metrics)
|
||||
|
||||
limiter.initialize(app, settings)
|
||||
favicons.init()
|
||||
|
||||
|
||||
def static_headers(headers: Headers, _path: str, _url: str) -> None:
|
||||
|
||||
Reference in New Issue
Block a user