mirror of
https://github.com/searxng/searxng.git
synced 2026-06-11 04:17:50 +02:00
[fix] sqlitedb: create DB Schema (DDL) during app initialization (hardening) (#6187)
Until now, the initialization of the DB schema ("base schema") was done on
demand, which caused race conditions between competing threads and processes.
The DDL statements for creating the "base schema" are now executed as part of
the initialization of the app.
Further improvements were made to harden the database applications:
- The Wikidata & Radio-Browser engines perform their initialization only once
(previously, the initialization was carried out in every thread/process).
- If multiple processes try to set the DB's WAL mode while opening the DB at the same
time, this usually leads to another race condition; the resulting ``sqlite3.OperationalError`` is now caught.
Related:
- https://github.com/searxng/searxng/issues/6181#issuecomment-4586705
Closes: #6181
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
+8
-5
@@ -444,12 +444,10 @@ class ExpireCacheSQLite(sqlitedb.SQLiteAppl, ExpireCache):
|
|||||||
def get(self, key: str, default: typing.Any = None, ctx: str | None = None) -> typing.Any:
|
def get(self, key: str, default: typing.Any = None, ctx: str | None = None) -> typing.Any:
|
||||||
"""Get value of ``key`` from table given by argument ``ctx``. If
|
"""Get value of ``key`` from table given by argument ``ctx``. If
|
||||||
``ctx`` argument is ``None`` (the default), a table name is generated
|
``ctx`` argument is ``None`` (the default), a table name is generated
|
||||||
from the :py:obj:`ExpireCacheCfg.name`. If ``key`` not exists (in
|
from the :py:obj:`ExpireCacheCfg.name`. If ``key`` not exists in
|
||||||
table), the ``default`` value is returned.
|
the table or the table not exists, the ``default`` value is returned.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
table = ctx
|
table = ctx
|
||||||
self.maintenance()
|
|
||||||
|
|
||||||
if not table:
|
if not table:
|
||||||
table = self.normalize_name(self.cfg.name)
|
table = self.normalize_name(self.cfg.name)
|
||||||
@@ -457,6 +455,9 @@ class ExpireCacheSQLite(sqlitedb.SQLiteAppl, ExpireCache):
|
|||||||
if table not in self.table_names:
|
if table not in self.table_names:
|
||||||
return default
|
return default
|
||||||
|
|
||||||
|
# Before values are taken from the table, a maintenance interval may
|
||||||
|
# need to be carried out.
|
||||||
|
self.maintenance()
|
||||||
sql = f"SELECT value FROM {table} WHERE key = ?"
|
sql = f"SELECT value FROM {table} WHERE key = ?"
|
||||||
row = self.DB.execute(sql, (key,)).fetchone()
|
row = self.DB.execute(sql, (key,)).fetchone()
|
||||||
if row is None:
|
if row is None:
|
||||||
@@ -469,12 +470,14 @@ class ExpireCacheSQLite(sqlitedb.SQLiteAppl, ExpireCache):
|
|||||||
If ``ctx`` argument is ``None`` (the default), a table name is
|
If ``ctx`` argument is ``None`` (the default), a table name is
|
||||||
generated from the :py:obj:`ExpireCacheCfg.name`."""
|
generated from the :py:obj:`ExpireCacheCfg.name`."""
|
||||||
table = ctx
|
table = ctx
|
||||||
self.maintenance()
|
|
||||||
|
|
||||||
if not table:
|
if not table:
|
||||||
table = self.normalize_name(self.cfg.name)
|
table = self.normalize_name(self.cfg.name)
|
||||||
|
|
||||||
if table in self.table_names:
|
if table in self.table_names:
|
||||||
|
# Before values are taken from the table, a maintenance interval may
|
||||||
|
# need to be carried out.
|
||||||
|
self.maintenance()
|
||||||
for row in self.DB.execute(f"SELECT key, value FROM {table}"):
|
for row in self.DB.execute(f"SELECT key, value FROM {table}"):
|
||||||
yield row[0], self.deserialize(row[1])
|
yield row[0], self.deserialize(row[1])
|
||||||
|
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ import typing as t
|
|||||||
|
|
||||||
import sys
|
import sys
|
||||||
import copy
|
import copy
|
||||||
|
import os
|
||||||
from os.path import realpath, dirname
|
from os.path import realpath, dirname
|
||||||
|
|
||||||
import types
|
import types
|
||||||
@@ -278,6 +279,8 @@ def load_engines(engine_list: list[dict[str, t.Any]]):
|
|||||||
else:
|
else:
|
||||||
# if an engine can't be loaded (if for example the engine is missing
|
# if an engine can't be loaded (if for example the engine is missing
|
||||||
# tor or some other requirements) its set to inactive!
|
# tor or some other requirements) its set to inactive!
|
||||||
logger.error("loading engine %s failed: set engine to inactive!", engine_data.get("name", "???"))
|
logger.error(
|
||||||
|
f"(PID {os.getpid()}) loading engine %s failed: set engine to inactive!", engine_data.get("name", "???")
|
||||||
|
)
|
||||||
engine_data["inactive"] = True
|
engine_data["inactive"] = True
|
||||||
return engines
|
return engines
|
||||||
|
|||||||
@@ -6,6 +6,7 @@
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
import random
|
import random
|
||||||
import socket
|
import socket
|
||||||
from urllib.parse import urlencode
|
from urllib.parse import urlencode
|
||||||
@@ -59,7 +60,19 @@ seconds."""
|
|||||||
def init(_):
|
def init(_):
|
||||||
global CACHE # pylint: disable=global-statement
|
global CACHE # pylint: disable=global-statement
|
||||||
CACHE = EngineCache("radio_browser")
|
CACHE = EngineCache("radio_browser")
|
||||||
|
|
||||||
|
# In an environment with competing processes, the initial loading of the
|
||||||
|
# cache is required only once.
|
||||||
|
eng_state: str | None = CACHE.get("eng_state")
|
||||||
|
if not eng_state or not eng_state.startswith("STATE:"):
|
||||||
|
CACHE.set("eng_state", f"STATE: being initialized by PID {os.getpid()}")
|
||||||
|
try:
|
||||||
server_list()
|
server_list()
|
||||||
|
except Exception:
|
||||||
|
CACHE.set("eng_state", f"ERROR: initialization by PID {os.getpid()} failed.")
|
||||||
|
raise
|
||||||
|
else:
|
||||||
|
logger.debug(eng_state)
|
||||||
|
|
||||||
|
|
||||||
def server_list() -> list[str]:
|
def server_list() -> list[str]:
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ Some implementations are shared from :ref:`wikipedia engine`.
|
|||||||
|
|
||||||
import typing as t
|
import typing as t
|
||||||
|
|
||||||
|
import os
|
||||||
from hashlib import md5
|
from hashlib import md5
|
||||||
from urllib.parse import urlencode, unquote
|
from urllib.parse import urlencode, unquote
|
||||||
from json import loads
|
from json import loads
|
||||||
@@ -827,7 +828,19 @@ def debug_explain_wikidata_query(query: str, method: str = "GET"):
|
|||||||
def init(_):
|
def init(_):
|
||||||
global CACHE # pylint: disable=global-statement
|
global CACHE # pylint: disable=global-statement
|
||||||
CACHE = EngineCache("wikidata")
|
CACHE = EngineCache("wikidata")
|
||||||
|
|
||||||
|
# In an environment with competing processes, the initial loading of the
|
||||||
|
# cache is required only once.
|
||||||
|
eng_state: str | None = CACHE.get("eng_state")
|
||||||
|
if not eng_state or not eng_state.startswith("STATE:"):
|
||||||
|
CACHE.set("eng_state", f"STATE: being initialized by PID {os.getpid()}")
|
||||||
|
try:
|
||||||
init_wikidata_properties()
|
init_wikidata_properties()
|
||||||
|
except Exception:
|
||||||
|
CACHE.set("eng_state", f"ERROR: initialization by PID {os.getpid()} failed.")
|
||||||
|
raise
|
||||||
|
else:
|
||||||
|
logger.debug(eng_state)
|
||||||
|
|
||||||
|
|
||||||
def init_wikidata_properties():
|
def init_wikidata_properties():
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
|
|
||||||
import typing as t
|
import typing as t
|
||||||
|
|
||||||
|
import os
|
||||||
import logging
|
import logging
|
||||||
import threading
|
import threading
|
||||||
from abc import abstractmethod, ABC
|
from abc import abstractmethod, ABC
|
||||||
@@ -154,7 +155,9 @@ class EngineProcessor(ABC):
|
|||||||
try:
|
try:
|
||||||
init_ok = self.engine.init(eng_setting)
|
init_ok = self.engine.init(eng_setting)
|
||||||
except Exception: # pylint: disable=broad-except
|
except Exception: # pylint: disable=broad-except
|
||||||
logger.exception("Init method of engine %s failed due to an exception.", self.engine.name)
|
logger.exception(
|
||||||
|
f"(PID {os.getpid()}) Init method of engine %s failed due to an exception.", self.engine.name
|
||||||
|
)
|
||||||
init_ok = False
|
init_ok = False
|
||||||
# In older engines, None is returned from the init method, which is
|
# In older engines, None is returned from the init method, which is
|
||||||
# equivalent to indicating that the initialization was successful.
|
# equivalent to indicating that the initialization was successful.
|
||||||
|
|||||||
+13
-4
@@ -121,8 +121,8 @@ class SQLiteAppl(abc.ABC):
|
|||||||
|
|
||||||
.. _WAL: https://sqlite.org/wal.html
|
.. _WAL: https://sqlite.org/wal.html
|
||||||
"""
|
"""
|
||||||
SQLITE_CONNECT_ARGS: dict[str,str|int|bool|None] = {
|
SQLITE_CONNECT_ARGS: dict[str, str | float | int | bool | None] = {
|
||||||
# "timeout": 5.0,
|
"timeout": 3.0, # default is 5sec
|
||||||
# "detect_types": 0,
|
# "detect_types": 0,
|
||||||
"check_same_thread": bool(SQLITE_THREADING_MODE != "serialized"),
|
"check_same_thread": bool(SQLITE_THREADING_MODE != "serialized"),
|
||||||
"cached_statements": 0, # https://github.com/python/cpython/issues/118172
|
"cached_statements": 0, # https://github.com/python/cpython/issues/118172
|
||||||
@@ -195,6 +195,7 @@ class SQLiteAppl(abc.ABC):
|
|||||||
self.db_url: str = db_url
|
self.db_url: str = db_url
|
||||||
self.properties: SQLiteProperties = SQLiteProperties(db_url)
|
self.properties: SQLiteProperties = SQLiteProperties(db_url)
|
||||||
self._init_done: bool = False
|
self._init_done: bool = False
|
||||||
|
self._DB: sqlite3.Connection | None = None
|
||||||
self._compatibility()
|
self._compatibility()
|
||||||
# atexit.register(self.tear_down)
|
# atexit.register(self.tear_down)
|
||||||
|
|
||||||
@@ -209,7 +210,7 @@ class SQLiteAppl(abc.ABC):
|
|||||||
def _compatibility(self):
|
def _compatibility(self):
|
||||||
|
|
||||||
if self.SQLITE_THREADING_MODE == "serialized":
|
if self.SQLITE_THREADING_MODE == "serialized":
|
||||||
self._DB: sqlite3.Connection | None = None
|
self._DB = None
|
||||||
else:
|
else:
|
||||||
msg = (
|
msg = (
|
||||||
f"SQLite library is compiled with {self.SQLITE_THREADING_MODE} mode,"
|
f"SQLite library is compiled with {self.SQLITE_THREADING_MODE} mode,"
|
||||||
@@ -228,7 +229,13 @@ class SQLiteAppl(abc.ABC):
|
|||||||
|
|
||||||
def _connect(self) -> sqlite3.Connection:
|
def _connect(self) -> sqlite3.Connection:
|
||||||
conn = sqlite3.Connection(self.db_url, **self.SQLITE_CONNECT_ARGS) # type: ignore
|
conn = sqlite3.Connection(self.db_url, **self.SQLITE_CONNECT_ARGS) # type: ignore
|
||||||
|
try:
|
||||||
|
with conn:
|
||||||
conn.execute(f"PRAGMA journal_mode={self.SQLITE_JOURNAL_MODE}")
|
conn.execute(f"PRAGMA journal_mode={self.SQLITE_JOURNAL_MODE}")
|
||||||
|
except sqlite3.OperationalError:
|
||||||
|
# when database is locked, the journal_mode is already set by
|
||||||
|
# different but concurrent process (no need to set it once more)
|
||||||
|
pass
|
||||||
self.register_functions(conn)
|
self.register_functions(conn)
|
||||||
return conn
|
return conn
|
||||||
|
|
||||||
@@ -312,6 +319,7 @@ class SQLiteAppl(abc.ABC):
|
|||||||
# Since more than one instance of SQLiteAppl share the same DB
|
# Since more than one instance of SQLiteAppl share the same DB
|
||||||
# connection, we need to make sure that each SQLiteAppl instance has run
|
# connection, we need to make sure that each SQLiteAppl instance has run
|
||||||
# its init method at least once.
|
# its init method at least once.
|
||||||
|
with conn:
|
||||||
self.init(conn)
|
self.init(conn)
|
||||||
|
|
||||||
return conn
|
return conn
|
||||||
@@ -330,6 +338,7 @@ class SQLiteAppl(abc.ABC):
|
|||||||
self._init_done = True
|
self._init_done = True
|
||||||
|
|
||||||
logger.debug("init DB: %s", self.db_url)
|
logger.debug("init DB: %s", self.db_url)
|
||||||
|
with conn:
|
||||||
self.properties.init(conn)
|
self.properties.init(conn)
|
||||||
|
|
||||||
ver = self.properties("DB_SCHEMA")
|
ver = self.properties("DB_SCHEMA")
|
||||||
@@ -409,7 +418,7 @@ CREATE TABLE IF NOT EXISTS properties (
|
|||||||
self._init_done = True
|
self._init_done = True
|
||||||
logger.debug("init properties of DB: %s", self.db_url)
|
logger.debug("init properties of DB: %s", self.db_url)
|
||||||
res = conn.execute(self.SQL_TABLE_EXISTS)
|
res = conn.execute(self.SQL_TABLE_EXISTS)
|
||||||
if res.fetchone() is None: # DB schema needs to be be created
|
if res.fetchone() is None: # DB schema needs to be created
|
||||||
self.create_schema(conn)
|
self.create_schema(conn)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|||||||
+14
-1
@@ -1348,6 +1348,8 @@ def run():
|
|||||||
|
|
||||||
def init():
|
def init():
|
||||||
|
|
||||||
|
# pylint: disable=import-outside-toplevel
|
||||||
|
|
||||||
if searx.sxng_debug or app.debug:
|
if searx.sxng_debug or app.debug:
|
||||||
app.debug = True
|
app.debug = True
|
||||||
searx.sxng_debug = True
|
searx.sxng_debug = True
|
||||||
@@ -1358,6 +1360,18 @@ def init():
|
|||||||
logger.error("server.secret_key is not changed. Please use something else instead of ultrasecretkey.")
|
logger.error("server.secret_key is not changed. Please use something else instead of ultrasecretkey.")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
# init database schema first / DB schema is created with the first connect
|
||||||
|
from searx.data import get_cache
|
||||||
|
from searx.enginelib import ENGINES_CACHE
|
||||||
|
|
||||||
|
conn = get_cache().connect()
|
||||||
|
conn.close()
|
||||||
|
conn = ENGINES_CACHE.connect()
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
favicons.init()
|
||||||
|
|
||||||
|
# init application
|
||||||
locales_initialize()
|
locales_initialize()
|
||||||
valkey_initialize()
|
valkey_initialize()
|
||||||
searx.plugins.initialize(app)
|
searx.plugins.initialize(app)
|
||||||
@@ -1366,7 +1380,6 @@ def init():
|
|||||||
searx.search.initialize(check_network=True, enable_metrics=metrics)
|
searx.search.initialize(check_network=True, enable_metrics=metrics)
|
||||||
|
|
||||||
limiter.initialize(app, settings)
|
limiter.initialize(app, settings)
|
||||||
favicons.init()
|
|
||||||
|
|
||||||
|
|
||||||
def static_headers(headers: Headers, _path: str, _url: str) -> None:
|
def static_headers(headers: Headers, _path: str, _url: str) -> None:
|
||||||
|
|||||||
Reference in New Issue
Block a user