[mod] ExpireCacheSQLite - implement .setmany() for bulk loading

[1] https://github.com/searxng/searxng/issues/5223#issuecomment-3328597147

Suggested-by: Ivan G <igabaldon@inetol.net> [1]
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser
2025-09-24 19:34:03 +02:00
committed by Markus Heiser
parent 4f4de3fc87
commit 18a58943cc
4 changed files with 131 additions and 40 deletions
+20
View File
@@ -0,0 +1,20 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Command line implementation"""
import typer
from .core import get_cache
# Typer application object; commands are registered on it with @app.command().
app = typer.Typer()
@app.command()
def state():
    """show state of the cache"""
    # NOTE: the docstring above is left untouched on purpose -- Typer shows it
    # as the help text of the command.
    db_cache = get_cache()
    for table in db_cache.table_names:
        # The table name comes from the cache object itself (this command takes
        # no arguments), so it is interpolated into the statement directly.
        count_stmt = f"SELECT count(*) FROM {table}"
        for row in db_cache.DB.execute(count_stmt):
            print(f"cache table {table} holds {row[0]} key/value pairs")
# Invoke the Typer application.  The call sits at module level, so it runs
# whenever this module is executed (or imported).
app()
+8 -11
View File
@@ -6,10 +6,12 @@ __all__ = ["CurrenciesDB"]
import typing as t
import json
import pathlib
import time
from .core import get_cache, log
if t.TYPE_CHECKING:
from searx.cache import CacheRowType
@t.final
class CurrenciesDB:
@@ -33,19 +35,14 @@ class CurrenciesDB:
# in /tmp and will be rebuilt during the reboot anyway
def load(self):
    """Bulk-load the currency data from ``self.json_file`` into the cache.

    The JSON document is expected to hold the two mappings ``names`` and
    ``iso4217``.  Each mapping is written with a single ``setmany()`` call
    into its own cache context (``self.ctx_names`` / ``self.ctx_iso4217``)
    instead of one ``set()`` call per key, so every entry is written once.
    """
    _start = time.time()
    log.debug("init searx.data.CURRENCIES")
    with open(self.json_file, encoding="utf-8") as f:
        data_dict: dict[str, dict[str, str]] = json.load(f)

    # One (key, value, None) row per entry.  The trailing None is presumably
    # the expire slot of CacheRowType (i.e. "never expires") -- confirm
    # against searx.cache.
    rows: "list[CacheRowType]" = [(k, v, None) for k, v in data_dict["names"].items()]
    self.cache.setmany(rows, ctx=self.ctx_names)
    rows = [(k, v, None) for k, v in data_dict["iso4217"].items()]
    self.cache.setmany(rows, ctx=self.ctx_iso4217)

    # Logged after the bulk load so the elapsed time covers the actual writes.
    log.debug(
        "init searx.data.CURRENCIES added %s items in %s sec.",
        len(data_dict["names"]) + len(data_dict["iso4217"]),
        time.time() - _start,
    )
def name_to_iso4217(self, name: str) -> str | None:
self.init()
+24 -13
View File
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Simple implementation to store TrackerPatterns data in a SQL database."""
import typing
import typing as t
__all__ = ["TrackerPatternsDB"]
@@ -14,9 +14,14 @@ from httpx import HTTPError
from searx.data.core import get_cache, log
from searx.network import get as http_get
if t.TYPE_CHECKING:
from searx.cache import CacheRowType
# A rule is a 3-tuple (url_regexp, url_ignore, del_args); the position of each
# item is named by TrackerPatternsDB.Fields.
RuleType = tuple[str, list[str], list[str]]
@t.final
class TrackerPatternsDB:
# pylint: disable=missing-class-docstring
@@ -31,9 +36,9 @@ class TrackerPatternsDB:
class Fields:
    """Names for the positions of the items inside a rule tuple."""

    # pylint: disable=too-few-public-methods, invalid-name
    # Each index is declared exactly once: a ``Final`` name must not be
    # assigned a second time.
    url_regexp: t.Final = 0  # URL (regular expression) match condition of the link
    url_ignore: t.Final = 1  # URL (regular expression) to ignore
    del_args: t.Final = 2  # list of URL arguments (regular expression) to delete
def __init__(self):
self.cache = get_cache()
@@ -49,19 +54,25 @@ class TrackerPatternsDB:
def load(self):
    """Bulk-load all tracker pattern rules into the cache.

    Every rule yielded by ``self.iter_clear_list()`` becomes one row, keyed
    by its ``url_regexp`` with the pair ``(url_ignore, del_args)`` as value.
    All rows are then written with a single ``setmany()`` call into the
    context ``self.ctx_name``.
    """
    log.debug("init searx.data.TRACKER_PATTERNS")
    rows: "list[CacheRowType]" = []

    for rule in self.iter_clear_list():
        key = rule[self.Fields.url_regexp]
        value = (
            rule[self.Fields.url_ignore],
            rule[self.Fields.del_args],
        )
        # Trailing None: presumably the expire slot of CacheRowType (add()
        # below passes expire=None for a single rule) -- confirm against
        # searx.cache.
        rows.append((key, value, None))

    self.cache.setmany(rows, ctx=self.ctx_name)

def add(self, rule: RuleType):
    """Store a single rule in the cache.

    The rule's ``url_regexp`` is the key, the pair ``(url_ignore, del_args)``
    is the value; the entry is written to the context ``self.ctx_name`` with
    ``expire=None``.
    """
    key = rule[self.Fields.url_regexp]
    value = (
        rule[self.Fields.url_ignore],
        rule[self.Fields.del_args],
    )
    self.cache.set(key=key, value=value, ctx=self.ctx_name, expire=None)
def rules(self) -> Iterator[RuleType]:
self.init()