1 Commits

Author SHA1 Message Date
dependabot[bot] ed723034c4 [upd] pypi: Update docutils requirement from >=0.21.2 to >=0.23
Updates the requirements on [docutils](https://github.com/rtfd/recommonmark) to permit the latest version.
- [Changelog](https://github.com/readthedocs/recommonmark/blob/master/CHANGELOG.md)
- [Commits](https://github.com/rtfd/recommonmark/commits)

---
updated-dependencies:
- dependency-name: docutils
  dependency-version: '0.23'
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-05-29 07:34:18 +00:00
282 changed files with 3729 additions and 7867 deletions
+163
View File
@@ -0,0 +1,163 @@
;;; .dir-locals.el
;;
;; Per-Directory Local Variables:
;; https://www.gnu.org/software/emacs/manual/html_node/emacs/Directory-Variables.html
;;
;; For full fledge developer tools install emacs packages:
;;
;; M-x package-install ...
;;
;; magit gitconfig
;; nvm lsp-mode lsp-pyright lsp-eslint
;; pyvenv pylint pip-requirements
;; jinja2-mode
;; json-mode
;; company company-jedi company-quickhelp company-shell
;; realgud
;; sphinx-doc markdown-mode graphviz-dot-mode
;; apache-mode nginx-mode
;;
;; To setup a developer environment, build target::
;;
;; $ make node.env.dev pyenv.install
;;
;; Some buffer locals are referencing the project environment:
;;
;; - prj-root --> <repo>/
;; - nvm-dir --> <repo>/.nvm
;; - python-environment-directory --> <repo>/local
;; - python-environment-default-root-name --> py3
;; - python-shell-virtualenv-root --> <repo>/local/py3
;; When this variable is set with the path of the virtualenv to use,
;; `process-environment' and `exec-path' get proper values in order to run
;; shells inside the specified virtualenv, example::
;; (setq python-shell-virtualenv-root "/path/to/env/")
;; - python-shell-interpreter --> <repo>/local/py3/bin/python
;;
;; Python development:
;;
;; Jedi, flycheck & other python stuff should use the 'python-shell-interpreter'
;; from the local py3 environment.
;;
((nil
. ((fill-column . 80)
(indent-tabs-mode . nil)
(eval . (progn
(add-to-list 'auto-mode-alist '("\\.html\\'" . jinja2-mode))
;; project root folder is where the `.dir-locals.el' is located
(setq-local prj-root
(locate-dominating-file default-directory ".dir-locals.el"))
(setq-local python-environment-directory
(expand-file-name "./local" prj-root))
;; to get in use of NVM environment, install https://github.com/rejeep/nvm.el
(setq-local nvm-dir (expand-file-name "./.nvm" prj-root))
;; use nodejs from the (local) NVM environment (see nvm-dir)
(nvm-use-for-buffer)
(ignore-errors (require 'lsp))
(setq-local lsp-server-install-dir (car (cdr nvm-current-version)))
(setq-local lsp-enable-file-watchers nil)
;; use 'py3' environment as default
(setq-local python-environment-default-root-name
"py3")
(setq-local python-shell-virtualenv-root
(expand-file-name
python-environment-default-root-name python-environment-directory))
(setq-local python-shell-interpreter
(expand-file-name
"bin/python" python-shell-virtualenv-root))))))
(makefile-gmake-mode
. ((indent-tabs-mode . t)))
(yaml-mode
. ((eval . (progn
;; flycheck should use the local py3 environment
(setq-local flycheck-yaml-yamllint-executable
(expand-file-name "bin/yamllint" python-shell-virtualenv-root))
(setq-local flycheck-yamllintrc
(expand-file-name ".yamllint.yml" prj-root))
(flycheck-checker . yaml-yamllint)))))
(json-mode
. ((eval . (progn
(setq-local js-indent-level 4)
(flycheck-checker . json-python-json)))))
(js-mode
. ((eval . (progn
(ignore-errors (require 'lsp-eslint))
(setq-local js-indent-level 2)
;; flycheck should use the eslint checker from developer tools
(setq-local flycheck-javascript-eslint-executable
(expand-file-name "node_modules/.bin/eslint" prj-root))
;; (flycheck-mode)
(if (featurep 'lsp-eslint)
(lsp))
))))
(python-mode
. ((eval . (progn
(ignore-errors (require 'jedi-core))
(ignore-errors (require 'lsp-pyright))
(ignore-errors (sphinx-doc-mode))
(setq-local python-environment-virtualenv
(list (expand-file-name "bin/virtualenv" python-shell-virtualenv-root)
;;"--system-site-packages"
"--quiet"))
(setq-local pylint-command
(expand-file-name "bin/pylint" python-shell-virtualenv-root))
(if (featurep 'lsp-pyright)
(lsp))
;; pylint will find the '.pylintrc' file next to the CWD
;; https://pylint.readthedocs.io/en/latest/user_guide/run.html#command-line-options
(setq-local flycheck-pylintrc
".pylintrc")
;; flycheck & other python stuff should use the local py3 environment
(setq-local flycheck-python-pylint-executable
python-shell-interpreter)
;; use 'M-x jedi:show-setup-info' and 'M-x epc:controller' to inspect jedi server
;; https://tkf.github.io/emacs-jedi/latest/#jedi:environment-root -- You
;; can specify a full path instead of a name (relative path). In that case,
;; python-environment-directory is ignored and Python virtual environment
;; is created at the specified path.
(setq-local jedi:environment-root
python-shell-virtualenv-root)
;; https://tkf.github.io/emacs-jedi/latest/#jedi:server-command
(setq-local jedi:server-command
(list python-shell-interpreter
jedi:server-script))
;; jedi:environment-virtualenv --> see above 'python-environment-virtualenv'
;; is set buffer local! No need to setup jedi:environment-virtualenv:
;;
;; Virtualenv command to use. A list of string. If it is nil,
;; python-environment-virtualenv is used instead. You must set non-nil
;; value to jedi:environment-root in order to make this setting work.
;;
;; https://tkf.github.io/emacs-jedi/latest/#jedi:environment-virtualenv
;;
;; (setq-local jedi:environment-virtualenv
;; (list (expand-file-name "bin/virtualenv" python-shell-virtualenv-root)
;; "--python"
;; "/usr/bin/python3.4"
;; ))
))))
)
-1
View File
@@ -1,6 +1,5 @@
*
!container/*.template.*
!container/entrypoint.sh
!searx/**
!requirements*.txt
+9 -9
View File
@@ -78,7 +78,7 @@ jobs:
python-version: "${{ env.PYTHON_VERSION }}"
- name: Checkout
uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: "false"
fetch-depth: "0"
@@ -106,10 +106,10 @@ jobs:
- if: ${{ matrix.emulation }}
name: Setup QEMU
uses: docker/setup-qemu-action@06116385d9baf250c9f4dcb4858b16962ea869c3 # v4.1.0
uses: docker/setup-qemu-action@ce360397dd3f832beb865e1373c09c0e9f86d70a # v4.0.0
- name: Login to GHCR
uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
with:
registry: "ghcr.io"
username: "${{ github.repository_owner }}"
@@ -141,16 +141,16 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: "false"
- if: ${{ matrix.emulation }}
name: Setup QEMU
uses: docker/setup-qemu-action@06116385d9baf250c9f4dcb4858b16962ea869c3 # v4.1.0
uses: docker/setup-qemu-action@ce360397dd3f832beb865e1373c09c0e9f86d70a # v4.0.0
- name: Login to GHCR
uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
with:
registry: "ghcr.io"
username: "${{ github.repository_owner }}"
@@ -175,19 +175,19 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: "false"
- name: Login to GHCR
uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
with:
registry: "ghcr.io"
username: "${{ github.repository_owner }}"
password: "${{ secrets.GITHUB_TOKEN }}"
- name: Login to Docker Hub
uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4.1.0
with:
registry: "docker.io"
username: "${{ secrets.DOCKER_USER }}"
+1 -1
View File
@@ -46,7 +46,7 @@ jobs:
python-version: "${{ env.PYTHON_VERSION }}"
- name: Checkout
uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: "false"
+2 -5
View File
@@ -37,7 +37,7 @@ jobs:
python-version: "${{ env.PYTHON_VERSION }}"
- name: Checkout
uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: "false"
fetch-depth: "0"
@@ -50,14 +50,11 @@ jobs:
python-${{ env.PYTHON_VERSION }}-${{ runner.arch }}-
path: "./local/"
- name: Setup dependencies
run: sudo ./utils/searxng.sh install buildhost
- name: Setup venv
run: make V=1 install
- name: Build documentation
run: make V=1 docs.html
run: make V=1 docs.clean docs.html
- if: github.ref_name == 'master'
name: Release
+2 -2
View File
@@ -39,7 +39,7 @@ jobs:
python-version: "${{ matrix.python-version }}"
- name: Checkout
uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: "false"
@@ -67,7 +67,7 @@ jobs:
python-version: "${{ env.PYTHON_VERSION }}"
- name: Checkout
uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: "false"
+2 -2
View File
@@ -40,7 +40,7 @@ jobs:
python-version: "${{ env.PYTHON_VERSION }}"
- name: Checkout
uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
token: "${{ secrets.WEBLATE_GITHUB_TOKEN }}"
fetch-depth: "0"
@@ -88,7 +88,7 @@ jobs:
python-version: "${{ env.PYTHON_VERSION }}"
- name: Checkout
uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
token: "${{ secrets.WEBLATE_GITHUB_TOKEN }}"
fetch-depth: "0"
+2 -2
View File
@@ -24,7 +24,7 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: "false"
@@ -41,6 +41,6 @@ jobs:
write-comment: "false"
- name: Upload SARIFs
uses: github/codeql-action/upload-sarif@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4.36.2
uses: github/codeql-action/upload-sarif@9e0d7b8d25671d64c341c19c0152d693099fb5ba # v4.35.5
with:
sarif_file: "./scout.sarif"
-11
View File
@@ -1,11 +0,0 @@
[[language]]
name = "python"
language-servers = ["basedpyright", "pylsp"]
auto-format = true
[language-server.pylsp.config.pylsp]
plugins.pylint.enabled = true
plugins.isort.enabled = true
plugins.black.enabled = true
plugins.black.skip_string_normalization = true
plugins.black.line_length = 120
+29 -15
View File
@@ -2,12 +2,12 @@
"$schema": "./node_modules/@biomejs/biome/configuration_schema.json",
"files": {
"ignoreUnknown": true,
"includes": ["**", "!node_modules", "!src/brand", "!src/svg"]
"includes": ["**", "!node_modules"]
},
"assist": {
"enabled": true,
"actions": {
"preset": "recommended",
"recommended": true,
"source": {
"useSortedAttributes": "on",
"useSortedProperties": "on"
@@ -27,14 +27,12 @@
"linter": {
"enabled": true,
"rules": {
"preset": "recommended",
"recommended": true,
"complexity": {
"noForEach": "error",
"noImplicitCoercions": "error",
"noRedundantDefaultExport": "error",
"noUselessCatchBinding": "error",
"noUselessUndefined": "error",
"useArrayFind": "error",
"useSimplifiedLogicExpression": "error"
},
"correctness": {
@@ -44,11 +42,25 @@
"useSingleJsDocAsterisk": "error"
},
"nursery": {
"noContinue": "warn",
"noEqualsToNull": "warn",
"noFloatingPromises": "warn",
"noForIn": "warn",
"noIncrementDecrement": "warn",
"noMisusedPromises": "warn",
"noMultiAssign": "warn",
"noMultiStr": "warn",
"noNestedPromises": "warn",
"noParametersOnlyUsedInRecursion": "warn",
"noRedundantDefaultExport": "warn",
"noReturnAssign": "warn",
"noUselessReturn": "off",
"useAwaitThenable": "off",
"useConsistentEnumValueType": "warn",
"useDestructuring": "warn",
"useExhaustiveSwitchCases": "warn",
"useExplicitType": "off",
"useFind": "warn",
"useRegexpExec": "warn"
},
"performance": {
@@ -63,15 +75,23 @@
"noCommonJs": "error",
"noEnum": "error",
"noImplicitBoolean": "error",
"noIncrementDecrement": "error",
"noInferrableTypes": "error",
"noMultiAssign": "error",
"noMultilineString": "error",
"noNamespace": "error",
"noNegationElse": "error",
"noNestedTernary": "error",
"noParameterAssign": "error",
"noParameterProperties": "error",
"noRestrictedTypes": {
"level": "error",
"options": {
"types": {
"Element": {
"message": "Element is too generic",
"use": "HTMLElement"
}
}
}
},
"noSubstr": "error",
"noUnusedTemplateLiteral": "error",
"noUselessElse": "error",
@@ -87,7 +107,6 @@
}
},
"useConsistentBuiltinInstantiation": "error",
"useConsistentEnumValueType": "error",
"useConsistentMemberAccessibility": {
"level": "error",
"options": {
@@ -107,7 +126,6 @@
}
},
"useDefaultSwitchClause": "error",
"useDestructuring": "error",
"useExplicitLengthCheck": "error",
"useForOf": "error",
"useGroupedAccessorPairs": "error",
@@ -124,17 +142,13 @@
"useUnifiedTypeSignatures": "error"
},
"suspicious": {
"noAlert": "error",
"noBitwiseOperators": "error",
"noConstantBinaryExpressions": "error",
"noDeprecatedImports": "error",
"noEmptyBlockStatements": "error",
"noEqualsToNull": "error",
"noEvolvingTypes": "error",
"noForIn": "error",
"noImportCycles": "error",
"noNestedPromises": "error",
"noParametersOnlyUsedInRecursion": "error",
"noReturnAssign": "error",
"noUnassignedVariables": "error",
"noVar": "error",
"useNumberToFixedDigitsArgument": "error",
+353 -378
View File
File diff suppressed because it is too large Load Diff
+8 -8
View File
@@ -29,21 +29,21 @@
"swiped-events": "1.2.0"
},
"devDependencies": {
"@biomejs/biome": "2.5.0",
"@types/node": "^26.0.0",
"@biomejs/biome": "2.4.15",
"@types/node": "^25.8.0",
"browserslist": "^4.28.2",
"browserslist-to-esbuild": "^2.1.1",
"edge.js": "^6.5.1",
"less": "^4.6.6",
"edge.js": "^6.5.0",
"less": "^4.6.4",
"mathjs": "^15.2.0",
"sharp": "~0.35.1",
"sort-package-json": "^4.0.0",
"stylelint": "^17.13.0",
"sharp": "~0.34.5",
"sort-package-json": "^3.6.1",
"stylelint": "^17.11.1",
"stylelint-config-standard-less": "^4.1.0",
"stylelint-prettier": "^5.0.3",
"svgo": "^4.0.1",
"typescript": "~6.0.3",
"vite": "^8.0.16",
"vite": "^8.0.13",
"vite-bundle-analyzer": "^1.3.8"
}
}
+1 -1
View File
@@ -77,9 +77,9 @@ export default class Calculator extends Plugin {
protected async run(): Promise<string | undefined> {
const searchInput = getElement<HTMLInputElement>("q");
const node = Calculator.math.parse(searchInput.value);
try {
const node = Calculator.math.parse(searchInput.value);
return `${node.toString()} = ${node.evaluate()}`;
} catch {
// not a compatible math expression
+1
View File
@@ -5,6 +5,7 @@ import type { KeyBindingLayout } from "./main/keyboard.ts";
// synced with searx/webapp.py get_client_settings
type Settings = {
plugins?: string[];
advanced_search?: boolean;
autocomplete?: string;
autocomplete_min?: number;
doi_resolver?: string;
+4 -1
View File
@@ -21,6 +21,8 @@ RUN --mount=type=cache,id=uv,target=/root/.cache/uv set -eux -o pipefail; \
COPY --exclude=./searx/version_frozen.py ./searx/ ./searx/
ARG TIMESTAMP_SETTINGS="0"
RUN set -eux -o pipefail; \
python -m compileall -q -f -j 0 --invalidation-mode=unchecked-hash ./searx/; \
find ./searx/static/ -type f \
@@ -28,4 +30,5 @@ RUN set -eux -o pipefail; \
-exec gzip -9 -k {} + \
-exec brotli -9 -k {} + \
-exec gzip --test {}.gz + \
-exec brotli --test {}.br +
-exec brotli --test {}.br +; \
touch -c --date="@$TIMESTAMP_SETTINGS" ./searx/settings.yml
+30 -9
View File
@@ -77,23 +77,43 @@ volume_handler() {
setup_ownership "$target" "directory"
}
setup() {
local template_settings="/usr/local/searxng/settings.template.yml"
local target_settings="$__SEARXNG_CONFIG_PATH/settings.yml"
# Handle configuration file updates
config_handler() {
local target="$1"
local template="$2"
local new_template_target="$target.new"
if [ ! -f "$target_settings" ]; then
# Create/Update the configuration file
if [ -f "$target" ]; then
setup_ownership "$target" "file"
if [ "$template" -nt "$target" ]; then
cp -pfT "$template" "$new_template_target"
cat <<EOF
...
... INFORMATION
... Update available for "$target"
... It is recommended to update the configuration file to ensure proper functionality
...
... New version placed at "$new_template_target"
... Please review and merge changes
...
EOF
fi
else
cat <<EOF
...
... INFORMATION
... "$target_settings" does not exist, creating from template...
... "$target" does not exist, creating from template...
...
EOF
cp -pfT "$template_settings" "$target_settings"
cp -pfT "$template" "$target"
sed -i "s/ultrasecretkey/$(head -c 24 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9')/g" "$target_settings"
sed -i "s/ultrasecretkey/$(head -c 24 /dev/urandom | base64 | tr -dc 'a-zA-Z0-9')/g" "$target"
fi
check_file "$target_settings"
check_file "$target"
}
cat <<EOF
@@ -104,7 +124,8 @@ EOF
volume_handler "$__SEARXNG_CONFIG_PATH"
volume_handler "$__SEARXNG_DATA_PATH"
setup
# Check for files
config_handler "$__SEARXNG_SETTINGS_PATH" "/usr/local/searxng/searx/settings.yml"
# root only features
if [ "$(id -u)" -eq 0 ]; then
-8
View File
@@ -1,8 +0,0 @@
# Read the documentation before extending the defaults:
# https://docs.searxng.org/admin/settings/
use_default_settings: true
server:
secret_key: "ultrasecretkey"
image_proxy: true
-1
View File
@@ -19,7 +19,6 @@ Settings
settings_search
settings_server
settings_ui
settings_preferences
settings_redis
settings_valkey
settings_outgoing
@@ -1,8 +0,0 @@
.. _settings preferences:
================
``preferences:``
================
.. autoclass:: searx._settings.SettingsPref
:members:
-1
View File
@@ -43,7 +43,6 @@
- ``google``
- ``mwmbl``
- ``naver``
- ``privacywall``
- ``quark``
- ``qwant``
- ``seznam``
-1
View File
@@ -47,7 +47,6 @@
activated:
- :py:obj:`searx.botdetection.link_token` in the :ref:`limiter`
- :ref:`image_proxy`
.. _image_proxy:
+8
View File
@@ -0,0 +1,8 @@
.. _aol engine:
===
AOL
===
.. automodule:: searx.engines.aol
:members:
-9
View File
@@ -1,9 +0,0 @@
.. _kagi engines:
============
Kagi Engines
============
.. automodule:: searx.engines.kagi
:members:
+8
View File
@@ -0,0 +1,8 @@
.. _karmasearch engine:
===========
Karmasearch
===========
.. automodule:: searx.engines.karmasearch
:members:
+1 -1
View File
@@ -87,7 +87,7 @@ Parameters
``autocomplete`` : default from :ref:`settings search`
[ ``google``, ``dbpedia``, ``duckduckgo``, ``mwmbl``, ``startpage``,
``privacywall``, ``wikipedia``, ``swisscows``, ``qwant`` ]
``wikipedia``, ``swisscows``, ``qwant`` ]
Service which completes words as you type.
+2 -2
View File
@@ -58,8 +58,8 @@ Configured Engines
{% for mod in engines %}
* - `{{mod.name}} <{{mod.about and mod.about.website}}>`_
{%- if mod.language %}
({{mod.language | upper}})
{%- if mod.about and mod.about.language %}
({{mod.about.language | upper}})
{%- endif %}
- ``!{{mod.shortcut}}``
- {%- if 'searx.engines.' + mod.__name__ in documented_modules %}
+6 -6
View File
@@ -2,16 +2,16 @@ mock==5.2.0
nose2[coverage_plugin]==0.16.0
cov-core==1.15.0
black==25.9.0
pylint==4.0.6
pylint==4.0.5
splinter==0.21.0
selenium==4.45.0
selenium==4.44.0
Sphinx==8.2.3;python_version <= "3.11"
Sphinx==9.1.0; python_version > "3.11"
sphinx-issues==6.0.0
sphinx-jinja==2.0.2
sphinx-tabs==3.5.0
furo==2025.12.19
sphinxcontrib-programoutput==0.20
sphinxcontrib-programoutput==0.19
sphinx-autobuild==2025.8.25
sphinx-notfound-page==1.1.0
myst-parser==5.0.0
@@ -20,9 +20,9 @@ aiounittest==1.5.0
yamllint==1.38.0
wlc==2.0.0
coloredlogs==15.0.1
docutils>=0.21.2;python_version <= "3.11"
docutils>=0.23;python_version <= "3.11"
docutils>=0.22.4; python_version > "3.11"
parameterized==0.9.0
granian[reload]==2.7.6
basedpyright==1.39.8
granian[reload]==2.7.4
basedpyright==1.39.5
types-lxml==2026.2.16
+2 -2
View File
@@ -1,2 +1,2 @@
granian==2.7.6
granian[pname]==2.7.6
granian==2.7.4
granian[pname]==2.7.4
+2 -2
View File
@@ -1,4 +1,4 @@
certifi==2026.6.17
certifi==2026.5.20
babel==2.18.0
flask-babel==4.0.0
flask==3.1.3
@@ -13,7 +13,7 @@ sniffio==1.3.1
valkey==6.1.1
markdown-it-py==4.2.0
msgspec==0.21.1
typer==0.26.7
typer==0.25.1
isodate==0.7.2
whitenoise==6.12.0
typing-extensions==4.15.0
+1 -8
View File
@@ -10,7 +10,6 @@ from os.path import dirname, abspath
import logging
import msgspec
from ._settings import SettingsPref
# Debug
LOG_FORMAT_DEBUG: str = '%(levelname)-7s %(name)-30.30s: %(message)s'
@@ -48,12 +47,6 @@ def init_settings():
settings.clear()
settings.update(cfg)
if get_setting("server.public_instance"):
# enable image proxy for public instances #6125
settings["server"]["image_proxy"] = True
pref: SettingsPref = get_setting("preferences")
pref.lock.add("image_proxy")
sxng_debug = get_setting("general.debug")
if sxng_debug:
_logging_config_debug()
@@ -73,7 +66,7 @@ def init_settings():
if settings['server']['public_instance']:
logger.warning(
"Be aware you have activated features intended only for public instances. "
"This force the usage of the limiter, link_token and image proxy / "
"This force the usage of the limiter and link_token / "
"see https://docs.searxng.org/admin/searx.limiter.html"
)
-42
View File
@@ -1,42 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Implementation of the :py:obj:`preference <searx.preference>` settings."""
# pylint: disable = too-few-public-methods
import typing as t
import msgspec
class SettingsPref(msgspec.Struct, kw_only=True, forbid_unknown_fields=True):
"""Options for configuring the preferences
.. code:: yaml
preferences:
lock:
- favicon_resolver
- image_proxy
- method
# ...
"""
lock: set[
t.Literal[
"categories",
"language",
"locale",
"autocomplete",
"favicon_resolver",
"image_proxy",
"method",
"safesearch",
"theme",
"results_on_new_tab",
"doi_resolver",
"simple_style",
"center_alignment",
"query_in_title",
"search_on_category_select",
]
] = set()
"""Lock arbitrary settings on the preferences page."""
-18
View File
@@ -179,23 +179,6 @@ def naver(query: str, _sxng_locale: str) -> list[str]:
return results
def privacywall(query: str, sxng_locale: str) -> list[str]:
# Privacywall search autocompleter
country = None
if "-" in sxng_locale:
country = sxng_locale.split("-")[1]
args = {'q': query, 'cc': country}
url = f"https://www.privacywall.org/search/secure/suggestions.php?{urlencode(args)}"
response = get(url)
if not response.ok:
return []
data: list[list[str]] = response.json()
return data[1]
def qihu360search(query: str, _sxng_locale: str) -> list[str]:
# 360Search search autocompleter
url = f"https://sug.so.360.cn/suggest?{urlencode({'format': 'json', 'word': query})}"
@@ -378,7 +361,6 @@ backends: dict[str, t.Callable[[str, str], list[str]]] = {
'google': google_complete,
'mwmbl': mwmbl,
'naver': naver,
'privacywall': privacywall,
'quark': quark,
'qwant': qwant,
'seznam': seznam,
+5 -8
View File
@@ -444,10 +444,12 @@ class ExpireCacheSQLite(sqlitedb.SQLiteAppl, ExpireCache):
def get(self, key: str, default: typing.Any = None, ctx: str | None = None) -> typing.Any:
"""Get value of ``key`` from table given by argument ``ctx``. If
``ctx`` argument is ``None`` (the default), a table name is generated
from the :py:obj:`ExpireCacheCfg.name`. If ``key`` not exists in
the table or the table not exists, the ``default`` value is returned.
from the :py:obj:`ExpireCacheCfg.name`. If ``key`` not exists (in
table), the ``default`` value is returned.
"""
table = ctx
self.maintenance()
if not table:
table = self.normalize_name(self.cfg.name)
@@ -455,9 +457,6 @@ class ExpireCacheSQLite(sqlitedb.SQLiteAppl, ExpireCache):
if table not in self.table_names:
return default
# Before values are taken from the table, a maintenance interval may
# need to be carried out.
self.maintenance()
sql = f"SELECT value FROM {table} WHERE key = ?"
row = self.DB.execute(sql, (key,)).fetchone()
if row is None:
@@ -470,14 +469,12 @@ class ExpireCacheSQLite(sqlitedb.SQLiteAppl, ExpireCache):
If ``ctx`` argument is ``None`` (the default), a table name is
generated from the :py:obj:`ExpireCacheCfg.name`."""
table = ctx
self.maintenance()
if not table:
table = self.normalize_name(self.cfg.name)
if table in self.table_names:
# Before values are taken from the table, a maintenance interval may
# need to be carried out.
self.maintenance()
for row in self.DB.execute(f"SELECT key, value FROM {table}"):
yield row[0], self.deserialize(row[1])
+180 -465
View File
@@ -5740,6 +5740,186 @@
"zu-ZA": "ZA"
}
},
"karmasearch": {
"all_locale": null,
"custom": {},
"data_type": "traits_v1",
"languages": {},
"regions": {
"da-DK": "da-DK",
"de-AT": "de-AT",
"de-CH": "de-CH",
"de-DE": "de-DE",
"en-AU": "en-AU",
"en-CA": "en-CA",
"en-GB": "en-GB",
"en-ID": "en-ID",
"en-IN": "en-IN",
"en-MY": "en-MY",
"en-NZ": "en-NZ",
"en-PH": "en-PH",
"en-US": "en-US",
"en-ZA": "en-ZA",
"es-AR": "es-AR",
"es-CL": "es-CL",
"es-ES": "es-ES",
"es-MX": "es-MX",
"es-US": "es-US",
"fi-FI": "fi-FI",
"fr-BE": "fr-BE",
"fr-CA": "fr-CA",
"fr-CH": "fr-CH",
"fr-FR": "fr-FR",
"it-IT": "it-IT",
"ja-JP": "ja-JP",
"ko-KR": "ko-KR",
"nl-BE": "nl-BE",
"nl-NL": "nl-NL",
"pl-PL": "pl-PL",
"pt-BR": "pt-BR",
"ru-RU": "ru-RU",
"sv-SE": "sv-SE",
"tr-TR": "tr-TR",
"zh-CN": "zh-CN",
"zh-HK": "zh-HK",
"zh-TW": "zh-TW"
}
},
"karmasearch images": {
"all_locale": null,
"custom": {},
"data_type": "traits_v1",
"languages": {},
"regions": {
"da-DK": "da-DK",
"de-AT": "de-AT",
"de-CH": "de-CH",
"de-DE": "de-DE",
"en-AU": "en-AU",
"en-CA": "en-CA",
"en-GB": "en-GB",
"en-ID": "en-ID",
"en-IN": "en-IN",
"en-MY": "en-MY",
"en-NZ": "en-NZ",
"en-PH": "en-PH",
"en-US": "en-US",
"en-ZA": "en-ZA",
"es-AR": "es-AR",
"es-CL": "es-CL",
"es-ES": "es-ES",
"es-MX": "es-MX",
"es-US": "es-US",
"fi-FI": "fi-FI",
"fr-BE": "fr-BE",
"fr-CA": "fr-CA",
"fr-CH": "fr-CH",
"fr-FR": "fr-FR",
"it-IT": "it-IT",
"ja-JP": "ja-JP",
"ko-KR": "ko-KR",
"nl-BE": "nl-BE",
"nl-NL": "nl-NL",
"pl-PL": "pl-PL",
"pt-BR": "pt-BR",
"ru-RU": "ru-RU",
"sv-SE": "sv-SE",
"tr-TR": "tr-TR",
"zh-CN": "zh-CN",
"zh-HK": "zh-HK",
"zh-TW": "zh-TW"
}
},
"karmasearch news": {
"all_locale": null,
"custom": {},
"data_type": "traits_v1",
"languages": {},
"regions": {
"da-DK": "da-DK",
"de-AT": "de-AT",
"de-CH": "de-CH",
"de-DE": "de-DE",
"en-AU": "en-AU",
"en-CA": "en-CA",
"en-GB": "en-GB",
"en-ID": "en-ID",
"en-IN": "en-IN",
"en-MY": "en-MY",
"en-NZ": "en-NZ",
"en-PH": "en-PH",
"en-US": "en-US",
"en-ZA": "en-ZA",
"es-AR": "es-AR",
"es-CL": "es-CL",
"es-ES": "es-ES",
"es-MX": "es-MX",
"es-US": "es-US",
"fi-FI": "fi-FI",
"fr-BE": "fr-BE",
"fr-CA": "fr-CA",
"fr-CH": "fr-CH",
"fr-FR": "fr-FR",
"it-IT": "it-IT",
"ja-JP": "ja-JP",
"ko-KR": "ko-KR",
"nl-BE": "nl-BE",
"nl-NL": "nl-NL",
"pl-PL": "pl-PL",
"pt-BR": "pt-BR",
"ru-RU": "ru-RU",
"sv-SE": "sv-SE",
"tr-TR": "tr-TR",
"zh-CN": "zh-CN",
"zh-HK": "zh-HK",
"zh-TW": "zh-TW"
}
},
"karmasearch videos": {
"all_locale": null,
"custom": {},
"data_type": "traits_v1",
"languages": {},
"regions": {
"da-DK": "da-DK",
"de-AT": "de-AT",
"de-CH": "de-CH",
"de-DE": "de-DE",
"en-AU": "en-AU",
"en-CA": "en-CA",
"en-GB": "en-GB",
"en-ID": "en-ID",
"en-IN": "en-IN",
"en-MY": "en-MY",
"en-NZ": "en-NZ",
"en-PH": "en-PH",
"en-US": "en-US",
"en-ZA": "en-ZA",
"es-AR": "es-AR",
"es-CL": "es-CL",
"es-ES": "es-ES",
"es-MX": "es-MX",
"es-US": "es-US",
"fi-FI": "fi-FI",
"fr-BE": "fr-BE",
"fr-CA": "fr-CA",
"fr-CH": "fr-CH",
"fr-FR": "fr-FR",
"it-IT": "it-IT",
"ja-JP": "ja-JP",
"ko-KR": "ko-KR",
"nl-BE": "nl-BE",
"nl-NL": "nl-NL",
"pl-PL": "pl-PL",
"pt-BR": "pt-BR",
"ru-RU": "ru-RU",
"sv-SE": "sv-SE",
"tr-TR": "tr-TR",
"zh-CN": "zh-CN",
"zh-HK": "zh-HK",
"zh-TW": "zh-TW"
}
},
"mojeek": {
"all_locale": null,
"custom": {
@@ -6634,255 +6814,6 @@
},
"regions": {}
},
"privacywall": {
"all_locale": null,
"custom": {},
"data_type": "traits_v1",
"languages": {},
"regions": {
"bg-BG": "BG",
"cs-CZ": "CZ",
"da-DK": "DK",
"de-AT": "AT",
"de-BE": "BE",
"de-CH": "CH",
"de-DE": "DE",
"de-LI": "LI",
"de-LU": "LU",
"el-CY": "CY",
"el-GR": "GR",
"en-AU": "AU",
"en-CA": "CA",
"en-GB": "GB",
"en-HK": "HK",
"en-IE": "IE",
"en-IN": "IN",
"en-MT": "MT",
"en-NZ": "NZ",
"en-PH": "PH",
"en-SG": "SG",
"en-US": "US",
"es-AR": "AR",
"es-CL": "CL",
"es-CO": "CO",
"es-ES": "ES",
"es-MX": "MX",
"es-PE": "PE",
"es-VE": "VE",
"et-EE": "EE",
"fi-FI": "FI",
"fil-PH": "PH",
"fr-BE": "BE",
"fr-CA": "CA",
"fr-CH": "CH",
"fr-FR": "FR",
"fr-LU": "LU",
"ga-IE": "IE",
"gsw-CH": "CH",
"gsw-LI": "LI",
"hi-IN": "IN",
"hr-HR": "HR",
"hu-HU": "HU",
"id-ID": "ID",
"it-CH": "CH",
"it-IT": "IT",
"ja-JP": "JP",
"ko-KR": "KR",
"lb-LU": "LU",
"lt-LT": "LT",
"lv-LV": "LV",
"mi-NZ": "NZ",
"ms-MY": "MY",
"ms-SG": "SG",
"mt-MT": "MT",
"nb-NO": "NO",
"nl-BE": "BE",
"nl-NL": "NL",
"nn-NO": "NO",
"pl-PL": "PL",
"pt-BR": "BR",
"pt-PT": "PT",
"qu-PE": "PE",
"ro-RO": "RO",
"sk-SK": "SK",
"sl-SI": "SI",
"sv-FI": "FI",
"sv-SE": "SE",
"ta-SG": "SG",
"th-TH": "TH",
"tr-CY": "CY",
"vi-VN": "VN",
"zh-HK": "HK",
"zh-SG": "SG",
"zh-TW": "TW"
}
},
"privacywall images": {
"all_locale": null,
"custom": {},
"data_type": "traits_v1",
"languages": {},
"regions": {
"bg-BG": "BG",
"cs-CZ": "CZ",
"da-DK": "DK",
"de-AT": "AT",
"de-BE": "BE",
"de-CH": "CH",
"de-DE": "DE",
"de-LI": "LI",
"de-LU": "LU",
"el-CY": "CY",
"el-GR": "GR",
"en-AU": "AU",
"en-CA": "CA",
"en-GB": "GB",
"en-HK": "HK",
"en-IE": "IE",
"en-IN": "IN",
"en-MT": "MT",
"en-NZ": "NZ",
"en-PH": "PH",
"en-SG": "SG",
"en-US": "US",
"es-AR": "AR",
"es-CL": "CL",
"es-CO": "CO",
"es-ES": "ES",
"es-MX": "MX",
"es-PE": "PE",
"es-VE": "VE",
"et-EE": "EE",
"fi-FI": "FI",
"fil-PH": "PH",
"fr-BE": "BE",
"fr-CA": "CA",
"fr-CH": "CH",
"fr-FR": "FR",
"fr-LU": "LU",
"ga-IE": "IE",
"gsw-CH": "CH",
"gsw-LI": "LI",
"hi-IN": "IN",
"hr-HR": "HR",
"hu-HU": "HU",
"id-ID": "ID",
"it-CH": "CH",
"it-IT": "IT",
"ja-JP": "JP",
"ko-KR": "KR",
"lb-LU": "LU",
"lt-LT": "LT",
"lv-LV": "LV",
"mi-NZ": "NZ",
"ms-MY": "MY",
"ms-SG": "SG",
"mt-MT": "MT",
"nb-NO": "NO",
"nl-BE": "BE",
"nl-NL": "NL",
"nn-NO": "NO",
"pl-PL": "PL",
"pt-BR": "BR",
"pt-PT": "PT",
"qu-PE": "PE",
"ro-RO": "RO",
"sk-SK": "SK",
"sl-SI": "SI",
"sv-FI": "FI",
"sv-SE": "SE",
"ta-SG": "SG",
"th-TH": "TH",
"tr-CY": "CY",
"vi-VN": "VN",
"zh-HK": "HK",
"zh-SG": "SG",
"zh-TW": "TW"
}
},
"privacywall videos": {
"all_locale": null,
"custom": {},
"data_type": "traits_v1",
"languages": {},
"regions": {
"bg-BG": "BG",
"cs-CZ": "CZ",
"da-DK": "DK",
"de-AT": "AT",
"de-BE": "BE",
"de-CH": "CH",
"de-DE": "DE",
"de-LI": "LI",
"de-LU": "LU",
"el-CY": "CY",
"el-GR": "GR",
"en-AU": "AU",
"en-CA": "CA",
"en-GB": "GB",
"en-HK": "HK",
"en-IE": "IE",
"en-IN": "IN",
"en-MT": "MT",
"en-NZ": "NZ",
"en-PH": "PH",
"en-SG": "SG",
"en-US": "US",
"es-AR": "AR",
"es-CL": "CL",
"es-CO": "CO",
"es-ES": "ES",
"es-MX": "MX",
"es-PE": "PE",
"es-VE": "VE",
"et-EE": "EE",
"fi-FI": "FI",
"fil-PH": "PH",
"fr-BE": "BE",
"fr-CA": "CA",
"fr-CH": "CH",
"fr-FR": "FR",
"fr-LU": "LU",
"ga-IE": "IE",
"gsw-CH": "CH",
"gsw-LI": "LI",
"hi-IN": "IN",
"hr-HR": "HR",
"hu-HU": "HU",
"id-ID": "ID",
"it-CH": "CH",
"it-IT": "IT",
"ja-JP": "JP",
"ko-KR": "KR",
"lb-LU": "LU",
"lt-LT": "LT",
"lv-LV": "LV",
"mi-NZ": "NZ",
"ms-MY": "MY",
"ms-SG": "SG",
"mt-MT": "MT",
"nb-NO": "NO",
"nl-BE": "BE",
"nl-NL": "NL",
"nn-NO": "NO",
"pl-PL": "PL",
"pt-BR": "BR",
"pt-PT": "PT",
"qu-PE": "PE",
"ro-RO": "RO",
"sk-SK": "SK",
"sl-SI": "SI",
"sv-FI": "FI",
"sv-SE": "SE",
"ta-SG": "SG",
"th-TH": "TH",
"tr-CY": "CY",
"vi-VN": "VN",
"zh-HK": "HK",
"zh-SG": "SG",
"zh-TW": "TW"
}
},
"qwant": {
"all_locale": null,
"custom": {},
@@ -7424,222 +7355,6 @@
},
"regions": {}
},
"resulthunter": {
"all_locale": "all",
"custom": {
"ui_lang": {
"az": "az",
"bg": "bg",
"br": "br",
"ca": "ca",
"cs": "cs",
"cy": "cy",
"da": "da",
"de-DE": "de-de",
"el": "el",
"en-CA": "en-ca",
"en-GB": "en-gb",
"en-IN": "en-in",
"en-US": "en-us",
"es": "es",
"et": "et",
"eu": "eu",
"fi-FI": "fi-fi",
"fr-CA": "fr-ca",
"fr-FR": "fr-fr",
"gl": "gl",
"hr": "hr",
"hu": "hu",
"id": "id",
"it": "it",
"ja-JP": "ja-jp",
"ka": "ka",
"ko": "ko",
"lt": "lt",
"lv": "lv",
"ms": "ms",
"nb": "nb",
"nl": "nl",
"pl": "pl",
"pt-BR": "pt-br",
"ro": "ro",
"ru": "ru",
"sk": "sk",
"sl": "sl",
"sq-AL": "sq-al",
"sr": "sr",
"sr_Latn": "sr-latn",
"sv": "sv",
"sw-KE": "sw-ke",
"th": "th",
"tr": "tr",
"uk": "uk",
"vi": "vi",
"zh": "zh",
"zh-TW": "zh-tw"
}
},
"data_type": "traits_v1",
"languages": {},
"regions": {
"ar-SA": "sa",
"da-DK": "dk",
"de-AT": "at",
"de-BE": "be",
"de-CH": "ch",
"de-DE": "de",
"en-AU": "au",
"en-CA": "ca",
"en-GB": "gb",
"en-HK": "hk",
"en-IN": "in",
"en-NZ": "nz",
"en-PH": "ph",
"en-US": "us",
"en-ZA": "za",
"es-AR": "ar",
"es-CL": "cl",
"es-ES": "es",
"es-MX": "mx",
"fi-FI": "fi",
"fil-PH": "ph",
"fr-BE": "be",
"fr-CA": "ca",
"fr-CH": "ch",
"fr-FR": "fr",
"gsw-CH": "ch",
"hi-IN": "in",
"id-ID": "id",
"it-CH": "ch",
"it-IT": "it",
"ja-JP": "jp",
"ko-KR": "kr",
"mi-NZ": "nz",
"ms-MY": "my",
"nb-NO": "no",
"nl-BE": "be",
"nl-NL": "nl",
"nn-NO": "no",
"pl-PL": "pl",
"pt-BR": "br",
"pt-PT": "pt",
"ru-RU": "ru",
"sv-FI": "fi",
"sv-SE": "se",
"tr-TR": "tr",
"zh-CN": "cn",
"zh-HK": "hk",
"zh-TW": "tw"
}
},
"resulthunter images": {
"all_locale": "all",
"custom": {
"ui_lang": {
"az": "az",
"bg": "bg",
"br": "br",
"ca": "ca",
"cs": "cs",
"cy": "cy",
"da": "da",
"de-DE": "de-de",
"el": "el",
"en-CA": "en-ca",
"en-GB": "en-gb",
"en-IN": "en-in",
"en-US": "en-us",
"es": "es",
"et": "et",
"eu": "eu",
"fi-FI": "fi-fi",
"fr-CA": "fr-ca",
"fr-FR": "fr-fr",
"gl": "gl",
"hr": "hr",
"hu": "hu",
"id": "id",
"it": "it",
"ja-JP": "ja-jp",
"ka": "ka",
"ko": "ko",
"lt": "lt",
"lv": "lv",
"ms": "ms",
"nb": "nb",
"nl": "nl",
"pl": "pl",
"pt-BR": "pt-br",
"ro": "ro",
"ru": "ru",
"sk": "sk",
"sl": "sl",
"sq-AL": "sq-al",
"sr": "sr",
"sr_Latn": "sr-latn",
"sv": "sv",
"sw-KE": "sw-ke",
"th": "th",
"tr": "tr",
"uk": "uk",
"vi": "vi",
"zh": "zh",
"zh-TW": "zh-tw"
}
},
"data_type": "traits_v1",
"languages": {},
"regions": {
"ar-SA": "sa",
"da-DK": "dk",
"de-AT": "at",
"de-BE": "be",
"de-CH": "ch",
"de-DE": "de",
"en-AU": "au",
"en-CA": "ca",
"en-GB": "gb",
"en-HK": "hk",
"en-IN": "in",
"en-NZ": "nz",
"en-PH": "ph",
"en-US": "us",
"en-ZA": "za",
"es-AR": "ar",
"es-CL": "cl",
"es-ES": "es",
"es-MX": "mx",
"fi-FI": "fi",
"fil-PH": "ph",
"fr-BE": "be",
"fr-CA": "ca",
"fr-CH": "ch",
"fr-FR": "fr",
"gsw-CH": "ch",
"hi-IN": "in",
"id-ID": "id",
"it-CH": "ch",
"it-IT": "it",
"ja-JP": "jp",
"ko-KR": "kr",
"mi-NZ": "nz",
"ms-MY": "my",
"nb-NO": "no",
"nl-BE": "be",
"nl-NL": "nl",
"nn-NO": "no",
"pl-PL": "pl",
"pt-BR": "br",
"pt-PT": "pt",
"ru-RU": "ru",
"sv-FI": "fi",
"sv-SE": "se",
"tr-TR": "tr",
"zh-CN": "cn",
"zh-HK": "hk",
"zh-TW": "tw"
}
},
"sepiasearch": {
"all_locale": null,
"custom": {},
+70 -120
View File
@@ -3,7 +3,6 @@
- :py:obj:`searx.enginelib.EngineCache`
- :py:obj:`searx.enginelib.Engine`
- :py:obj:`searx.enginelib.EngineAbout`
- :py:obj:`searx.enginelib.traits`
There is a command line for developer purposes and for deeper analysis. Here is
@@ -24,7 +23,7 @@ an example in which the command line is called in the development environment::
"""
__all__ = ["EngineCache", "Engine", "EngineAbout", "ENGINES_CACHE"]
__all__ = ["EngineCache", "Engine", "ENGINES_CACHE"]
import typing as t
import abc
@@ -32,7 +31,6 @@ from collections.abc import Callable
import logging
import string
import typer
import msgspec
from ..cache import ExpireCacheSQLite, ExpireCacheCfg
@@ -41,7 +39,7 @@ if t.TYPE_CHECKING:
from searx.enginelib.traits import EngineTraits
from searx.extended_types import SXNG_Response
from searx.result_types import EngineResults
from searx.search.processors import OfflineParamTypes, OnlineParamTypes, ProcessorType
from searx.search.processors import OfflineParamTypes, OnlineParamTypes
ENGINES_CACHE: ExpireCacheSQLite = ExpireCacheSQLite.build_cache(
ExpireCacheCfg(
@@ -180,58 +178,11 @@ class EngineCache:
return ENGINES_CACHE.secret_hash(name=name)
class EngineAbout(msgspec.Struct, kw_only=True):
"""Additional fields describing the engine.
.. code:: yaml
about:
website: https://example.com
wikidata_id: Q306656
official_api_documentation: https://example.com/api-doc
use_official_api: true
require_api_key: true
results: HTML
"""
# pylint: disable=too-few-public-methods
website: str = ""
"""Official web-site of the origin."""
wikidata_id: str = ""
"""`Wikidata ID <https://www.wikidata.org/wiki/Wikidata:Identifiers>`_"""
official_api_documentation: str = ""
"""URL of the official API (regardless of whether it is used)"""
use_official_api: bool = False
"""SearXNG engine makes use of the official API or not"""
require_api_key: bool = False
"""API requires a key or not."""
results: str = ""
"""Data format of the source (online-engines: of the response)."""
description: str = ""
"""Brief description of the engine and where it gets its data from.
This value should only be set as long as no description of the data source
is available via a :py:obj:`EngineAbout.wikidata_id`.
"""
language: str = ""
"""Deprecated! Migrate your setting from `engine.about.language` to
`engine.language`"""
class Engine(abc.ABC): # pylint: disable=too-few-public-methods
"""Class of engine instances build from YAML settings.
Further documentation see :ref:`general engine configuration`.
The defaults are taken from :py:obj:`searx.engines.ENGINE_DEFAULT_ARGS`.
.. hint::
This class is currently never initialized and only used for type hinting.
@@ -239,71 +190,39 @@ class Engine(abc.ABC): # pylint: disable=too-few-public-methods
logger: logging.Logger
# Common options of the engine module
# Common options in the engine module
engine_type: "ProcessorType" = "online"
engine_type: str
"""Type of the engine (:ref:`searx.search.processors`)"""
paging: bool = False
paging: bool
"""Engine supports multiple pages."""
max_page: int = 0
"""If the engine supports paging, then this is the value for the last page
that is still supported. ``0`` means unlimited numbers of pages."""
time_range_support: bool = False
time_range_support: bool
"""Engine supports search time range."""
safesearch: bool = False
safesearch: bool
"""Engine supports SafeSearch"""
language_support: bool = False
language_support: bool
"""Engine supports languages (locales) search."""
fetch_traits: "Callable[[EngineTraits, bool], None]"
"""Function to to fetch engine's traits from origin."""
traits: "traits.EngineTraits"
"""Traits of the engine."""
# settings.yml
name: str
"""Name that will be used across SearXNG to define this engine. In settings, on
the result page .."""
engine: str
"""Name of the python file used to handle requests and responses to and from
this search engine (file name from :origin:`searx/engines` without
``.py``)."""
categories: list[str] = ["general"]
"""Specifies to which :ref:`engine categories` the engine should be added."""
language: str = ""
"""If the engine supports only one language, this language is specified here
(``en``, ``de``, ``"no"`` or ..); otherwise, the value remains empty. For
the YAML configuration: think of the `YAML-Norway problem
<https://ruuda.nl/2023/the-yaml-document-from-hell#the-norway-problem>`_
language: str
"""For an engine, when there is ``language: ...`` in the YAML settings the engine
does support only this one language:
.. code:: yaml
- name: google norway
- name: google french
engine: google
language: "no"
Depending on ``language_support``, this value has similar but also slightly
different meanings.
- When ``language_support`` is **true**, the map of
:py:obj:`traits.EngineTraits.languages` is reduced to the selected
language
- When ``language_support`` is **false**, then the implementation of the
engine only supports this one ``language``
language: fr
"""
region: str = ""
region: str
"""For an engine, when there is ``region: ...`` in the YAML settings the engine
does support only this one region::
@@ -314,6 +233,26 @@ class Engine(abc.ABC): # pylint: disable=too-few-public-methods
region: fr-BE
"""
fetch_traits: "Callable[[EngineTraits, bool], None]"
"""Function to to fetch engine's traits from origin."""
traits: "traits.EngineTraits"
"""Traits of the engine."""
# settings.yml
categories: list[str]
"""Specifies to which :ref:`engine categories` the engine should be added."""
name: str
"""Name that will be used across SearXNG to define this engine. In settings, on
the result page .."""
engine: str
"""Name of the python file used to handle requests and responses to and from
this search engine (file name from :origin:`searx/engines` without
``.py``)."""
enable_http: bool
"""Enable HTTP (by default only HTTPS is enabled)."""
@@ -326,31 +265,6 @@ class Engine(abc.ABC): # pylint: disable=too-few-public-methods
display_error_messages: bool
"""Display error messages on the web UI."""
disabled: bool = False
"""To disable by default the engine, but not deleting it. It will allow the
user to manually activate it in the settings."""
inactive: bool = False
"""Remove the engine from the settings (*disabled & removed*)."""
about: EngineAbout = EngineAbout()
"""Additional fields describing the engine."""
using_tor_proxy: bool = False
"""Using tor proxy (``true``) or not (``false``) for this engine."""
send_accept_language_header: bool = True
"""When this option is activated (default), the language (locale) that is
selected by the user is used to build and send a ``Accept-Language`` header
in the request to the origin search engine."""
tokens: list[str] = []
"""A list of secret tokens to make this engine *private*, more details see
:ref:`private engines`."""
weight: float = 1.0
"""Weighting of the results of this engine (:ref:`weight <settings engines>`)."""
proxies: dict[str, dict[str, str]]
"""Set proxies for a specific engine (YAML):
@@ -361,6 +275,42 @@ class Engine(abc.ABC): # pylint: disable=too-few-public-methods
https: socks5://proxy:port
"""
disabled: bool
"""To disable by default the engine, but not deleting it. It will allow the
user to manually activate it in the settings."""
inactive: bool
"""Remove the engine from the settings (*disabled & removed*)."""
about: dict[str, dict[str, str]]
"""Additional fields describing the engine.
.. code:: yaml
about:
website: https://example.com
wikidata_id: Q306656
official_api_documentation: https://example.com/api-doc
use_official_api: true
require_api_key: true
results: HTML
"""
using_tor_proxy: bool
"""Using tor proxy (``true``) or not (``false``) for this engine."""
send_accept_language_header: bool
"""When this option is activated (default), the language (locale) that is
selected by the user is used to build and send a ``Accept-Language`` header
in the request to the origin search engine."""
tokens: list[str]
"""A list of secret tokens to make this engine *private*, more details see
:ref:`private engines`."""
weight: int
"""Weighting of the results of this engine (:ref:`weight <settings engines>`)."""
def setup(self, engine_settings: dict[str, t.Any]) -> bool: # pylint: disable=unused-argument
"""Dynamic setup of the engine settings.
+12 -15
View File
@@ -142,11 +142,11 @@ class EngineTraits:
"""
if self.data_type == "traits_v1":
self._set_traits_v1(engine) # pyright: ignore[reportArgumentType]
self._set_traits_v1(engine)
else:
raise TypeError("engine traits of type %s is unknown" % self.data_type)
def _set_traits_v1(self, engine: "Engine") -> None:
def _set_traits_v1(self, engine: "Engine | types.ModuleType") -> None:
# For an engine, when there is `language: ...` in the YAML settings the engine
# does support only this one language (region)::
#
@@ -159,25 +159,22 @@ class EngineTraits:
_msg = "settings.yml - engine: '%s' / %s: '%s' not supported"
if engine.language:
if engine.language_support:
if not len(traits.languages) > 1:
raise ValueError(
f"engine {engine.name}: activated language_support with just one or less languages"
)
if engine.language not in traits.languages:
raise ValueError(_msg % (engine.name, "language", engine.language))
traits.languages = {engine.language: traits.languages[engine.language]}
languages = traits.languages
if hasattr(engine, "language"):
if engine.language not in languages:
raise ValueError(_msg % (engine.name, "language", engine.language))
traits.languages = {engine.language: languages[engine.language]}
if engine.region:
if engine.region not in traits.regions:
regions = traits.regions
if hasattr(engine, "region"):
if engine.region not in regions:
raise ValueError(_msg % (engine.name, "region", engine.region))
traits.regions = {engine.region: traits.regions[engine.region]}
traits.regions = {engine.region: regions[engine.region]}
engine.language_support = bool(traits.languages or traits.regions)
# set the copied & modified traits in engine's namespace
engine.traits = traits
engine.traits = traits # pyright: ignore[reportAttributeAccessIssue]
class EngineTraitsMap(dict[str, EngineTraits]):
+1 -1
View File
@@ -22,8 +22,8 @@ about = {
"use_official_api": False,
"require_api_key": False,
"results": "HTML",
"language": "zh",
}
language = "zh"
# Engine Configuration
categories = ["general"]
+6 -6
View File
@@ -5,19 +5,19 @@ intended monkey patching of the engine modules.
.. attention::
Monkey-patching modules is a practice from the past that shouldn't be
expanded upon. In the long run, engines should be instances of
:py:obj:`searx.enginelib.Engine`. However, as long as long as all engine
modules aren't converted to this class, these builtin types will still be
needed.
expanded upon. In the long run, there should be an engine class that can be
inherited. However, as long as this class doesn't exist, and as long as all
engine modules aren't converted to an engine class, these builtin types will
still be needed.
"""
import logging
from searx.enginelib import traits as _traits
logger: logging.Logger
supported_languages: str
language_aliases: str
language_support: bool
language: str
region: str
traits: _traits.EngineTraits
# from searx.engines.ENGINE_DEFAULT_ARGS
+9 -50
View File
@@ -12,50 +12,41 @@ import typing as t
import sys
import copy
import os
from os.path import realpath, dirname
import warnings
import types
import inspect
import msgspec
from searx import logger, settings
from searx.utils import load_module
from searx.data import ENGINE_TRAITS
from searx.enginelib import Engine, EngineAbout
if t.TYPE_CHECKING:
from searx.enginelib import Engine
logger = logger.getChild('engines')
ENGINE_DIR = dirname(realpath(__file__))
# Defaults for the namespace of an engine module, see load_engine()
ENGINE_DEFAULT_ARGS: dict[str, t.Any] = {
ENGINE_DEFAULT_ARGS: dict[str, int | str | list[t.Any] | dict[str, t.Any] | bool] = {
# Common options in the engine module
"engine_type": "online",
"paging": False,
"max_page": 0,
"time_range_support": False,
"safesearch": False,
"language_support": False,
# settings.yml
"categories": ["general"],
"language": "",
"region": "",
"enable_http": False,
"shortcut": "-",
"timeout": settings["outgoing"]["request_timeout"],
"display_error_messages": True,
"disabled": False,
"inactive": False,
"about": EngineAbout(),
"about": {},
"using_tor_proxy": False,
"send_accept_language_header": True,
"tokens": [],
"weight": 1.0,
"max_page": 0,
}
"""Default values that are set in an engine of type *module*, please compare
with the class :py:obj:`searx.enginelib.Engine`."""
# set automatically when an engine does not have any tab category
DEFAULT_CATEGORY = 'other'
@@ -185,41 +176,14 @@ def set_loggers(engine: "Engine|types.ModuleType", engine_name: str):
def update_engine_attributes(engine: "Engine | types.ModuleType", engine_data: dict[str, t.Any]):
# pylint: disable=too-many-branches
# set engine attributes from engine_data
kvargs: dict[str, t.Any]
if isinstance(engine.about, EngineAbout):
kvargs = {**msgspec.to_builtins(engine.about), **engine_data.get("about", {})}
else:
kvargs = {**engine.about, **engine_data.get("about", {})}
try:
engine.about = EngineAbout(**kvargs)
except TypeError as exc:
raise TypeError(
f"engine '{engine_data['name']}' ({engine_data['engine']}) - in the about section --> {exc}"
) from exc
# warn about deprecated engine settings
if engine.about.language:
if hasattr(engine, "language") and not engine.language:
engine.language = engine.about.language
warnings.warn(
f"engine '{engine_data['name']}' ({engine_data['engine']})"
f" - migrate engine.about.language to engine.language!",
DeprecationWarning,
2,
)
for param_name, param_value in engine_data.items():
if param_name == "about":
continue
if param_name == 'categories':
if isinstance(param_value, str):
param_value = list(map(str.strip, param_value.split(',')))
engine.categories = param_value # type: ignore
elif hasattr(engine, 'about') and param_name == 'about':
engine.about = {**engine.about, **engine_data['about']} # type: ignore
else:
setattr(engine, param_name, param_value)
@@ -228,9 +192,6 @@ def update_engine_attributes(engine: "Engine | types.ModuleType", engine_data: d
if not hasattr(engine, arg_name):
setattr(engine, arg_name, copy.deepcopy(arg_value))
if ENGINE_TRAITS.get(engine.name, {}).get("languages") and not engine.language_support:
raise ValueError(f"engine '{engine.name}' ({engine_data['engine']}) language_support should be set to True")
def update_attributes_for_tor(engine: "Engine | types.ModuleType"):
if using_tor_proxy(engine) and hasattr(engine, 'onion_url'):
@@ -317,8 +278,6 @@ def load_engines(engine_list: list[dict[str, t.Any]]):
else:
# if an engine can't be loaded (if for example the engine is missing
# tor or some other requirements) its set to inactive!
logger.error(
f"(PID {os.getpid()}) loading engine %s failed: set engine to inactive!", engine_data.get("name", "???")
)
logger.error("loading engine %s failed: set engine to inactive!", engine_data.get("name", "???"))
engine_data["inactive"] = True
return engines
+1 -1
View File
@@ -16,12 +16,12 @@ about = {
"use_official_api": False,
"require_api_key": False,
"results": "HTML",
"language": "zh",
}
# Engine Configuration
categories = ["videos"]
paging = True
language = "zh"
# Base URL
base_url = "https://www.acfun.cn"
-1
View File
@@ -64,7 +64,6 @@ about: dict[str, t.Any] = {
# engine dependent config
categories = ["files", "books"]
paging: bool = True
language_support = True
# search-url
base_url: list[str] | str = []
+1 -1
View File
@@ -42,8 +42,8 @@ about = {
'use_official_api': False,
'require_api_key': False,
'results': 'HTML',
'language': 'it',
}
language = "it"
def request(query, params):
+210
View File
@@ -0,0 +1,210 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""AOL supports WEB, image, and video search. Internally, it uses the Bing
index.
AOL doesn't seem to support setting the language via request parameters, instead
the results are based on the URL. For example, there is
- `search.aol.com <https://search.aol.com>`_ for English results
- `suche.aol.de <https://suche.aol.de>`_ for German results
However, AOL offers its services only in a few regions:
- en-US: search.aol.com
- de-DE: suche.aol.de
- fr-FR: recherche.aol.fr
- en-GB: search.aol.co.uk
- en-CA: search.aol.ca
In order to still offer sufficient support for language and region, the `search
keywords`_ known from Bing, ``language`` and ``loc`` (region), are added to the
search term (AOL is basically just a proxy for Bing).
.. _search keywords:
https://support.microsoft.com/en-us/topic/advanced-search-keywords-ea595928-5d63-4a0b-9c6b-0b769865e78a
"""
from urllib.parse import urlencode, unquote_plus
import typing as t
from lxml import html
from dateutil import parser
from searx.result_types import EngineResults
from searx.utils import eval_xpath_list, eval_xpath, extract_text
if t.TYPE_CHECKING:
from searx.extended_types import SXNG_Response
from searx.search.processors import OnlineParams
about = {
"website": "https://www.aol.com",
"wikidata_id": "Q2407",
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": "HTML",
}
categories = ["general"]
search_type = "search" # supported: search, image, video
paging = True
safesearch = True
time_range_support = True
results_per_page = 10
base_url = "https://search.aol.com"
time_range_map = {"day": "1d", "week": "1w", "month": "1m", "year": "1y"}
safesearch_map = {0: "p", 1: "r", 2: "i"}
enable_http2 = False
def init(_):
if search_type not in ("search", "image", "video"):
raise ValueError(f"unsupported search type {search_type}")
def request(query: str, params: "OnlineParams") -> None:
language, region = (params["searxng_locale"].split("-") + [None])[:2]
if language and language != "all":
query = f"{query} language:{language}"
if region:
query = f"{query} loc:{region}"
args: dict[str, str | int | None] = {
"q": query,
"b": params["pageno"] * results_per_page + 1, # page is 1-indexed
"pz": results_per_page,
}
if params["time_range"]:
args["fr2"] = "time"
args["age"] = params["time_range"]
else:
args["fr2"] = "sb-top-search"
params["cookies"]["sB"] = f"vm={safesearch_map[params['safesearch']]}"
params["url"] = f"{base_url}/aol/{search_type}?{urlencode(args)}"
logger.debug(params)
def _deobfuscate_url(obfuscated_url: str) -> str | None:
# URL looks like "https://search.aol.com/click/_ylt=AwjFSDjd;_ylu=JfsdjDFd/RV=2/RE=1774058166/RO=10/RU=https%3a%2f%2fen.wikipedia.org%2fwiki%2fTree/RK=0/RS=BP2CqeMLjscg4n8cTmuddlEQA2I-" # pylint: disable=line-too-long
if not obfuscated_url:
return None
for part in obfuscated_url.split("/"):
if part.startswith("RU="):
return unquote_plus(part[3:])
# pattern for de-obfuscating URL not found, fall back to Yahoo's tracking link
return obfuscated_url
def _general_results(doc: html.HtmlElement) -> EngineResults:
res = EngineResults()
for result in eval_xpath_list(doc, "//div[@id='web']//ol/li[not(contains(@class, 'first'))]"):
obfuscated_url = extract_text(eval_xpath(result, ".//h3/a/@href"))
if not obfuscated_url:
continue
url = _deobfuscate_url(obfuscated_url)
if not url:
continue
res.add(
res.types.MainResult(
url=url,
title=extract_text(eval_xpath(result, ".//h3/a")) or "",
content=extract_text(eval_xpath(result, ".//div[contains(@class, 'compText')]")) or "",
thumbnail=extract_text(eval_xpath(result, ".//a[contains(@class, 'thm')]/img/@data-src")) or "",
)
)
return res
def _video_results(doc: html.HtmlElement) -> EngineResults:
res = EngineResults()
for result in eval_xpath_list(doc, "//div[contains(@class, 'results')]//ol/li"):
obfuscated_url = extract_text(eval_xpath(result, ".//a/@href"))
if not obfuscated_url:
continue
url = _deobfuscate_url(obfuscated_url)
if not url:
continue
published_date_raw = extract_text(eval_xpath(result, ".//div[contains(@class, 'v-age')]"))
try:
published_date = parser.parse(published_date_raw or "")
except parser.ParserError:
published_date = None
res.add(
res.types.LegacyResult(
{
"template": "videos.html",
"url": url,
"title": extract_text(eval_xpath(result, ".//h3")),
"content": extract_text(eval_xpath(result, ".//div[contains(@class, 'compText')]")),
"thumbnail": extract_text(eval_xpath(result, ".//img[contains(@class, 'thm')]/@src")),
"length": extract_text(eval_xpath(result, ".//span[contains(@class, 'v-time')]")),
"publishedDate": published_date,
}
)
)
return res
def _image_results(doc: html.HtmlElement) -> EngineResults:
res = EngineResults()
for result in eval_xpath_list(doc, "//section[@id='results']//ul/li"):
obfuscated_url = extract_text(eval_xpath(result, "./a/@href"))
if not obfuscated_url:
continue
url = _deobfuscate_url(obfuscated_url)
if not url:
continue
res.add(
res.types.LegacyResult(
{
"template": "images.html",
# results don't have an extra URL, only the image source
"url": url,
"title": extract_text(eval_xpath(result, ".//a/@aria-label")),
"thumbnail_src": extract_text(eval_xpath(result, ".//img/@src")),
"img_src": url,
}
)
)
return res
def response(resp: "SXNG_Response") -> EngineResults:
doc = html.fromstring(resp.text)
match search_type:
case "search":
results = _general_results(doc)
case "image":
results = _image_results(doc)
case "video":
results = _video_results(doc)
case _:
raise ValueError("unsupported search type")
for suggestion in eval_xpath_list(doc, ".//ol[contains(@class, 'searchRightBottom')]//table//a"):
results.add(results.types.LegacyResult({"suggestion": extract_text(suggestion)}))
return results
-1
View File
@@ -35,7 +35,6 @@ about = {
categories = ["it", "software wikis"]
paging = True
main_wiki = "wiki.archlinux.org"
language_support = True
def request(query, params):
+1 -1
View File
@@ -54,8 +54,8 @@ about = {
"use_official_api": True,
"require_api_key": True,
"results": "JSON",
"language": "en",
}
language = "en"
CACHE: EngineCache
"""Persistent (SQLite) key/value cache that deletes its values after ``expire``
+1 -1
View File
@@ -23,8 +23,8 @@ about = {
"use_official_api": False,
"require_api_key": False,
"results": "JSON",
"language": "zh",
}
language = "zh"
paging = True
categories = []
+3 -2
View File
@@ -51,10 +51,11 @@ def request(query, params):
}
params["url"] = f"{base_url}?{urlencode(query_params)}"
params["headers"]["Referer"] = "https://www.bilibili.com/"
params["headers"]["Accept"] = "application/json, text/javascript, */*; q=0.01"
params["headers"]["Referer"] = "https://www.bilibili.com"
params["cookies"] = cookie
return params
def response(resp):
search_res = resp.json()
-1
View File
@@ -34,7 +34,6 @@ about = {
categories = ["general", "social media"]
paging = True
time_range_support = True
language_support = True
base_url = "https://boardreader.com"
time_range_map = {"day": "1", "week": "7", "month": "30", "year": "365"}
+1 -1
View File
@@ -13,8 +13,8 @@ about = {
'use_official_api': False,
'require_api_key': False,
'results': 'JSON',
'language': 'de',
}
language = "de"
paging = True
categories = ['general']
-115
View File
@@ -1,115 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Chatnoir is an open source search engine developed by Webis, a network of
researchers from the universities of Weimar, Halle and Leipzig. It supports
different different text corpora as indexes, e.g. CommonCrawl. See its
`announcement`_ for more information.
.. _announcement : https://groups.google.com/g/common-crawl/c/3o2dOHpeRxo/m/H2Osqz9dAAAJ
"""
import typing as t
from searx.exceptions import SearxEngineAPIException
from searx.extended_types import SXNG_Response
from searx.network import get, post
from searx.result_types import EngineResults
from searx.utils import html_to_text
if t.TYPE_CHECKING:
from searx.search.processors import OnlineParams
about = {
"website": "https://www.chatnoir.eu",
"official_api_documentation": "https://www.chatnoir.eu/docs/api-general",
"use_official_api": True,
"require_api_key": False,
"results": "JSON",
}
base_url = "https://www.chatnoir.eu"
categories = ["general"]
paging = True
page_size = 10
api_key = ""
"""You can optionally provide your own API key here. This one will then be used
instead of scraping an API key."""
search_index = "cw22"
"""Search index to browse in. See `the API documentation
<https://www.chatnoir.eu/docs/api-general>`_ for a full list."""
def _obtain_api_key() -> tuple[str, str, str]:
home_resp = get(base_url)
if not home_resp.ok:
raise SearxEngineAPIException("failed to obtain api key")
csrf_token = home_resp.cookies["csrftoken"]
token_resp = post(
"https://www.chatnoir.eu/?init",
headers={
"Referer": f"{base_url}/",
"X-Requested-With": "XMLHttpRequest",
"X-Csrf-Token": csrf_token,
},
cookies=home_resp.cookies,
)
if not token_resp.ok:
raise SearxEngineAPIException("failed to obtain api key")
session_id = token_resp.cookies["sessionid"]
scraped_api_key = token_resp.json()["token"]["token"]
return csrf_token, session_id, scraped_api_key
def request(query: str, params: "OnlineParams"):
if api_key:
# use user-provided API key instead of scraping one
headers = {
"Authorization": f"Bearer {api_key}",
}
params["headers"].update(headers)
else:
csrf_token, session_id, scraped_api_key = _obtain_api_key()
headers = {
"Authorization": f"Bearer {scraped_api_key}",
"X-Csrf-Token": csrf_token,
}
params["headers"].update(headers)
params["cookies"] = {"csrftoken": session_id, "sessionid": session_id}
params["url"] = f"{base_url}/api/v1/_search"
params["method"] = "POST"
json_data = {
"query": query,
"index": [
search_index,
],
"from": (params["pageno"] - 1) * page_size,
"size": page_size,
"_extended_meta": True,
}
params["json"] = json_data
def response(resp: "SXNG_Response") -> EngineResults:
res = EngineResults()
results = resp.json()["results"]
for result in results:
res.add(
res.types.MainResult(
url=result["target_uri"],
title=html_to_text(result["title"]),
content=html_to_text(result["snippet"]),
)
)
return res
+1 -1
View File
@@ -10,8 +10,8 @@ about = {
'use_official_api': False,
'require_api_key': False,
'results': 'JSON',
'language': 'de',
}
language = "de"
paging = True
categories = []
+1 -8
View File
@@ -70,13 +70,13 @@ about = {
"use_official_api": False,
"require_api_key": False,
"results": "JSON",
"language": "zh",
}
paging = True
time_range_support = True
results_per_page = 10
categories = []
language = "zh"
ChinasoCategoryType = t.Literal['news', 'videos', 'images']
"""ChinaSo supports news, videos, images search.
@@ -156,13 +156,6 @@ def response(resp):
except Exception as e:
raise SearxEngineAPIException(f"Invalid response: {e}") from e
# Upstream returns {'status': 0, 'msg': 'empty result', 'data': {}} when there
# are no results; this is a valid empty result rather than an API error.
if not isinstance(data, dict) or "data" not in data:
raise SearxEngineAPIException("Invalid response")
if not data["data"]:
return []
parsers = {'news': parse_news, 'images': parse_images, 'videos': parse_videos}
return parsers[chinaso_category](data)
-1
View File
@@ -40,7 +40,6 @@ categories = ["videos"]
paging = True
page_size = 10
language_support = True
time_range_support = True
time_delta_dict = {
"day": timedelta(days=1),
+8 -6
View File
@@ -24,7 +24,7 @@ import typing as t
import json
from searx.result_types import EngineResults
from searx.enginelib import EngineCache, EngineAbout
from searx.enginelib import EngineCache
if t.TYPE_CHECKING:
from searx.search.processors import RequestParams
@@ -35,11 +35,13 @@ categories = ["general"]
disabled = True
timeout = 2.0
language = "en"
about = EngineAbout(
results="JSON",
description="Demo offline engine Engine with results in the English language.",
)
about = {
"wikidata_id": None,
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": "JSON",
}
# if there is a need for globals, use a leading underline
_my_offline_engine: str = ""
+8 -9
View File
@@ -25,7 +25,6 @@ import typing as t
from urllib.parse import urlencode
from searx.result_types import EngineResults
from searx.enginelib import EngineAbout
if t.TYPE_CHECKING:
from searx.extended_types import SXNG_Response
@@ -44,14 +43,14 @@ page_size = 20
search_api = "https://api.artic.edu/api/v1/artworks/search"
image_api = "https://www.artic.edu/iiif/2/"
about = EngineAbout(
website="https://www.artic.edu",
wikidata_id="Q239303",
official_api_documentation="http://api.artic.edu/docs/",
use_official_api=True,
require_api_key=False,
results="JSON",
)
about = {
"website": "https://www.artic.edu",
"wikidata_id": "Q239303",
"official_api_documentation": "http://api.artic.edu/docs/",
"use_official_api": True,
"require_api_key": False,
"results": "JSON",
}
# if there is a need for globals, use a leading underline
+1 -1
View File
@@ -11,8 +11,8 @@ about = {
'use_official_api': False,
'require_api_key': False,
'results': 'HTML',
'language': 'de',
}
language = "de"
categories = []
paging = True
-101
View File
@@ -1,101 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Dogpile is a metasearch engine by the American advertising company `System1`_.
.. _System1: https://system1.com/
"""
import typing as t
from datetime import datetime, timezone
import html
from searx.utils import format_duration, html_to_text, humanize_number
from searx.result_types import EngineResults
if t.TYPE_CHECKING:
from searx.extended_types import SXNG_Response
from searx.search.processors import OnlineParams
about = {
"website": "https://www.dogpile.com",
"wikidata_id": "Q3595363",
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": "JSON",
}
paging = True
safesearch = True
categories = ["general"]
dogpile_categ = "search"
"""Category to search in. Can be either "search", "images", "videos" or "news"."""
base_url = "https://www.dogpile.com"
safe_search_map = {0: "none", 1: "moderate", 2: "heavy"}
def init(_):
if dogpile_categ not in ("search", "images", "videos", "news"):
raise ValueError("invalid search type: %s" % dogpile_categ)
def request(query: str, params: "OnlineParams"):
params["url"] = f"{base_url}/api/{dogpile_categ}"
params["method"] = "POST"
params["json"] = {"q": query, "qadf": safe_search_map[params["safesearch"]], "page": params["pageno"]}
return params
def response(resp: "SXNG_Response"):
res = EngineResults()
json_resp = resp.json()
for result in json_resp["results"]:
if dogpile_categ == "search":
res.add(
res.types.MainResult(
url=result["clickUrl"],
title=html_to_text(result["title"]),
content=html_to_text(result["description"]),
)
)
elif dogpile_categ == "news":
res.add(
res.types.MainResult(
url=result["clickUrl"],
title=html_to_text(html.unescape(result["title"])),
content=html_to_text(html.unescape(result["description"])),
thumbnail=result["thumbnailUrl"],
publishedDate=datetime.fromtimestamp(result["date"], tz=timezone.utc),
)
)
elif dogpile_categ == "videos":
res.add(
res.types.LegacyResult(
template="videos.html",
url=result["clickUrl"],
title=html_to_text(result["title"]),
content=html_to_text(result["description"]),
thumbnail=result["thumbnailUrl"],
publishedDate=datetime.fromisoformat(result["publishDate"]),
length=format_duration(result["duration"]),
views=humanize_number(result["viewCount"]),
)
)
elif dogpile_categ == "images":
res.add(
res.types.Image(
url=result["altClickUrl"],
title=html_to_text(result["title"]),
content=html_to_text(result["description"]),
img_src=result["clickUrl"],
thumbnail_src=result["thumbnailUrl"],
resolution=f"{result['width']}x{result['height']}",
img_format=result["format"],
)
)
return res
-1
View File
@@ -203,7 +203,6 @@ about: dict[str, str | bool] = {
categories: list[str] = ["general", "web"]
paging: bool = True
time_range_support: bool = True
language_support = True
safesearch: bool = True
"""DDG-lite: user can't select but the results are filtered."""
-3
View File
@@ -28,7 +28,6 @@ about = {
"require_api_key": False,
"results": "JSON (site requires js to get images)",
}
language_support = True
# engine dependent config
categories = []
@@ -42,9 +41,7 @@ safesearch_cookies = {0: "-2", 1: None, 2: "1"}
safesearch_args = {0: "1", 1: None, 2: "1"}
search_path_map = {"images": "i", "videos": "v", "news": "news"}
_HTTP_User_Agent: str = gen_useragent()
send_accept_language_header = False
def init(engine_settings: dict[str, t.Any]):
-1
View File
@@ -26,7 +26,6 @@ about = {
"require_api_key": False,
"results": "JSON",
}
language_support = True
# engine dependent config
categories = ["weather"]
-156
View File
@@ -1,156 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""DuckDuckGo Web (general)
This implementation fetches the link to the first API page
(i.e. ``links.duckduckgo.com/d.js?...``) from duckduckgo.com and uses the ``n``
parameter of the API to fetch all subsequent pages.
This also means that it's not possible to immediately search for the third
page - the first and the second page would need to be loaded first.
The reason why we can't just normally use the `vqd` value is that the API URLs
require an additional parameter `dp` which seems generated at server-side, so we
can't build it ourselves and must scrape it from the HTML pages.
"""
import typing as t
from urllib.parse import quote_plus
from lxml import html
from searx.utils import html_to_text, gen_useragent, extract_text, eval_xpath
from searx.result_types import EngineResults
from searx.enginelib import EngineCache
from searx.network import get
if t.TYPE_CHECKING:
from searx.extended_types import SXNG_Response
from searx.search.processors import OnlineParams
about = {
"website": "https://duckduckgo.com/",
"wikidata_id": "Q12805",
"use_official_api": False,
"require_api_key": False,
"results": "JSON",
}
# engine dependent config
categories = ["general"]
paging = True
_HTTP_User_Agent: str = gen_useragent()
base_url = "https://duckduckgo.com"
CACHE: EngineCache
"""Cache to store the API URLs for combinations of (query, page)."""
def setup(engine_settings: dict[str, str]):
global CACHE # pylint:disable=global-statement
CACHE = EngineCache(engine_settings["name"])
return CACHE
def _fetch_first_page_link(
query: str,
headers: dict[str, str],
):
"""Search for a::
<link id="deep_preload_link" rel="preload" as="script"
href="https://links.duckduckgo.com/d.js?q=rust&t=D&l=us-en&s=0&a=h_&ct=DE&vqd=VQD_VALUE&bing_market=en-US&p_ent=&ex=-1&dp=LONG_TOKEN
>
This points to the first page
""" # pylint:disable=line-too-long
cache_key = _cache_key(query, 1)
cached: str | None = CACHE.get(cache_key)
if cached:
return cached
resp = get(
url=f"{base_url}/?q={quote_plus(query)}&t=h_&ia=web",
headers=headers,
timeout=2,
)
if resp.status_code != 200:
logger.error("vqd: got HTTP %s from duckduckgo.com", resp.status_code)
dom = html.fromstring(resp.text)
first_page_link = extract_text(eval_xpath(dom, "//link[@id='deep_preload_link']/@href"))
if not first_page_link:
logger.error("vqd: failed to load first page JS url from ddg response (return empty string)")
return ""
logger.debug("got link to first page from duckduckgo.com request: '%s'", first_page_link)
CACHE.set(cache_key, first_page_link, expire=7200)
return first_page_link
def _cache_key(query: str, pageno: int) -> str:
return f"nextpage_url|{query}|{pageno}"
def request(query: str, params: "OnlineParams") -> None:
if len(query) >= 500:
# DDG does not accept queries with more than 499 chars
params["url"] = None
return
headers = params["headers"]
# The vqd value is generated from the query and the UA header. To be able
# to reuse the vqd value, the UA header must be static.
headers["User-Agent"] = _HTTP_User_Agent
headers["Accept"] = "*/*"
headers["Referer"] = f"{base_url}/"
headers["Host"] = "duckduckgo.com"
# Sec-Fetch headers are required to not get blocked when sending a Firefox user agent
headers["Sec-Fetch-Dest"] = "script"
headers["Sec-Fetch-Mode"] = "no-cors"
headers["Sec-Fetch-Site"] = "same-site"
api_url = ""
if params["pageno"] > 1:
api_url = CACHE.get(_cache_key(query, params["pageno"]))
else:
api_url = _fetch_first_page_link(query, headers)
if not api_url:
params["url"] = None
return
params["url"] = api_url.replace("/d.js?", "/d.js?o=json&")
# TODO: support safesearch, timerange and engine traits # pylint:disable=fixme
def response(resp: "SXNG_Response"):
res = EngineResults()
res_json = resp.json()
for result in res_json["results"]:
if "u" not in result:
continue
res.add(
res.types.MainResult(url=result["u"], title=html_to_text(result["t"]), content=html_to_text(result["a"]))
)
# link to next page
next_page_path = res_json["results"][-1].get("n")
if next_page_path:
CACHE.set(
_cache_key(resp.search_params["query"], resp.search_params["pageno"] + 1),
base_url + next_page_path,
expire=60 * 60,
)
return res
+1 -1
View File
@@ -14,8 +14,8 @@ about = {
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
"language": 'de',
}
language = "de"
categories = ['dictionaries']
paging = True
+1 -1
View File
@@ -55,7 +55,7 @@ about = {
'official_api_documentation': 'https://www.elastic.co/guide/en/elasticsearch/reference/current/search-search.html',
'use_official_api': True,
'require_api_key': False,
"results": "JSON",
'format': 'JSON',
}
base_url = 'http://localhost:9200'
-118
View File
@@ -1,118 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""FindFiles.net_ is a Germany-based file search engine.
FindFiles.net_ is a specialized file search engine designed to help you search
files online with precision. Unlike traditional search engines that mainly index
web pages, FindFiles focuses on finding real files on the internet - including
PDFs, documents, archives, videos, datasets, and more.
.. _FindFiles.net: https://findfiles.net
"""
from os.path import basename
from urllib.parse import urlencode
import typing as t
from lxml import html
from searx.result_types import EngineResults
from searx.utils import extract_text, eval_xpath, eval_xpath_list
if t.TYPE_CHECKING:
from extended_types import SXNG_Response
from search.processors import OnlineParams
about = {
"website": "https://findfiles.net",
"wikidata_id": None,
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": "HTML",
}
base_url = "https://findfiles.net"
categories = ["files"]
paging = True
safeserach = True
safesearch_map = {
0: "contentguard.off",
1: "contentguard.moderate",
2: "contentguard.strict",
}
FindFilesCategory = t.Literal[
"all",
"document",
"text",
"image",
"audio",
"video",
]
FINDFILES_CATEGORIES = t.get_args(FindFilesCategory)
findfiles_categ: FindFilesCategory = "all"
"""Category to search in."""
def setup(_: dict[str, t.Any]) -> bool:
if findfiles_categ not in FINDFILES_CATEGORIES:
raise ValueError("invalid category: %s" % findfiles_categ)
return True
def request(query: str, params: "OnlineParams") -> None:
args = {
"query": query,
"contentguard": safesearch_map[params["safesearch"]],
"page": params["pageno"],
}
# the language in the path doesn't change anything about the results, it
# only changes the UI
params["url"] = f"{base_url}/en/serp/{findfiles_categ}/?{urlencode(args)}"
def response(resp: "SXNG_Response") -> EngineResults:
res = EngineResults()
dom = html.fromstring(resp.text)
if findfiles_categ == "image":
for result in eval_xpath_list(
dom, "//div[contains(@class, 'image-mosaic')]/div[contains(@class, 'image-item')]"
):
res.add(
res.types.Image(
url=extract_text(eval_xpath(result, ".//div[contains(@class, 'caption')]/a/@href")) or "",
title=extract_text(eval_xpath(result, ".//div[contains(@class, 'caption')]/a")) or "",
thumbnail_src=extract_text(eval_xpath(result, ".//img/@src")) or "",
)
)
elif findfiles_categ == "video":
for result in eval_xpath_list(
dom, "//div[contains(@class, 'video-mosaic')]/div[contains(@class, 'video-item')]"
):
video_src = extract_text(eval_xpath(result, ".//video/@src")) or ""
res.add(
res.types.LegacyResult(
template="videos.html",
url=video_src,
title=extract_text(eval_xpath(result, ".//div[contains(@class, 'caption')]/span")) or "",
iframe_src=video_src or "",
)
)
else:
for result in eval_xpath_list(dom, "//ol/li[contains(@class, 'result-item')]/article"):
filename = basename(extract_text(eval_xpath(result, ".//h3")) or "")
res.add(
res.types.File(
url=extract_text(eval_xpath(result, ".//h3/a/@href")) or "",
title=filename,
content=" ".join(extract_text(el) or "" for el in eval_xpath_list(result, "./div/span")),
filename=filename,
size=extract_text(eval_xpath(result, "(.//span[@id])[1]")) or "",
embedded=extract_text(eval_xpath(result, ".//audio/@src")) or "",
)
)
return res
-169
View File
@@ -1,169 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Fireball_ is a Germany-based, privacy-focused search engine.
It likely doesn't have its own index, but it's unclear where its results come
from.
.. _Fireball: https://fireball.com
"""
import typing as t
from datetime import datetime
from urllib.parse import urlencode
from searx.enginelib import EngineCache
from searx.exceptions import SearxEngineAPIException
from searx.extended_types import SXNG_Response
from searx.result_types import EngineResults
from searx.network import post
from searx.utils import html_to_text
if t.TYPE_CHECKING:
from searx.search.processors import OnlineParams
about = {
"website": "https://fireball.com",
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": "JSON",
}
base_url = "https://fireball.com"
categories = ["general"]
fireball_category = "web" # values: "web", "news", "videos"
paging = False
safesearch = True
safe_search_map = {0: "off", 1: "moderate", 2: "strict"}
CACHE: EngineCache
"""Cache to store the settings cookie (contains e.g. language, safesearch, ...)."""
CACHE_VALID_DURATION = 30 * 24 * 3600 # one month, same as website
"""Duration how long settings cookies are valid."""
def init(engine_settings: dict[str, t.Any]):
global CACHE # pylint: disable=global-statement
CACHE = EngineCache(engine_settings["name"])
if fireball_category not in ("web", "news", "videos"):
raise ValueError(f"Unsupported category: {fireball_category}")
def _cache_key(fireball_settings: dict[str, str]) -> str:
return f"fireball_settings_{fireball_settings['safesearch']}_{fireball_settings['market']}"
def _get_search_settings_cookie(params: 'OnlineParams') -> str:
"""Get a 'fireball' cookie for the given locale and safesearch setting set
in params."""
# the language is set by only specifying the search country on their
# website, they only list DE and US, but in fact it supports much more
# countries
country = "US"
if params["searxng_locale"] != "all":
language_parts = params["searxng_locale"].split("-")
country = language_parts[-1].upper()
fireball_settings = {
"action": "save",
"language": "en", # language is irrelevant, only changes UI language
"market": country,
"adprovider": "automatic",
"target": "_blank",
"tiles": "on",
"safesearch": safe_search_map[params["safesearch"]],
}
cache_key = _cache_key(fireball_settings)
cached_cookie = CACHE.get(cache_key)
if cached_cookie:
return cached_cookie
resp = post("https://fireball.com/settings", data=fireball_settings)
if not resp.ok:
raise SearxEngineAPIException("failed to obtain cookie for settings")
cookie = resp.cookies.get("fireball")
if not cookie:
raise SearxEngineAPIException("failed to obtain cookie for settings")
CACHE.set(cache_key, cookie, expire=CACHE_VALID_DURATION)
return cookie
def request(query: str, params: "OnlineParams"):
# no matter the category, the request is always the same, i.e. we get all
# different categories with one HTTP request
args = {
"f": "web",
"q": query,
}
params["url"] = f"{base_url}/getResults/?{urlencode(args)}"
params["cookies"]["fireball"] = _get_search_settings_cookie(params)
# referer header has to be set, otherwise the requests get blocked
params["headers"]["Referer"] = f"{base_url}/search?{urlencode(args)}"
def response(resp: "SXNG_Response") -> EngineResults:
res = EngineResults()
json_data = resp.json()
for result in json_data.get(fireball_category, {}).get("results", []):
published_date = None
if result.get("page_age"):
published_date = datetime.fromisoformat(result["page_age"])
if fireball_category == "web":
res.add(
res.types.MainResult(
url=result["url"],
title=html_to_text(result["title"]),
content=html_to_text(result["description"]),
publishedDate=published_date,
)
)
elif fireball_category == "news":
thumbnail: str | None = None
if result.get("thumbnail"):
thumbnail = result["thumbnail"]["src"]
res.add(
res.types.MainResult(
url=result["url"],
title=html_to_text(result["title"]),
content=html_to_text(result["description"]),
thumbnail=thumbnail or "",
publishedDate=published_date,
)
)
elif fireball_category == "videos":
length = None
if result.get("video"):
length = result["video"].get("duration")
res.add(
res.types.LegacyResult(
{
"template": "videos.html",
"url": result["url"],
"title": html_to_text(result["title"]),
"content": html_to_text(result["description"]),
"thumbnail": result.get("thumbnail", {}).get("original"),
"length": length,
"publishedDate": published_date,
}
)
)
return res
+1 -5
View File
@@ -53,17 +53,13 @@ def response(resp: "SXNG_Response"):
result: dict[str, str] # TBH: dict[str, t.Any]
for result in resp.json()["items"]:
tags = [
tag_info["tag"] for tag_info in result["tags"] if tag_info["tag"] # pyright: ignore[reportArgumentType]
]
res.add(
res.types.Image(
title=result["name"],
content=", ".join(tags),
content=", ".join([tag["tag"] for tag in result["tags"]]), # pyright: ignore[reportArgumentType]
url=_fix_url(result["slug"]),
thumbnail_src=_fix_url(result["png"]),
img_src=_fix_url(result["png512"]),
img_format="PNG",
author=result["team_name"],
)
)
+1 -1
View File
@@ -27,8 +27,8 @@ about = {
'official_api_documentation': None,
'require_api_key': False,
'results': 'HTML',
'language': 'de',
}
language = "de"
paging = True
categories = ['shopping']
-127
View File
@@ -1,127 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Giphy (images)"""
import random
from urllib.parse import urlencode
import re
import typing as t
from lxml import html
from searx.enginelib import EngineCache
from searx.exceptions import SearxEngineAPIException
from searx.network import get
from searx.result_types import EngineResults
from searx.result_types.image import ImageRef
from searx.utils import eval_xpath_list, humanize_bytes
if t.TYPE_CHECKING:
from searx.extended_types import SXNG_Response
from searx.search.processors import OnlineParams
about = {
"website": "https://giphy.com",
"wikidata_id": "Q17054335",
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": "JSON",
}
base_url = "https://giphy.com"
api_url = "https://api.giphy.com"
categories = ["images"]
paging = True
page_size = 15
GiphyCategs = t.Literal["gifs", "stickers", "clips"]
giphy_categ: GiphyCategs = "gifs"
"""Giphy category to search in."""
CACHE: EngineCache
"""Cache for storing the extracted api key."""
_GIPHY_API_KEY_RE = re.compile(r"[Aa]piKey\s*:\s*\"(\w+)\"")
def setup(engine_settings: dict[str, str]) -> bool:
if giphy_categ not in t.get_args(GiphyCategs):
raise ValueError("invalid category: %s" % giphy_categ)
global CACHE # pylint: disable=global-statement
CACHE = EngineCache(engine_settings["name"])
return True
def _get_api_key() -> str:
"""
Extract the Giphy API key from the JavaScript code. There are different API keys
(e.g. for mobile, desktop, ...), so we just pick a random one of these.
"""
cached = CACHE.get("api_key")
if cached:
return cached
homepage_resp = get(base_url)
homepage_doc = html.fromstring(homepage_resp.text)
for script_src in eval_xpath_list(homepage_doc, "//script[contains(@src, 'layout')]/@src"):
script_resp = get(base_url + script_src)
api_keys = _GIPHY_API_KEY_RE.findall(script_resp.text)
if api_keys:
api_key = random.choice(api_keys)
CACHE.set("api_key", api_key, expire=60 * 60 * 6) # 6 hours
return api_key
raise SearxEngineAPIException("failed to extract api keys")
def request(query: str, params: "OnlineParams") -> None:
args = {
"q": query,
"api_key": _get_api_key(),
"limit": page_size,
"offset": (params["pageno"] - 1) * page_size,
"type": giphy_categ,
}
params["url"] = f"{api_url}/v1/{giphy_categ}/search?{urlencode(args)}"
def response(resp: "SXNG_Response"):
res = EngineResults()
result: dict[str, t.Any]
for result in resp.json()["data"]:
img = result['images']['original']
formats = [
ImageRef(url=img["mp4"], subtype="mp4"), # type: ignore
ImageRef(url=img["webp"], subtype="webp"), # type: ignore
]
thumb = (
result["images"].get("downsized")
or result["images"].get("downsized_medium")
or result["images"].get("downsized_small")
or result["images"].get("downsized_large")
)
res.add(
res.types.Image(
title=result["title"],
content=", ".join(result.get("tags", [])),
url=result["url"],
thumbnail_src=thumb.get("url") or img["url"],
img_src=img["url"],
resolution=f"{img['width']}x{img['height']}",
img_format="GIF",
formats=formats,
author=result["username"],
filesize=humanize_bytes(int(img["size"])),
source=result.get("source_tld") or "",
)
)
return res
+1 -8
View File
@@ -10,12 +10,10 @@ import time
import typing as t
from urllib.parse import urlencode
from lxml import html
from searx.result_types import EngineResults
from searx.exceptions import SearxEngineCaptchaException
from searx.extended_types import SXNG_Response
from searx.utils import extr, gen_useragent, html_to_text, eval_xpath
from searx.utils import extr, gen_useragent, html_to_text
from searx.network import get
if t.TYPE_CHECKING:
@@ -42,11 +40,6 @@ time_range_map = {"day": "d", "week": "w", "month": "m", "year": "y"}
def _get_page_hash(query: str, page: int, headers: dict[str, str]) -> str:
resp = get(f"{base_url}/web/result?q={query}&page={page}", headers=headers)
# detect captcha (if any)
doc = html.fromstring(resp.text)
if eval_xpath(doc, "//*[@id='spam-messages']"):
raise SearxEngineCaptchaException()
# the text we search for looks like:
# load("/desk?lang="+eV.p.param['hl']+"&q="+eV['p']['q_encode']+"&page=5&h=aa45603&t=177582576&origin=web&comp=web_serp_pag&p=gmx-com&sp=&lr="+eV.p.param['lr0']+"&mkt="+eV.p.param['mkt0']+"&family="+eV.p.param['familyFilter']+"&fcons="+eV.p.perm.fCons,"google", "eMMO", "eMH","eMP"); # pylint: disable=line-too-long
return extr(resp.text, "&h=", "&t=")
-1
View File
@@ -57,7 +57,6 @@ max_page = 50
.. _Google max 50 pages: https://github.com/searxng/searxng/issues/2982
"""
time_range_support = True
language_support = True
safesearch = True
time_range_dict = {"day": "d", "week": "w", "month": "m", "year": "y"}
-1
View File
@@ -43,7 +43,6 @@ max_page = 50
"""
time_range_support = True
language_support = True
safesearch = True
filter_mapping = {0: 'images', 1: 'active', 2: 'active'}
-1
View File
@@ -66,7 +66,6 @@ about = {
categories = ["news"]
paging = False
time_range_support = False
language_support = True
# Google-News results are always *SafeSearch*. Option 'safesearch' is set to
# False here.
-90
View File
@@ -1,90 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Heexy_ is a minimalist search engine that focuses on privacy.
Although it also supports news and videos, these are not implemented here
because they usually return no result to very few irrelevant ones.
It seems to use Bing internally, as the image thumbnails are loaded from Bing.
.. _Heexy: https://docs.heexy.org/introduction
"""
from urllib.parse import urlencode
import typing as t
from searx.exceptions import SearxEngineAccessDeniedException
from searx.result_types import EngineResults
if t.TYPE_CHECKING:
from searx.extended_types import SXNG_Response
from searx.search.processors import OnlineParams
about = {
"website": "https://heexy.org",
"wikidata_id": None,
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": "JSON",
}
paging = True
safesearch = True
categories = ["general"]
heexy_categ = "web"
"""Category to search in. Can be either "web" or "image"."""
base_url = "https://seapi.heexy.org"
safe_search_map = {0: "off", 1: "on", 2: "on"}
def init(_):
if heexy_categ not in ("web", "image"):
raise ValueError("invalid search category: %s" % heexy_categ)
def request(query: str, params: "OnlineParams") -> None:
args = {
"q": query,
"page": params["pageno"],
"safe": safe_search_map[params["safesearch"]],
}
if params["searxng_locale"] != "all":
args["lang"] = params["searxng_locale"].split("-")[0]
params["url"] = f"{base_url}/search/{heexy_categ}?{urlencode(args)}"
params["headers"]["Origin"] = base_url
def response(resp: "SXNG_Response"):
res = EngineResults()
json_resp = resp.json()
if not json_resp["success"]:
raise SearxEngineAccessDeniedException()
result: dict[str, str]
for result in json_resp["results"]:
if heexy_categ == "web":
res.add(
res.types.MainResult(
url=result["url"],
title=result["title"],
content=result["description"],
)
)
elif heexy_categ == "image":
res.add(
res.types.Image(
title=result["description"],
url=result["url"],
thumbnail_src=result["image"],
img_src=result["rawImage"],
)
)
return res
+1 -1
View File
@@ -34,8 +34,8 @@ about = {
"use_official_api": True,
"require_api_key": False,
"results": "JSON",
"language": "it",
}
language = "it"
def request(query, params):
+1 -1
View File
@@ -16,8 +16,8 @@ about = {
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
"language": 'fr',
}
language = "fr"
# engine dependent config
categories = ['videos']
+1 -1
View File
@@ -14,9 +14,9 @@ about = {
"use_official_api": False,
"require_api_key": False,
"results": "JSON",
"language": "zh",
}
language = "zh"
paging = True
time_range_support = True
categories = ["videos"]
-88
View File
@@ -1,88 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""iseek_ is a search engine by the AI company Vantage Labs LLC,
that focuses on medical and educational applicances.
Although it's an AI company, it doesn't include any AI stuff in its results.
.. _iseek : https://www.iseek.ai/
"""
import base64
from hashlib import sha256
import typing as t
from urllib.parse import urlencode
from searx.result_types import EngineResults
if t.TYPE_CHECKING:
from searx.search.processors import OnlineParams
from searx.extended_types import SXNG_Response
about = {
"website": 'https://www.iseek.com',
"wikidata_id": None,
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": "JSON",
}
categories = ["general"]
paging = True
base_url = "https://api.iseek.com"
page_size = 10
def _get_new_token(query: str, pageno: int) -> str:
"""Create a new ``qToken``. This reduced the time for fetching subsequent pages
from 4 seconds to 200ms when testing."""
# The website uses a random value as qToken for the first page. For our use case,
# it's easier if the qToken can be deterministically re-calculated based on the search query,
# so that we can the same result when calling _get_new_token for the second, third, ... page
#
# var qToken = Math.ceil(Math.random() * parseInt("ZZZZ", 36)).toString(36);
# while (qToken.length < 4) qToken = '0' + qToken;
# qToken = qToken + "_" + pageno
query_hash = sha256(query.encode()).digest()
hash_start = base64.b64encode(query_hash).decode()[0:4]
return f"{hash_start}_{pageno}"
def request(query: str, params: "OnlineParams"):
offset = (params["pageno"] - 1) * page_size
# always seems to find 20 results max
if offset >= 20:
params["url"] = None
return
args = {
"q": query,
"key": "core-web",
"num": str(page_size),
"off": offset,
"rSort": "__metasearch_score_d:desc",
# it supports many more fields, but none of them are really relevant
"names": "title_t,content_txt,url_s",
"qNames": "title_t",
"qToken": _get_new_token(query, params["pageno"]),
}
params["url"] = f"{base_url}/search?{urlencode(args)}"
def response(resp: "SXNG_Response") -> EngineResults:
res = EngineResults()
for group in resp.json()["data"]:
group: dict[str, t.Any]
for result in group["doclist"]["docs"]:
result: dict[str, str]
res.add(
res.types.MainResult(
url=result["url_s"],
title=result["title_t"],
content="".join(result["content_txt"]),
)
)
return res
+1 -1
View File
@@ -13,8 +13,8 @@ about = {
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
"language": 'ja',
}
language = "ja"
categories = ['dictionaries']
paging = False
+1 -12
View File
@@ -20,7 +20,6 @@ Paging:
- :py:obj:`paging`
- :py:obj:`page_size`
- :py:obj:`first_page_num`
- :py:obj:`send_page_num_on_first_page`
Time Range:
@@ -79,9 +78,6 @@ from json import loads
from urllib.parse import urlencode
from searx.utils import to_string, html_to_text
from searx.network import raise_for_httperror
from searx.enginelib import EngineAbout
about = EngineAbout()
search_url = None
"""
@@ -173,10 +169,6 @@ number, but an offset.'''
first_page_num = 1
'''Number of the first page (usually 0 or 1).'''
send_page_num_on_first_page = True
'''Whether to include the page number in the request for the first page.
This can help if an engine blocks request that send a page number for the first page.'''
results_query = ''
'''JSON query for the list of result items.
@@ -330,13 +322,10 @@ def request(query, params): # pylint: disable=redefined-outer-name
if params['safesearch']:
safe_search = safe_search_map[params['safesearch']]
pageno = ""
if send_page_num_on_first_page or params["pageno"] != 1:
pageno = (params['pageno'] - 1) * page_size + first_page_num
fp = { # pylint: disable=invalid-name
'query': urlencode({'q': query})[2:],
'lang': lang,
'pageno': pageno,
'pageno': (params['pageno'] - 1) * page_size + first_page_num,
'time_range': time_range,
'safe_search': safe_search,
}
-190
View File
@@ -1,190 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Kagi_ is a paid, privacy-focused search engine.
Using it requires an API key. If you have a Kagi account, you can obtain an API
key in the `API portal`_.
To enable Kagi, add the following to the ``engines`` seciton of
``settings.yml``:
.. code:: yaml
- name: kagi
engine: kagi
categories: [general, web]
shortcut: kg
api_key: ""
kagi_categ: search
- name: kagi.news
engine: kagi
categories: [news, web]
shortcut: kgn
api_key: ""
kagi_categ: news
- name: kagi.images
engine: kagi
categories: [images, web]
shortcut: kgi
paging: false
api_key: ""
kagi_categ: images
- name: kagi.videos
engine: kagi
categories: [videos, web]
shortcut: kgv
api_key: ""
kagi_categ: videos
.. _Kagi: https://kagi.com
.. _Api Portal: https://help.kagi.com/kagi/api/overview.html
"""
from datetime import datetime, timedelta
import typing as t
import html
from searx.extended_types import SXNG_Response
from searx.result_types import EngineResults
from searx.utils import parse_duration_string
if t.TYPE_CHECKING:
from searx.search.processors import OnlineParams
TimeRangeType = t.Literal["day", "week", "month", "year"]
about = {
"website": "https://kagi.com",
"wikidata_id": "Q26000117",
"official_api_documentation": "https://kagi.com/api/docs/openapi",
"use_official_api": True,
"require_api_key": True,
"results": "JSON",
}
paging = True
"""All categories except the ``images`` category support paging."""
safesearch = True
time_range_support = True
categories = ["general"]
kagi_categ: t.Literal["search", "images", "news", "videos"] = "search"
"""Search category. Supported values: "search" (general), "images", "news", "videos"."""
base_url = "https://kagi.com"
safe_search_map = {0: False, 1: True, 2: True}
time_range_to_days_map: dict[TimeRangeType, int] = {"day": 1, "week": 7, "month": 30, "year": 365}
api_key = ""
"""Kagi API key. Required for using this engine."""
def init(_):
if not api_key:
raise ValueError("api_key is required for using kagi")
if kagi_categ not in ("search", "images", "news", "videos"):
raise ValueError(f"Unsupported category: {kagi_categ}") # pyright: ignore[reportUnreachable]
def request(query: str, params: "OnlineParams"):
# According to the API docs, Kagi supports at maximum page 10
if params["pageno"] > 10:
return
params["headers"]["Authorization"] = f"Bearer {api_key}"
params["url"] = f"{base_url}/api/v1/search"
filters = {}
time_range = params.get("time_range")
if time_range:
# Kagi expects the minimum date to return results from as argument to `after`
time_period = timedelta(days=time_range_to_days_map[time_range])
oldest_result_date = datetime.now() - time_period
filters["after"] = oldest_result_date.strftime("%Y-%m-%d")
# there doesn't seem to be a list of languages anywhere,
# so we just assume that it supports all languages
filters["region"] = "no_region"
if params["searxng_locale"] != "all":
_locale = params["searxng_locale"].split("-")
if len(_locale) > 1:
filters["region"] = _locale[-1].lower()
args: dict[str, t.Any] = {
"query": query,
"page": params["pageno"],
"workflow": kagi_categ,
"safe_search": safe_search_map[params["safesearch"]],
"filters": filters,
}
params["method"] = "POST"
params["json"] = args
def response(resp: "SXNG_Response") -> EngineResults:
res = EngineResults()
json_data: dict[str, t.Any] = resp.json()
if kagi_categ in ("images", "videos"):
# the JSON key is "image" for "images" and "video" for "videos"
json_results = json_data["data"][kagi_categ[:-1]]
else:
json_results = json_data["data"][kagi_categ]
for result in json_results:
published_date: datetime | None = None
if result.get("time"):
published_date = datetime.fromisoformat(result["time"])
if kagi_categ in ("search", "news"):
res.add(
res.types.MainResult(
url=result["url"],
title=html.unescape(result["title"]),
content=html.unescape(result["snippet"]),
thumbnail=result.get("image", {}).get("url") or "",
publishedDate=published_date,
)
)
elif kagi_categ == "images":
res.add(
res.types.Image(
url=result["url"],
title=html.unescape(result.get("title")),
img_src=result.get("image", {}).get("url"),
resolution=f"{result['image']['width']}x{result['image']['height']}",
thumbnail_src=result.get("props", {}).get("thumbnail", {}).get("url"),
)
)
elif kagi_categ == "videos":
length: timedelta | None = None
if result["props"].get("duration"):
length = parse_duration_string(result["props"]["duration"])
res.add(
res.types.LegacyResult(
{
"template": "videos.html",
"url": result["url"],
"title": html.unescape(result["title"]),
"content": html.unescape(result["snippet"]),
"thumbnail": result.get("image", {}).get("url"),
"publishedDate": published_date,
"author": result["props"].get("creator_name"),
"length": length,
}
)
)
for suggestion in json_data["data"].get("related_search", []):
res.add(res.types.LegacyResult({"suggestion": suggestion["title"]}))
return res
+205
View File
@@ -0,0 +1,205 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Karmasearch uses Brave's index, so the results should be the same as Brave's.
However, the advantages of this engine are:
- it has less strict rate-limits
- it has a JSON API, so it's less likely to break
"""
from datetime import datetime
from urllib.parse import urlencode
import typing as t
from dateutil import parser
from searx.enginelib.traits import EngineTraits
from searx.utils import html_to_text
from searx.result_types import EngineResults, MainResult
from searx.result_types._base import LegacyResult
if t.TYPE_CHECKING:
from searx.extended_types import SXNG_Response
from searx.search.processors import OnlineParams
about = {
"website": "https://karmasearch.org",
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": "JSON",
}
base_url = "https://api.karmasearch.org"
categories = ["web", "general"]
search_type = "web" # supported: web, images, videos, news
# all types except "images" support pagination
paging = True
safesearch = True
time_range_support = True
safe_search_map = {0: "off", 1: "moderate", 2: "strict"}
time_range_map = {"day": "Day", "week": "Week", "month": "Month", "year": "Year"}
def init(_):
if search_type not in ("web", "images", "videos", "news"):
raise ValueError(f"invalid search type: {search_type}")
def request(query: str, params: "OnlineParams") -> None:
engine_region: str = traits.get_region(params["searxng_locale"]) or "en-US"
args: dict[str, str | int] = {
"searchTerm": query,
"adultFilter": safe_search_map[params["safesearch"]],
"pageNumber": params["pageno"],
"country": engine_region.split("-")[-1],
"userLanguage": "en", # UI language: en, es or fr / no effect on search results
"market": engine_region,
}
if params["time_range"]:
args["freshness"] = time_range_map[params["time_range"]]
# Needed to circumvent Cloudflare bot protection
params['headers']['Referer'] = "https://karmasearch.org"
params["url"] = f"{base_url}/search/{search_type}?{urlencode(args)}"
def _parse_date(date_string: str) -> datetime | None:
try:
return parser.parse(date_string)
except parser.ParserError:
return None
def _parse_general(result: dict[str, str]):
return MainResult(
url=result["url"],
title=result["title"],
content=html_to_text(result["description"]),
thumbnail=result.get("thumbnail", ""),
)
def _parse_news(result: dict[str, str]) -> LegacyResult:
return LegacyResult(
{
"url": result["url"],
"title": result["title"],
"content": html_to_text(result["description"]),
"thumbnail": result.get("thumbnail"),
"publishedDate": _parse_date(result.get("age", "")),
}
)
def _parse_videos(result: dict[str, t.Any]) -> LegacyResult:
return LegacyResult(
{
"template": "videos.html",
"url": result["url"],
"title": result["title"],
"content": html_to_text(result["description"]),
"thumbnail": result.get("thumbnail"),
"publishedDate": _parse_date(result.get("age", "")),
"length": result.get("video", {}).get("duration"),
}
)
def _parse_images(result: dict[str, t.Any]) -> LegacyResult:
return LegacyResult(
{
"template": "images.html",
"url": result["url"],
"title": result["title"],
"content": "",
"img_src": result.get("properties", {}).get("url"),
"thumbnail_src": result.get("thumbnail", {}).get("src"),
}
)
def response(resp: "SXNG_Response") -> EngineResults:
res = EngineResults()
json_resp: dict[str, t.Any] = resp.json()
if not isinstance(json_resp, dict):
return res # pyright: ignore[reportUnreachable]
for result in json_resp["results"]:
# hide sponsored results
if result.get("sponsored", False):
continue
if "videos" in result:
for videos_result in result["videos"]:
res.add(_parse_videos(videos_result))
continue
if "news" in result:
for news_result in result["news"]:
res.add(_parse_news(news_result))
continue
if search_type == "news":
res.add(_parse_news(result))
elif search_type == "videos":
res.add(_parse_videos(result))
elif search_type == "images":
res.add(_parse_images(result))
else:
res.add(_parse_general(result))
return res
def fetch_traits(engine_traits: EngineTraits):
"""Fetch :ref:`languages <brave languages>` and :ref:`regions <brave
regions>` from Brave."""
# pylint: disable=import-outside-toplevel, too-many-branches
from lxml import html
import babel
from searx.locales import region_tag
from searx.network import get # see https://github.com/searxng/searxng/issues/762
# from searx.engines.xpath import extract_text
from searx.utils import gen_useragent
headers = {
"Accept-Encoding": "gzip, deflate",
"Cache-Control": "no-cache",
"DNT": "1",
"Connection": "keep-alive",
"Accept-Language": "en,en-US;q=0.7,en;q=0.3",
"User-Agent": gen_useragent(),
}
resp = get("https://karmasearch.org/settings", headers=headers, timeout=5)
if not resp.ok:
raise RuntimeError("Response from Brave languages is not OK.")
dom = html.fromstring(resp.text)
for option in dom.xpath("//select[@name='country']/option"):
country_tag: str = option.get("value", "")
try:
sxng_tag = region_tag(babel.Locale.parse(country_tag, sep="-"))
except babel.UnknownLocaleError:
# silently ignore unknown languages
continue
# print("%-20s: %s <-- %s" % (extract_text(option), country_tag, sxng_tag))
conflict = engine_traits.regions.get(sxng_tag)
if conflict:
if conflict != country_tag:
print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, country_tag))
continue
engine_traits.regions[sxng_tag] = country_tag
-210
View File
@@ -1,210 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Luxxle_ is an American search engine focusing on providing "unbiased"
results.
.. _Luxxle: https://luxxle.com
"""
from json import dumps
from urllib.parse import quote_plus, unquote_plus
import typing as t
from lxml import html
from searx.result_types import EngineResults
from searx.network import get
from searx.utils import (
extr,
gen_useragent,
eval_xpath_list,
extract_text,
eval_xpath,
parse_duration_string,
ElementType,
)
if t.TYPE_CHECKING:
from searx.search.processors import OnlineParams
from searx.extended_types import SXNG_Response
about = {
"website": "https://luxxle.com",
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": "HTML",
}
categories = []
safeseach = True
base_url = "https://luxxle.com"
luxxle_categ = "search"
"""Supported categories: "search", "news", "images", "videos"."""
# otherwise all requests get blocked (http2-fingerprinted probably)
enable_http2 = False
safe_search_map = {0: "Off", 1: "Moderate", 2: "Strict"}
def init(_):
if luxxle_categ not in ("search", "images", "videos", "news"):
raise ValueError("invalid luxxle category: %s" % luxxle_categ)
def _obtain_telemetry_data(query: str) -> dict[str, str]:
"""This data is required for sending search queries.
The luxsearch page (for general results) has a JS dict called ``telemetryData``
that contains all the important info, but the others don't, so we don't use it
here. But it's useful to understand which info is needed.
.. code-block:: javascript
var telemetryData = {
errorInformation: errorInformation,
query: "youapps club",
ip: "10.10.10.10",
timeOf: "1781119224",
authorization: "db889e0ae67d3c320858ad97f51cc4f0a4d8e1913c4f5ebe5d2eafef606521dd",
};
This data is only valid for very short times
"""
resp = get(
f"{base_url}/lux{luxxle_categ}?q={quote_plus(query)}", headers={"User-Agent": gen_useragent(), "Sec-GPC": "1"}
)
def extr_js_variable(name: str) -> str:
val = extr(resp.text, f"var {name} = \"", "\";")
if not val:
val = extr(resp.text, f"var {name} = '", "';")
return val
return {
"ip": extr_js_variable("ip"),
"timeOf": extr_js_variable("timeOf"),
"authorization": extr_js_variable("authorization"),
"preferencesCookie": extr_js_variable("preferencesCookie"),
}
def request(query: str, params: "OnlineParams") -> None:
telemetry_data = _obtain_telemetry_data(query)
market = params["searxng_locale"]
if market == "all":
market = "en-US"
params["url"] = f"{base_url}/load_{luxxle_categ}.php"
search_data = {
**telemetry_data,
"query": query,
"market": market,
"safeSearch": safe_search_map[params["safesearch"]],
"freshness": "",
"language": "english", # UI language
}
if luxxle_categ == "images":
# for some reason this is sent as form data
params["data"] = {"searchData": dumps(search_data)}
else:
params["json"] = {"searchData": search_data}
params["method"] = "POST"
def _extract_url_from_redirect(url: str):
# urls usually look like "/redirect?url=<url>"
query_start_idx = url.find("?url=")
if query_start_idx < 0:
return url
url_start_idx = query_start_idx + len("?url=")
return unquote_plus(url[url_start_idx:])
def _general_results(doc: ElementType, res: EngineResults):
for result in eval_xpath_list(doc, "//div[@id='mainResults']/div[contains(@class, 'resultsContainer')]"):
res.add(
res.types.MainResult(
url=_extract_url_from_redirect(
extract_text(eval_xpath(result, "./div[contains(@class, 'urlAddressLink')]/a/@href")) or ""
),
title=extract_text(eval_xpath(result, "./div[contains(@class, 'urlname')]")) or "",
content=extract_text(eval_xpath(result, "./div[contains(@class, 'urlSnippet')]")) or "",
)
)
def _news_results(doc: ElementType, res: EngineResults):
for result in eval_xpath_list(
doc, "//div[contains(@class, 'newsResults')]/div[contains(@class, 'mediaResultNewsPage')]"
):
res.add(
res.types.MainResult(
url=_extract_url_from_redirect(
extract_text(eval_xpath(result, ".//div[contains(@class, 'mediaResultNewsPageTitle')]/a/@href"))
or ""
),
title=extract_text(eval_xpath(result, ".//div[contains(@class, 'mediaResultNewsPageTitle')]/a")) or "",
content=extract_text(eval_xpath(result, ".//div[contains(@class, 'mediaResultNewsPageDescription')]"))
or "",
thumbnail=extract_text(eval_xpath(result, ".//div[contains(@class, 'mediaResultThumbnail')]//img/@src"))
or "",
)
)
def _video_results(doc: ElementType, res: EngineResults):
for result in eval_xpath_list(doc, "//div[@id='mainResults']/div[contains(@class, 'mediaResult')]"):
res.add(
res.types.MainResult(
template="videos.html",
url=extract_text(eval_xpath(result, "./@data-url")) or "",
title=extract_text(eval_xpath(result, ".//div[contains(@class, 'mediaResultTitleVideo')]/a")) or "",
content=extract_text(eval_xpath(result, ".//div[contains(@class, 'mediaResultDescription')]")) or "",
thumbnail=extract_text(eval_xpath(result, ".//img[contains(@class, 'videoThumbnail')]/@src")) or "",
author=extract_text(eval_xpath(result, ".//div[contains(@class, 'videoCreator')]")) or "",
length=parse_duration_string(
extract_text(eval_xpath(result, ".//span[contains(@class, 'mediaResultDuration')]")) or ""
),
)
)
def _image_results(doc: ElementType, res: EngineResults):
for result in eval_xpath_list(doc, "//div[contains(@class, 'imageResultsWrapper')]/div"):
res.add(
res.types.Image(
url=_extract_url_from_redirect(
extract_text(eval_xpath(result, ".//a[contains(@class, 'imageResultSource')]/@href")) or ""
),
title=extract_text(eval_xpath(result, ".//a[contains(@class, 'imageResultTitle')]")) or "",
source=extract_text(eval_xpath(result, ".//div[contains(@class, 'imageResultSource')]")) or "",
thumbnail_src=extract_text(eval_xpath(result, "./@data-thumbnail-src")) or "",
img_src=extract_text(eval_xpath(result, "./@data-image-src")) or "",
)
)
def response(resp: "SXNG_Response") -> EngineResults:
doc = html.fromstring(resp.text)
res = EngineResults()
match luxxle_categ:
case "search":
_general_results(doc, res)
case "images":
_image_results(doc, res)
case "videos":
_video_results(doc, res)
case "news":
_news_results(doc, res)
case _:
raise ValueError("unsupported category: %s" % luxxle_categ)
return res
+3 -8
View File
@@ -44,8 +44,8 @@ about = {
base_url = "https://api2.marginalia-search.com"
safesearch = True
categories = ["general", "blogs"]
paging = True
categories = ["general"]
paging = False
results_per_page = 20
api_key = None
"""To get an API key, please follow the instructions from `Key and license`_
@@ -85,12 +85,7 @@ class ApiSearchResults(t.TypedDict):
def request(query: str, params: dict[str, t.Any]):
query_params = {
"page": params["pageno"],
"count": results_per_page,
"nsfw": min(params["safesearch"], 1),
"query": query,
}
query_params = {"count": results_per_page, "nsfw": min(params["safesearch"], 1), "query": query}
params["url"] = f"{base_url}/search?{urlencode(query_params)}"
params["headers"]["User-Agent"] = searxng_useragent()
+1 -1
View File
@@ -11,9 +11,9 @@ about = {
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
"language": "de",
}
language = "de"
categories = ['videos']
paging = True
time_range_support = False
-1
View File
@@ -20,7 +20,6 @@ about = {
}
paging = True # paging is only supported for general search
safesearch = True
language_support = True
time_range_support = True # time range search is supported for general and news
max_page = 10
+1 -2
View File
@@ -35,9 +35,8 @@ about = {
'use_official_api': False,
'require_api_key': False,
'results': 'JSON',
'language': 'de',
}
language = "de"
paging = True
categories = ["movies"]
+1 -1
View File
@@ -26,8 +26,8 @@ about = {
"use_official_api": False,
"require_api_key": False,
"results": "HTML",
"language": "ko",
}
language = "ko"
categories = []
paging = True
+1 -1
View File
@@ -13,8 +13,8 @@ about = {
"use_official_api": False,
"require_api_key": False,
"results": "HTML",
"language": "ja",
}
language = "ja"
categories = ["videos"]
paging = True
+10 -2
View File
@@ -4,6 +4,7 @@
.. _Odysee: https://github.com/OdyseeTeam/odysee-frontend
"""
import time
from datetime import datetime
from urllib.parse import urlencode
@@ -11,7 +12,6 @@ import babel
from searx.enginelib.traits import EngineTraits
from searx.locales import language_tag
from searx.utils import format_duration
# Engine metadata
about = {
@@ -26,7 +26,6 @@ about = {
# Engine configuration
paging = True
time_range_support = True
language_support = True
results_per_page = 20
categories = ["videos"]
@@ -62,6 +61,15 @@ def request(query, params):
return params
# Format the video duration
def format_duration(duration):
seconds = int(duration)
length = time.gmtime(seconds)
if length.tm_hour:
return time.strftime("%H:%M:%S", length)
return time.strftime("%M:%S", length)
def response(resp):
data = resp.json()
results = []
-1
View File
@@ -25,7 +25,6 @@ about = {
"require_api_key": False,
"results": "JSON",
}
language_support = True
# engine dependent config
categories = ["videos"]
+2 -4
View File
@@ -9,7 +9,7 @@ from lxml import html
from searx.result_types import EngineResults
from searx.utils import eval_xpath_list, gen_useragent
from searx.enginelib import EngineCache
from searx.exceptions import SearxEngineAPIException, SearxEngineAccessDeniedException
from searx.exceptions import SearxEngineAPIException
from searx.network import get
@@ -58,8 +58,6 @@ def _get_secret_key():
# circumvents Cloudflare bot protections
"User-Agent": gen_useragent(),
"Referer": base_url,
"Sec-GPC": "1",
"Connection": "keep-alive",
},
)
@@ -97,7 +95,7 @@ def request(query, params):
try:
secret_key = _get_secret_key()
CACHE.set(SECRET_KEY_DB_KEY, secret_key)
except (SearxEngineAPIException, SearxEngineAccessDeniedException) as e:
except SearxEngineAPIException as e:
logger.debug("failed to extract API key %s" % e)
secret_key = api_key
+2 -2
View File
@@ -28,7 +28,7 @@ search_string = 'api/?{query}&limit={limit}'
result_base_url = 'https://openstreetmap.org/{osm_type}/{osm_id}'
# list of supported languages
photon_supported_languages = ["de", "en", "fr", "it"]
supported_languages = ['de', 'en', 'fr', 'it']
# do search-request
@@ -37,7 +37,7 @@ def request(query, params):
if params['language'] != 'all':
language = params['language'].split('_')[0]
if language in photon_supported_languages:
if language in supported_languages:
params['url'] = params['url'] + "&lang=" + language
# using SearXNG User-Agent
-62
View File
@@ -1,62 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Podchaser (podcasts)"""
import typing as t
from datetime import datetime
from urllib.parse import urlencode
from searx.result_types import EngineResults
if t.TYPE_CHECKING:
from searx.extended_types import SXNG_Response
from searx.search.processors import OnlineParams
about = {
"website": "https://www.podchaser.com",
"official_api_documentation": "https://www.podchaser.com/api",
"use_official_api": False,
"require_api_key": False,
"results": "JSON",
}
categories = []
paging = True
base_url = "https://api.podchaser.com"
page_size = 25
def request(query: str, params: "OnlineParams") -> None:
args = {
"filters[term]": query,
"limit": page_size,
"offset": (params["pageno"] - 1) * page_size,
"sort_direction": "desc",
"sort_order": "SORT_ORDER_RELEVANCE",
}
params["url"] = f"{base_url}/podcasts?{urlencode(args)}"
params["headers"]["Accept"] = "application/prs.podchaser.v2+json"
def response(resp: "SXNG_Response"):
res = EngineResults()
json_results: list[dict[str, str]] = resp.json()["entities"] # pyright: ignore[reportAny]
for result in json_results:
metadata = [f"{result['number_of_episodes']} episodes"]
if result["categories"]:
metadata.append(", ".join(c["text"] for c in result["categories"])) # pyright: ignore[reportArgumentType]
res.add(
res.types.MainResult(
url=result["feed_url"],
title=result["title"],
content=result["description"],
thumbnail=result["image_url"],
publishedDate=datetime.strptime(result["created_at"], "%Y-%m-%d %H:%M:%S"),
metadata=" | ".join(metadata),
)
)
return res
+1 -1
View File
@@ -77,7 +77,7 @@ from searx.utils import gen_useragent, html_to_text, parse_duration_string
about = {
"website": "https://presearch.io",
"wikidata_id": "Q7240905",
"wikidiata_id": "Q7240905",
"official_api_documentation": "https://docs.presearch.io/nodes/api",
"use_official_api": False,
"require_api_key": False,
-217
View File
@@ -1,217 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Privacywall_ claims to be a "privacy-friendly" search engine,
but according to a `Privacyguides discussion`_ it's sharing private
user information with Microsoft and Amazon.
.. _Privacywall : https://www.privacywall.org
.. _`Privacyguides discussion` : https://discuss.privacyguides.net/t/how-is-privacy-wall-search-engine/29486
"""
import typing as t
from urllib.parse import urlencode, unquote_plus
from lxml import html
import babel
from searx.enginelib.traits import EngineTraits
from searx.utils import eval_xpath_list, eval_xpath, extract_text, get_embeded_stream_url, extr
from searx.locales import region_tag
from searx.result_types import EngineResults
if t.TYPE_CHECKING:
from lxml.etree import ElementBase
from searx.extended_types import SXNG_Response
from searx.search.processors import OnlineParams
about = {
"website": "https://privacywall.org",
"wikidata_id": None,
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": "HTML",
}
paging = True
safesearch = True
time_range_support = True
base_url = "https://www.privacywall.org"
privacywall_category = "general"
"""Supported categories are ``general``, ``videos`` and ``images``."""
# corresponds to the "k" query param
safesearch_map = {0: "off", 1: "on", 2: "on"}
# page number sent for videos (is independent of the query) - certainly there's
# a pattern in this, but for our use case it's enough to just support the first
# 10 pages by hardcoding the page "numbers"
video_page_map = {
2: "CAoQAA",
3: "CBQQAA",
4: "CB4QAA",
5: "CCgQAA",
6: "CDIQAA",
7: "CDwQAA",
8: "CEYQAA",
9: "CFAQAA",
10: "CFoQAA",
}
def init(_):
if privacywall_category not in ("general", "images", "videos"):
raise ValueError("invalid category: %s" % privacywall_category)
def request(query: str, params: "OnlineParams") -> None:
if params["pageno"] > 10:
params["url"] = None
return
args = {"q": query, "safesearch": safesearch_map[params["safesearch"]]}
if params["searxng_locale"] != "all":
args["cc"] = traits.get_region(params["searxng_locale"]) or "US"
if params["time_range"]:
# time range uses the same "day", "week", "month", "year" naming scheme as SearXNG
args["time"] = params["time_range"]
if params["pageno"] > 1:
if privacywall_category == "images":
args["page"] = str(params["pageno"])
elif privacywall_category == "videos":
args["page"] = video_page_map[params["pageno"]]
else:
raise ValueError("general engine does not support pagination")
if privacywall_category == "general":
params["url"] = f"{base_url}/search/secure/?{urlencode(args)}"
else:
params["url"] = f"{base_url}/{privacywall_category}/?{urlencode(args)}"
def _general_results(doc: "ElementBase") -> EngineResults:
res = EngineResults()
for result in eval_xpath_list(doc, "//div[@id='pw-results-main']/div[contains(@class, 'result-card')]"):
(
res.add(
res.types.MainResult(
url=extract_text(eval_xpath(result, ".//a[contains(@class, 'result-url-anchor')]/@href")) or "",
title=extract_text(eval_xpath(result, ".//div[contains(@class, 'result_title')]")) or "",
content=extract_text(eval_xpath(result, ".//div[contains(@class, 'result-description')]")) or "",
),
)
)
return res
def _extract_thumbnail_url(url: str) -> str:
"""
Get the URL from strings like "/videos/video.php?id=<urlencoded-urlhere>".
"""
url_start = url.find("?id=") + len("?id=")
thumbnail = unquote_plus(url[url_start:])
return thumbnail
def _image_results(doc: "ElementBase") -> EngineResults:
res = EngineResults()
for result in eval_xpath_list(doc, "//div[@id='container']/div[contains(@class, 'imgcontainer')]"):
(
res.add(
res.types.Image(
url=extract_text(eval_xpath(result, "./a/@href")) or "",
content=extract_text(eval_xpath(result, "./a/@alt")) or "",
thumbnail_src=_extract_thumbnail_url(extract_text(eval_xpath(result, ".//img/@src")) or ""),
source=extract_text(eval_xpath(result, ".//div[contains(@class, 'image-source-badge')]")) or "",
),
)
)
return res
def _video_results(doc: "ElementBase") -> EngineResults:
res = EngineResults()
for result in eval_xpath_list(
doc, "//div[contains(@class, 'video-container')]/div[contains(@class, 'video-card')]"
):
url = extract_text(eval_xpath(result, "./a/@href")) or ""
if not url:
continue
thumbnail = None
# looks like <div style="background-image:url(/videos/video.php?id=<urlencoded-urlhere>);position:relative">
thumbnail_style = extract_text(eval_xpath(result, ".//div[contains(@class, 'video-img')]/@style"))
if thumbnail_style:
thumbnail = _extract_thumbnail_url(extr(thumbnail_style, ":url(", ")"))
res.add(
res.types.LegacyResult(
template="videos.html",
url=url,
title=extract_text(eval_xpath(result, ".//h2[contains(@class, 'video-card-title')]")) or "",
content=extract_text(eval_xpath(result, ".//p")) or "",
thumbnail=thumbnail or "",
iframe_src=get_embeded_stream_url(url) or "",
)
)
return res
def response(resp: "SXNG_Response") -> EngineResults:
doc = html.fromstring(resp.text)
match privacywall_category:
case "general":
return _general_results(doc)
case "images":
return _image_results(doc)
case "videos":
return _video_results(doc)
case _:
raise ValueError("invalid category: %s" % privacywall_category)
def fetch_traits(engine_traits: EngineTraits) -> None:
"""Fetch regions from Bing-Web."""
# pylint: disable=import-outside-toplevel
from searx.network import get # see https://github.com/searxng/searxng/issues/762
from searx.utils import gen_useragent
headers = {
"User-Agent": gen_useragent(),
}
resp = get(base_url, headers=headers)
if not resp.ok:
raise RuntimeError("Response from Privacywall is not OK.")
dom = html.fromstring(resp.text)
# <div class="dropdown-option" onclick="changeMenuLanguage(&quot;CZ&quot;)"></div>
for onclick_listener in eval_xpath(
dom, "//div[contains(@class, 'lang-menu')]//div[contains(@class, 'dropdown-option')]/@onclick"
):
# this is either a normal lang-country tag (e.g. cs-cz) or only a country code (e.g. de, at, ...)
country_tag = extr(onclick_listener, "(\"", "\")")
# the locale tag is only a country tag, so we get languages the from the list of official languages
# of the country
lang_tag: str
for lang_tag in babel.languages.get_official_languages(country_tag, de_facto=True): # pyright: ignore
try:
sxng_tag = region_tag(babel.Locale.parse(f"{lang_tag}_{country_tag.upper()}"))
except babel.UnknownLocaleError:
# silently ignore unknown languages
continue
conflict = engine_traits.regions.get(sxng_tag)
if conflict:
if conflict != sxng_tag:
print("CONFLICT: babel %s --> %s" % (sxng_tag, conflict))
continue
engine_traits.regions[sxng_tag] = country_tag
+3 -8
View File
@@ -1,8 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Public domain image archive"""
import re
from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
from json import dumps
@@ -51,8 +49,6 @@ paging = True
__CACHED_API_URL = None
_API_URL_RE = re.compile(r"\"(https://.*?/search-proxy)\"")
def _clean_url(url):
parsed = urlparse(url)
@@ -78,11 +74,10 @@ def _get_algolia_api_url():
if resp.status_code != 200:
raise LookupError("Failed to obtain AWS api url for PDImageArchive")
api_url_match = _API_URL_RE.search(resp.text)
if api_url_match is None:
raise LookupError("Couldn't obtain AWS api url for PDImageArchive")
api_url = extr(resp.text, 'const r="', '"', default=None)
api_url = api_url_match.group(1)
if api_url is None:
raise LookupError("Couldn't obtain AWS api url for PDImageArchive")
__CACHED_API_URL = api_url
return api_url
+1 -1
View File
@@ -16,8 +16,8 @@ about = {
"use_official_api": False,
"require_api_key": False,
"results": "HTML",
"language": "zh",
}
language = "zh"
# Engine Configuration
categories = []
+1 -15
View File
@@ -6,7 +6,6 @@
"""
import os
import random
import socket
from urllib.parse import urlencode
@@ -26,7 +25,6 @@ about = {
"require_api_key": False,
"results": "JSON",
}
language_support = True
paging = True
categories = ["music", "radio"]
@@ -61,19 +59,7 @@ seconds."""
def init(_):
global CACHE # pylint: disable=global-statement
CACHE = EngineCache("radio_browser")
# In an environment with competing processes, the initial loading of the
# cache is required only once.
eng_state: str | None = CACHE.get("eng_state")
if not eng_state or not eng_state.startswith("STATE:"):
CACHE.set("eng_state", f"STATE: being initialized by PID {os.getpid()}")
try:
server_list()
except Exception:
CACHE.set("eng_state", f"ERROR: initialization by PID {os.getpid()} failed.")
raise
else:
logger.debug(eng_state)
server_list()
def server_list() -> list[str]:
-120
View File
@@ -1,120 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Resulthunter_ is an American search engine with results from Brave.
.. _Resulthunter : https://resulthunter.com
"""
import typing as t
from urllib.parse import urlencode
from lxml import html
from searx import locales
from searx.result_types import EngineResults
from searx.utils import eval_xpath_list, eval_xpath, extract_text
# as it uses brave internally, it has the same locales and timerange/safesearch types
from searx.engines.brave import safesearch_map, time_range_map, fetch_traits # pylint: disable=unused-import
if t.TYPE_CHECKING:
from lxml.etree import ElementBase
from searx.extended_types import SXNG_Response
from searx.search.processors import OnlineParams
from searx.enginelib.traits import EngineTraits
traits: EngineTraits
about = {
"website": "https://resulthunter.com",
"wikidata_id": None,
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": "HTML",
}
paging = True
safesearch = True
time_range_support = True
base_url = "https://resulthunter.com"
resulthunter_categ = "web"
"""Supported categories are ``web`` and ``images``."""
def init(_):
if resulthunter_categ not in ("web", "images"):
raise ValueError("invalid category: %s" % resulthunter_categ)
def request(query: str, params: "OnlineParams") -> None:
args = {
"q": query,
"search_type": resulthunter_categ,
"offset": params["pageno"] - 1,
}
# uses Brave's engine traits
ui_lang = locales.get_engine_locale(params["searxng_locale"], traits.custom["ui_lang"], "all")
if ui_lang and ui_lang != "all":
args["search_lang"] = ui_lang.split("-")[0]
engine_region = traits.get_region(params["searxng_locale"], "all")
if engine_region and engine_region != "all":
args["country"] = engine_region
if params["time_range"]:
args["freshness"] = time_range_map[params["time_range"]]
params["cookies"]["safesearch"] = safesearch_map[params["safesearch"]]
params["url"] = f"{base_url}/search?{urlencode(args)}"
def _general_results(doc: "ElementBase") -> EngineResults:
res = EngineResults()
for result in eval_xpath_list(
doc, "//div[contains(@class, 'organic-results-container')]/div/div[contains(@class, 'group')]"
):
url = extract_text(eval_xpath(result, ".//a/@href"))
if not url:
continue
(
res.add(
res.types.MainResult(
url=url,
title=extract_text(eval_xpath(result, ".//a/h3")) or "",
content=extract_text(eval_xpath(result, ".//p")) or "",
),
)
)
return res
def _image_results(doc: "ElementBase") -> EngineResults:
res = EngineResults()
for result in eval_xpath_list(
doc, "//div[contains(@class, 'organic-results-container')]//a[contains(@class, 'group')]"
):
(
res.add(
res.types.Image(
url=extract_text(eval_xpath(result, "./@href")) or "",
title=extract_text(eval_xpath(result, "./img/@alt")) or "",
thumbnail_src=extract_text(eval_xpath(result, "./img/@src")) or "",
),
)
)
return res
def response(resp: "SXNG_Response") -> EngineResults:
doc = html.fromstring(resp.text)
match resulthunter_categ:
case "web":
return _general_results(doc)
case "images":
return _image_results(doc)
case _:
raise ValueError("invalid resulthunter category: %s" % resulthunter_categ)
-98
View File
@@ -1,98 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Search engines by System1 (general).
System1 is an advertising company, and provides all its search engines as a
subdomain of ``s1search.co``. As a result, it has more than 1000 subdomains, of
which some work, and some don't.
Some of the engines get their results from Google, others get them from Yahoo.
"""
import typing as t
from urllib.parse import urlencode, urlparse, parse_qs
from lxml import html
from searx.result_types import EngineResults
from searx.enginelib import EngineCache
from searx.utils import eval_xpath_list, eval_xpath, extract_text
if t.TYPE_CHECKING:
from searx.search.processors import OnlineParams
from searx.extended_types import SXNG_Response
about = {
"website": "https://s1search.co",
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": "HTML",
}
base_url = "" # alternatively: search.gmx.net
categories = ["general"]
paging = True
CACHE: EngineCache
"""Cache to store verification tokens for pagination."""
def init(_):
if not base_url:
raise ValueError("base_url must be set")
def setup(engine_settings: dict[str, t.Any]) -> bool:
global CACHE # pylint: disable=global-statement
CACHE = EngineCache(engine_settings["name"])
return True
def _cache_key(query: str, pageno: int) -> str:
return f"{query}|{pageno}"
def request(query: str, params: "OnlineParams"):
args = {"q": query, "page": params["pageno"]}
if params["pageno"] > 1:
sc = CACHE.get(_cache_key(query, params["pageno"]))
# sc is required for pagination to avoid rate-limits
if not sc:
params["url"] = None
return
args["sc"] = sc
params["url"] = f"{base_url}/serp?{urlencode(args)}"
def response(resp: "SXNG_Response") -> EngineResults:
res = EngineResults()
doc = html.fromstring(resp.text)
for suggestion in eval_xpath_list(doc, "//div[@class='aylf-yahoo-bottom' or @class='aylf-yahoo-sidebar']/div"):
res.add(res.types.LegacyResult({"suggestion": extract_text(suggestion)}))
for result in eval_xpath_list(
doc, "//div[contains(@class, 'web-yahoo') or contains(@class, 'web-google')]/div[contains(@class, '__result')]"
):
res.add(
res.types.MainResult(
url=extract_text(eval_xpath(result, ".//a[contains(@class, 'title')]/@href")),
title=extract_text(eval_xpath(result, ".//a[contains(@class, 'title')]")),
content=extract_text(eval_xpath(result, ".//span[contains(@class, 'description') or @class='']")),
)
)
# store pagination keys to be able to access next pages
for page_href in eval_xpath_list(doc, "//a[contains(@class, 'pagination__num')]"):
# target_url looks like "/serp?q=test&page=2&sc=RVlBPMDPVhWR20"
target_url = extract_text(eval_xpath(page_href, "./@href"))
target_url = parse_qs(urlparse(target_url).query)
pageno = int(target_url["page"][0])
sc = target_url["sc"][0]
CACHE.set(_cache_key(resp.search_params["query"], pageno), sc)
return res

Some files were not shown because too many files have changed in this diff Show More