mirror of
https://github.com/searxng/searxng.git
synced 2026-05-07 18:03:51 +02:00
1353 lines
148 KiB
HTML
1353 lines
148 KiB
HTML
<!doctype html>
|
||
<html class="no-js" lang="en" data-content_root="../../">
|
||
<head><meta charset="utf-8">
|
||
<meta name="viewport" content="width=device-width,initial-scale=1">
|
||
<meta name="color-scheme" content="light dark"><link rel="index" title="Index" href="../../genindex.html"><link rel="search" title="Search" href="../../search.html">
|
||
<link rel="prefetch" href="../../_static/searxng-wordmark.svg" as="image">
|
||
|
||
<!-- Generated with Sphinx 9.1.0 and Furo 2025.12.19 -->
|
||
<title>searx.utils - SearXNG Documentation (2026.5.7+ef6290c8c)</title>
|
||
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=d111a655" />
|
||
<link rel="stylesheet" type="text/css" href="../../_static/styles/furo.css?v=7bdb33bb" />
|
||
<link rel="stylesheet" type="text/css" href="../../_static/styles/furo-extensions.css?v=8dab3a3b" />
|
||
<link rel="stylesheet" type="text/css" href="../../_static/searxng.css?v=4b1b1f10" />
|
||
|
||
|
||
|
||
|
||
<style>
  /* Custom properties for code-block colours; these light values are the baseline. */
  body {
    --color-code-background: #f2f2f2;
    --color-code-foreground: #1e1e1e;
  }

  /* The dark overrides below are never applied when printing. */
  @media not print {
    /* data-theme on <body> is "dark". */
    body[data-theme="dark"] {
      --color-code-background: #202020;
      --color-code-foreground: #d0d0d0;
    }

    /* The system prefers dark and data-theme is anything other than "light". */
    @media (prefers-color-scheme: dark) {
      body:not([data-theme="light"]) {
        --color-code-background: #202020;
        --color-code-foreground: #d0d0d0;
      }
    }
  }
</style></head>
|
||
<body>
|
||
|
||
<script>
  // Copy the saved "theme" value from localStorage onto <body data-theme>,
  // using "auto" when nothing is stored.
  (function () {
    var savedTheme = null;
    try {
      savedTheme = localStorage.getItem("theme");
    } catch (err) {
      // Accessing localStorage can throw (for example a SecurityError when the
      // browser blocks site data); fall through to the "auto" default instead
      // of aborting the script.
      savedTheme = null;
    }
    document.body.dataset.theme = savedTheme || "auto";
  })();
</script>
|
||
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
|
||
<symbol id="svg-toc" viewBox="0 0 24 24">
|
||
<title>Contents</title>
|
||
<svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 1024 1024">
|
||
<path d="M408 442h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8zm-8 204c0 4.4 3.6 8 8 8h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56zm504-486H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 632H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zM115.4 518.9L271.7 642c5.8 4.6 14.4.5 14.4-6.9V388.9c0-7.4-8.5-11.5-14.4-6.9L115.4 505.1a8.74 8.74 0 0 0 0 13.8z"/>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-menu" viewBox="0 0 24 24">
|
||
<title>Menu</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
|
||
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-menu">
|
||
<line x1="3" y1="12" x2="21" y2="12"></line>
|
||
<line x1="3" y1="6" x2="21" y2="6"></line>
|
||
<line x1="3" y1="18" x2="21" y2="18"></line>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-arrow-right" viewBox="0 0 24 24">
|
||
<title>Expand</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
|
||
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-chevron-right">
|
||
<polyline points="9 18 15 12 9 6"></polyline>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-sun" viewBox="0 0 24 24">
|
||
<title>Light mode</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
|
||
stroke-width="1" stroke-linecap="round" stroke-linejoin="round" class="feather-sun">
|
||
<circle cx="12" cy="12" r="5"></circle>
|
||
<line x1="12" y1="1" x2="12" y2="3"></line>
|
||
<line x1="12" y1="21" x2="12" y2="23"></line>
|
||
<line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
|
||
<line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
|
||
<line x1="1" y1="12" x2="3" y2="12"></line>
|
||
<line x1="21" y1="12" x2="23" y2="12"></line>
|
||
<line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
|
||
<line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-moon" viewBox="0 0 24 24">
|
||
<title>Dark mode</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
|
||
stroke-width="1" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-moon">
|
||
<path stroke="none" d="M0 0h24v24H0z" fill="none" />
|
||
<path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z" />
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-sun-with-moon" viewBox="0 0 24 24">
|
||
<title>Auto light/dark, in light mode</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
|
||
stroke-width="1" stroke-linecap="round" stroke-linejoin="round"
|
||
class="icon-custom-derived-from-feather-sun-and-tabler-moon">
|
||
<path style="opacity: 50%" d="M 5.411 14.504 C 5.471 14.504 5.532 14.504 5.591 14.504 C 3.639 16.319 4.383 19.569 6.931 20.352 C 7.693 20.586 8.512 20.551 9.25 20.252 C 8.023 23.207 4.056 23.725 2.11 21.184 C 0.166 18.642 1.702 14.949 4.874 14.536 C 5.051 14.512 5.231 14.5 5.411 14.5 L 5.411 14.504 Z"/>
|
||
<line x1="14.5" y1="3.25" x2="14.5" y2="1.25"/>
|
||
<line x1="14.5" y1="15.85" x2="14.5" y2="17.85"/>
|
||
<line x1="10.044" y1="5.094" x2="8.63" y2="3.68"/>
|
||
<line x1="19" y1="14.05" x2="20.414" y2="15.464"/>
|
||
<line x1="8.2" y1="9.55" x2="6.2" y2="9.55"/>
|
||
<line x1="20.8" y1="9.55" x2="22.8" y2="9.55"/>
|
||
<line x1="10.044" y1="14.006" x2="8.63" y2="15.42"/>
|
||
<line x1="19" y1="5.05" x2="20.414" y2="3.636"/>
|
||
<circle cx="14.5" cy="9.55" r="3.6"/>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-moon-with-sun" viewBox="0 0 24 24">
|
||
<title>Auto light/dark, in dark mode</title>
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
|
||
stroke-width="1" stroke-linecap="round" stroke-linejoin="round"
|
||
class="icon-custom-derived-from-feather-sun-and-tabler-moon">
|
||
<path d="M 8.282 7.007 C 8.385 7.007 8.494 7.007 8.595 7.007 C 5.18 10.184 6.481 15.869 10.942 17.24 C 12.275 17.648 13.706 17.589 15 17.066 C 12.851 22.236 5.91 23.143 2.505 18.696 C -0.897 14.249 1.791 7.786 7.342 7.063 C 7.652 7.021 7.965 7 8.282 7 L 8.282 7.007 Z"/>
|
||
<line style="opacity: 50%" x1="18" y1="3.705" x2="18" y2="2.5"/>
|
||
<line style="opacity: 50%" x1="18" y1="11.295" x2="18" y2="12.5"/>
|
||
<line style="opacity: 50%" x1="15.316" y1="4.816" x2="14.464" y2="3.964"/>
|
||
<line style="opacity: 50%" x1="20.711" y1="10.212" x2="21.563" y2="11.063"/>
|
||
<line style="opacity: 50%" x1="14.205" y1="7.5" x2="13.001" y2="7.5"/>
|
||
<line style="opacity: 50%" x1="21.795" y1="7.5" x2="23" y2="7.5"/>
|
||
<line style="opacity: 50%" x1="15.316" y1="10.184" x2="14.464" y2="11.036"/>
|
||
<line style="opacity: 50%" x1="20.711" y1="4.789" x2="21.563" y2="3.937"/>
|
||
<circle style="opacity: 50%" cx="18" cy="7.5" r="2.169"/>
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-pencil" viewBox="0 0 24 24">
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
|
||
stroke-width="1" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-pencil-code">
|
||
<path d="M4 20h4l10.5 -10.5a2.828 2.828 0 1 0 -4 -4l-10.5 10.5v4" />
|
||
<path d="M13.5 6.5l4 4" />
|
||
<path d="M20 21l2 -2l-2 -2" />
|
||
<path d="M17 17l-2 2l2 2" />
|
||
</svg>
|
||
</symbol>
|
||
<symbol id="svg-eye" viewBox="0 0 24 24">
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
|
||
stroke-width="1" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-eye-code">
|
||
<path stroke="none" d="M0 0h24v24H0z" fill="none" />
|
||
<path d="M10 12a2 2 0 1 0 4 0a2 2 0 0 0 -4 0" />
|
||
<path
|
||
d="M11.11 17.958c-3.209 -.307 -5.91 -2.293 -8.11 -5.958c2.4 -4 5.4 -6 9 -6c3.6 0 6.6 2 9 6c-.21 .352 -.427 .688 -.647 1.008" />
|
||
<path d="M20 21l2 -2l-2 -2" />
|
||
<path d="M17 17l-2 2l2 2" />
|
||
</svg>
|
||
</symbol>
|
||
</svg>
|
||
|
||
<input type="checkbox" class="sidebar-toggle" name="__navigation" id="__navigation" aria-label="Toggle site navigation sidebar">
|
||
<input type="checkbox" class="sidebar-toggle" name="__toc" id="__toc" aria-label="Toggle table of contents sidebar">
|
||
<label class="overlay sidebar-overlay" for="__navigation"></label>
|
||
<label class="overlay toc-overlay" for="__toc"></label>
|
||
|
||
<a class="skip-to-content muted-link" href="#furo-main-content">Skip to content</a>
|
||
|
||
|
||
|
||
<div class="page">
|
||
<!-- Mobile header: navigation toggle, brand link, theme toggle and contents toggle. -->
<header class="mobile-header">
  <div class="header-left">
    <label class="nav-overlay-icon" for="__navigation">
      <span class="icon"><svg><use href="#svg-menu"></use></svg></span>
    </label>
  </div>
  <div class="header-center">
    <a href="../../index.html"><div class="brand">SearXNG Documentation (2026.5.7+ef6290c8c)</div></a>
  </div>
  <div class="header-right">
    <div class="theme-toggle-container theme-toggle-header">
      <!-- type="button" is explicit so the control can never act as a submit button.
           The icons are decorative: the accessible name comes from aria-label. -->
      <button class="theme-toggle" type="button" aria-label="Toggle Light / Dark / Auto color theme">
        <svg class="theme-icon-when-auto-light" aria-hidden="true"><use href="#svg-sun-with-moon"></use></svg>
        <svg class="theme-icon-when-auto-dark" aria-hidden="true"><use href="#svg-moon-with-sun"></use></svg>
        <svg class="theme-icon-when-dark" aria-hidden="true"><use href="#svg-moon"></use></svg>
        <svg class="theme-icon-when-light" aria-hidden="true"><use href="#svg-sun"></use></svg>
      </button>
    </div>
    <label class="toc-overlay-icon toc-header-icon no-toc" for="__toc">
      <span class="icon"><svg><use href="#svg-toc"></use></svg></span>
    </label>
  </div>
</header>
|
||
<aside class="sidebar-drawer">
|
||
<div class="sidebar-container">
|
||
|
||
<div class="sidebar-sticky"><div class="sidebar-scroll"><a class="sidebar-brand" href="../../index.html">
|
||
<div class="sidebar-logo-container">
  <!-- The image is the SearXNG wordmark, so its alt text is the name it spells
       out rather than the generic word "Logo". -->
  <img class="sidebar-logo" src="../../_static/searxng-wordmark.svg" alt="SearXNG">
</div>
|
||
|
||
<span class="sidebar-brand-text">SearXNG Documentation (2026.5.7+ef6290c8c)</span>
|
||
|
||
</a><form class="sidebar-search-container" method="get" action="../../search.html" role="search">
|
||
<input class="sidebar-search" placeholder="Search" name="q" aria-label="Search">
|
||
<input type="hidden" name="check_keywords" value="yes">
|
||
<input type="hidden" name="area" value="default">
|
||
</form>
|
||
<div id="searchbox"></div><div class="sidebar-tree">
|
||
<ul>
|
||
<li class="toctree-l1 has-children"><a class="reference internal" href="../../user/index.html">User information</a><input aria-label="Toggle navigation of User information" class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" role="switch" type="checkbox"/><label for="toctree-checkbox-1"><span class="icon"><svg><use href="#svg-arrow-right"></use></svg></span></label><ul>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../user/search-syntax.html">Search syntax</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../user/configured_engines.html">Configured Engines</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../user/about.html">About SearXNG</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../own-instance.html">Why use a private instance?</a></li>
|
||
<li class="toctree-l1 has-children"><a class="reference internal" href="../../admin/index.html">Administrator documentation</a><input aria-label="Toggle navigation of Administrator documentation" class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" role="switch" type="checkbox"/><label for="toctree-checkbox-2"><span class="icon"><svg><use href="#svg-arrow-right"></use></svg></span></label><ul>
|
||
<li class="toctree-l2 has-children"><a class="reference internal" href="../../admin/settings/index.html">Settings</a><input aria-label="Toggle navigation of Settings" class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" role="switch" type="checkbox"/><label for="toctree-checkbox-3"><span class="icon"><svg><use href="#svg-arrow-right"></use></svg></span></label><ul>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../admin/settings/settings.html"><code class="docutils literal notranslate"><span class="pre">settings.yml</span></code></a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../admin/settings/settings_engines.html"><code class="docutils literal notranslate"><span class="pre">engines:</span></code></a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../admin/settings/settings_brand.html"><code class="docutils literal notranslate"><span class="pre">brand:</span></code></a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../admin/settings/settings_general.html"><code class="docutils literal notranslate"><span class="pre">general:</span></code></a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../admin/settings/settings_search.html"><code class="docutils literal notranslate"><span class="pre">search:</span></code></a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../admin/settings/settings_server.html"><code class="docutils literal notranslate"><span class="pre">server:</span></code></a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../admin/settings/settings_ui.html"><code class="docutils literal notranslate"><span class="pre">ui:</span></code></a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../admin/settings/settings_redis.html"><code class="docutils literal notranslate"><span class="pre">redis:</span></code></a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../admin/settings/settings_valkey.html"><code class="docutils literal notranslate"><span class="pre">valkey:</span></code></a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../admin/settings/settings_outgoing.html"><code class="docutils literal notranslate"><span class="pre">outgoing:</span></code></a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../admin/settings/settings_categories_as_tabs.html"><code class="docutils literal notranslate"><span class="pre">categories_as_tabs:</span></code></a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../admin/settings/settings_plugins.html"><code class="docutils literal notranslate"><span class="pre">plugins:</span></code></a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../admin/installation.html">Installation</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../admin/installation-docker.html">Installation container</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../admin/installation-scripts.html">Installation Script</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../admin/installation-searxng.html">Step by step installation</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../admin/installation-granian.html">Granian</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../admin/installation-uwsgi.html">uWSGI</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../admin/installation-nginx.html">NGINX</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../admin/installation-apache.html">Apache</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../admin/update-searxng.html">SearXNG maintenance</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../admin/answer-captcha.html">Answer CAPTCHA from server’s IP</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../admin/searx.favicons.html">Favicons</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../admin/searx.limiter.html">Limiter</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../admin/api.html">Administration API</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../admin/architecture.html">Architecture</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../admin/plugins.html">List of plugins</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../admin/buildhosts.html">Buildhosts</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l1 has-children"><a class="reference internal" href="../../dev/index.html">Developer documentation</a><input aria-label="Toggle navigation of Developer documentation" class="toctree-checkbox" id="toctree-checkbox-4" name="toctree-checkbox-4" role="switch" type="checkbox"/><label for="toctree-checkbox-4"><span class="icon"><svg><use href="#svg-arrow-right"></use></svg></span></label><ul>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../dev/quickstart.html">Development Quickstart</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../dev/commits.html">Git Commits &amp; Change Management</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../dev/contribution_guide.html">How to contribute</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../dev/extended_types.html">Extended Types</a></li>
|
||
<li class="toctree-l2 has-children"><a class="reference internal" href="../../dev/engines/index.html">Engine Implementations</a><input aria-label="Toggle navigation of Engine Implementations" class="toctree-checkbox" id="toctree-checkbox-5" name="toctree-checkbox-5" role="switch" type="checkbox"/><label for="toctree-checkbox-5"><span class="icon"><svg><use href="#svg-arrow-right"></use></svg></span></label><ul>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/enginelib.html">Engine Library</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/engines.html">SearXNG’s engines loader</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/engine_overview.html">Engine Overview</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/demo/demo_online.html">Demo Online Engine</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/xpath.html">XPath Engine</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/mediawiki.html">MediaWiki Engine</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/json_engine.html">JSON Engine</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/adobe_stock.html">Adobe Stock</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/alpinelinux.html">Alpine Linux Packages</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/annas_archive.html">Anna’s Archive</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/aol.html">AOL</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/archlinux.html">Arch Linux</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/arxiv.html">arXiv</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/astrophysics_data_system.html">Astrophysics Data System (ADS)</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/azure.html">Azure Resources</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/bing.html">Bing Engines</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/bpb.html">Bpb</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/brave.html">Brave Engines</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/bt4g.html">BT4G</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/chinaso.html">ChinaSo</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/core.html">CORE</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/crossref.html">Crossref</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/dailymotion.html">Dailymotion</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/discourse.html">Discourse Forums</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/duckduckgo.html">DuckDuckGo Engines</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/geizhals.html">Geizhals</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/gitea.html">Gitea</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/github_code.html">Github Code</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/gitlab.html">GitLab</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/google.html">Google Engines</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/huggingface.html">Hugging Face</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/karmasearch.html">Karmasearch</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/lemmy.html">Lemmy</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/loc.html">Library of Congress</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/marginalia.html">Marginalia Search</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/mastodon.html">Mastodon</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/moviepilot.html">Moviepilot</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/mrs.html">Matrix Rooms Search (MRS)</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/mwmbl.html">Mwmbl Engine</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/odysee.html">Odysee</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/openalex.html">OpenAlex</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/openlibrary.html">Open Library</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/peertube.html">Peertube Engines</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/piped.html">Piped</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/presearch.html">Presearch Engine</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/pubmed.html">PubMed</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/qwant.html">Qwant</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/radio_browser.html">RadioBrowser</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/recoll.html">Recoll Engine</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/repology.html">Repology</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/reuters.html">Reuters</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/semantic_scholar.html">Semantic Scholar</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/soundcloud.html">Soundcloud</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/sourcehut.html">Sourcehut</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/springer.html">Springer Nature</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/startpage.html">Startpage Engines</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/tagesschau.html">Tagesschau API</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/torznab.html">Torznab WebAPI</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/tubearchivist.html">Tube Archivist</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/void.html">Void Linux binary packages</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/wallhaven.html">Wallhaven</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/wikipedia.html">Wikimedia</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/yacy.html">Yacy</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/yahoo.html">Yahoo Engine</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online/zlibrary.html">Z-Library</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/offline_concept.html">Offline Concept</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/demo/demo_offline.html">Demo Offline Engine</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/offline/command-line-engines.html">Command Line Engines</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/offline/nosql-engines.html">NoSQL databases</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/offline/search-indexer-engines.html">Local Search APIs</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/offline/sql-engines.html">SQL Engines</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/engines/online_url_search/tineye.html">Tineye</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l2 has-children"><a class="reference internal" href="../../dev/result_types/index.html">Result Types</a><input aria-label="Toggle navigation of Result Types" class="toctree-checkbox" id="toctree-checkbox-6" name="toctree-checkbox-6" role="switch" type="checkbox"/><label for="toctree-checkbox-6"><span class="icon"><svg><use href="#svg-arrow-right"></use></svg></span></label><ul>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/result_types/base_result.html">Result</a></li>
|
||
<li class="toctree-l3 has-children"><a class="reference internal" href="../../dev/result_types/main_result.html">Main Search Results</a><input aria-label="Toggle navigation of Main Search Results" class="toctree-checkbox" id="toctree-checkbox-7" name="toctree-checkbox-7" role="switch" type="checkbox"/><label for="toctree-checkbox-7"><span class="icon"><svg><use href="#svg-arrow-right"></use></svg></span></label><ul>
|
||
<li class="toctree-l4"><a class="reference internal" href="../../dev/result_types/main/mainresult.html"><code class="docutils literal notranslate"><span class="pre">MainResult</span></code></a></li>
|
||
<li class="toctree-l4"><a class="reference internal" href="../../dev/result_types/main/keyvalue.html">Key-Value Results</a></li>
|
||
<li class="toctree-l4"><a class="reference internal" href="../../dev/result_types/main/code.html">Code Results</a></li>
|
||
<li class="toctree-l4"><a class="reference internal" href="../../dev/result_types/main/paper.html">Paper Results</a></li>
|
||
<li class="toctree-l4"><a class="reference internal" href="../../dev/result_types/main/file.html">File Results</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/result_types/answer.html">Answer Results</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/result_types/correction.html">Correction Results</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/result_types/suggestion.html">Suggestion Results</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/result_types/infobox.html">Infobox Results</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../dev/templates.html">Simple Theme Templates</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../dev/search_api.html">Search API</a></li>
|
||
<li class="toctree-l2 has-children"><a class="reference internal" href="../../dev/plugins/index.html">Plugins</a><input aria-label="Toggle navigation of Plugins" class="toctree-checkbox" id="toctree-checkbox-8" name="toctree-checkbox-8" role="switch" type="checkbox"/><label for="toctree-checkbox-8"><span class="icon"><svg><use href="#svg-arrow-right"></use></svg></span></label><ul>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/plugins/development.html">Plugin Development</a></li>
|
||
<li class="toctree-l3 has-children"><a class="reference internal" href="../../dev/plugins/builtins.html">Built-in Plugins</a><input aria-label="Toggle navigation of Built-in Plugins" class="toctree-checkbox" id="toctree-checkbox-9" name="toctree-checkbox-9" role="switch" type="checkbox"/><label for="toctree-checkbox-9"><span class="icon"><svg><use href="#svg-arrow-right"></use></svg></span></label><ul>
|
||
<li class="toctree-l4"><a class="reference internal" href="../../dev/plugins/calculator.html">Calculator</a></li>
|
||
<li class="toctree-l4"><a class="reference internal" href="../../dev/plugins/hash_plugin.html">Hash Values</a></li>
|
||
<li class="toctree-l4"><a class="reference internal" href="../../dev/plugins/hostnames.html">Hostnames</a></li>
|
||
<li class="toctree-l4"><a class="reference internal" href="../../dev/plugins/infinite_scroll.html">Infinite scroll</a></li>
|
||
<li class="toctree-l4"><a class="reference internal" href="../../dev/plugins/self_info.html">Self-Info</a></li>
|
||
<li class="toctree-l4"><a class="reference internal" href="../../dev/plugins/tor_check.html">Tor check</a></li>
|
||
<li class="toctree-l4"><a class="reference internal" href="../../dev/plugins/unit_converter.html">Unit Converter</a></li>
|
||
<li class="toctree-l4"><a class="reference internal" href="../../dev/plugins/time_zone.html">Time Zone</a></li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l2 has-children"><a class="reference internal" href="../../dev/answerers/index.html">Answerers</a><input aria-label="Toggle navigation of Answerers" class="toctree-checkbox" id="toctree-checkbox-10" name="toctree-checkbox-10" role="switch" type="checkbox"/><label for="toctree-checkbox-10"><span class="icon"><svg><use href="#svg-arrow-right"></use></svg></span></label><ul>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/answerers/development.html">Answerer Development</a></li>
|
||
<li class="toctree-l3 has-children"><a class="reference internal" href="../../dev/answerers/builtins.html">Built-in Answerers</a><input aria-label="Toggle navigation of Built-in Answerers" class="toctree-checkbox" id="toctree-checkbox-11" name="toctree-checkbox-11" role="switch" type="checkbox"/><label for="toctree-checkbox-11"><span class="icon"><svg><use href="#svg-arrow-right"></use></svg></span></label><ul>
|
||
<li class="toctree-l4"><a class="reference internal" href="../../dev/answerers/random.html">Random</a></li>
|
||
<li class="toctree-l4"><a class="reference internal" href="../../dev/answerers/statistics.html">Statistics</a></li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../dev/translation.html">Translation</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../dev/makefile.html">Makefile &amp; <code class="docutils literal notranslate"><span class="pre">./manage</span></code></a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../dev/reST.html">reST primer</a></li>
|
||
<li class="toctree-l2 has-children"><a class="reference internal" href="../../dev/searxng_extra/index.html">Tooling box <code class="docutils literal notranslate"><span class="pre">searxng_extra</span></code></a><input aria-label="Toggle navigation of Tooling box searxng_extra" class="toctree-checkbox" id="toctree-checkbox-12" name="toctree-checkbox-12" role="switch" type="checkbox"/><label for="toctree-checkbox-12"><span class="icon"><svg><use href="#svg-arrow-right"></use></svg></span></label><ul>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../dev/searxng_extra/update.html"><code class="docutils literal notranslate"><span class="pre">searxng_extra/update/</span></code></a></li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l1 has-children"><a class="reference internal" href="../../utils/index.html">DevOps tooling box</a><input aria-label="Toggle navigation of DevOps tooling box" class="toctree-checkbox" id="toctree-checkbox-13" name="toctree-checkbox-13" role="switch" type="checkbox"/><label for="toctree-checkbox-13"><span class="icon"><svg><use href="#svg-arrow-right"></use></svg></span></label><ul>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../utils/searxng.sh.html"><code class="docutils literal notranslate"><span class="pre">utils/searxng.sh</span></code></a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l1 has-children"><a class="reference internal" href="../../src/index.html">Source-Code</a><input aria-label="Toggle navigation of Source-Code" class="toctree-checkbox" id="toctree-checkbox-14" name="toctree-checkbox-14" role="switch" type="checkbox"/><label for="toctree-checkbox-14"><span class="icon"><svg><use href="#svg-arrow-right"></use></svg></span></label><ul>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../src/searx.babel_extract.html">Custom message extractor (i18n)</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../src/searx.botdetection.html">Bot Detection</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../src/searx.cache.html">Caches</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../src/searx.exceptions.html">SearXNG Exceptions</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../src/searx.favicons.html">Favicons (source)</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../src/searx.infopage.html">Online <code class="docutils literal notranslate"><span class="pre">/info</span></code></a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../src/searx.locales.html">Locales</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../src/searx.search.html">Search</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../src/searx.search.processors.html">Search processors</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../src/searx.settings.html">Settings Loader</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../src/searx.sqlitedb.html">SQLite DB</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../src/searx.utils.html">Utility functions for the engines</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../src/searx.valkeydb.html">Valkey DB</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../src/searx.valkeylib.html">Valkey Library</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../src/searx.weather.html">Weather</a></li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
</div>
|
||
</aside>
|
||
<div class="main">
|
||
<div class="content">
|
||
<div class="article-container">
|
||
<a href="#" class="back-to-top muted-link">
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
|
||
<path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12z"></path>
|
||
</svg>
|
||
<span>Back to top</span>
|
||
</a>
|
||
<div class="content-icon-container">
|
||
<div class="theme-toggle-container theme-toggle-content">
|
||
<button class="theme-toggle" aria-label="Toggle Light / Dark / Auto color theme">
|
||
<svg class="theme-icon-when-auto-light"><use href="#svg-sun-with-moon"></use></svg>
|
||
<svg class="theme-icon-when-auto-dark"><use href="#svg-moon-with-sun"></use></svg>
|
||
<svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
|
||
<svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
|
||
</button>
|
||
</div>
|
||
<label class="toc-overlay-icon toc-content-icon no-toc" for="__toc">
|
||
<span class="icon"><svg><use href="#svg-toc"></use></svg></span>
|
||
</label>
|
||
</div>
|
||
<article role="main" id="furo-main-content">
|
||
<h1>Source code for searx.utils</h1><div class="highlight"><pre>
|
||
<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span>
|
||
<span class="sd">"""Utility functions for the engines"""</span>
|
||
|
||
|
||
<span class="kn">import</span><span class="w"> </span><span class="nn">re</span>
|
||
<span class="kn">import</span><span class="w"> </span><span class="nn">importlib</span>
|
||
<span class="kn">import</span><span class="w"> </span><span class="nn">importlib.util</span>
|
||
<span class="kn">import</span><span class="w"> </span><span class="nn">json</span>
|
||
<span class="kn">import</span><span class="w"> </span><span class="nn">types</span>
|
||
|
||
<span class="kn">import</span><span class="w"> </span><span class="nn">typing</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">t</span>
|
||
<span class="kn">from</span><span class="w"> </span><span class="nn">collections.abc</span><span class="w"> </span><span class="kn">import</span> <span class="n">MutableMapping</span><span class="p">,</span> <span class="n">Callable</span>
|
||
|
||
<span class="kn">from</span><span class="w"> </span><span class="nn">numbers</span><span class="w"> </span><span class="kn">import</span> <span class="n">Number</span>
|
||
<span class="kn">from</span><span class="w"> </span><span class="nn">os.path</span><span class="w"> </span><span class="kn">import</span> <span class="n">splitext</span><span class="p">,</span> <span class="n">join</span>
|
||
<span class="kn">from</span><span class="w"> </span><span class="nn">random</span><span class="w"> </span><span class="kn">import</span> <span class="n">choice</span>
|
||
<span class="kn">from</span><span class="w"> </span><span class="nn">html.parser</span><span class="w"> </span><span class="kn">import</span> <span class="n">HTMLParser</span>
|
||
<span class="kn">from</span><span class="w"> </span><span class="nn">html</span><span class="w"> </span><span class="kn">import</span> <span class="n">escape</span>
|
||
<span class="kn">from</span><span class="w"> </span><span class="nn">urllib.parse</span><span class="w"> </span><span class="kn">import</span> <span class="n">urljoin</span><span class="p">,</span> <span class="n">urlparse</span><span class="p">,</span> <span class="n">parse_qs</span><span class="p">,</span> <span class="n">urlencode</span>
|
||
<span class="kn">from</span><span class="w"> </span><span class="nn">datetime</span><span class="w"> </span><span class="kn">import</span> <span class="n">timedelta</span>
|
||
<span class="kn">from</span><span class="w"> </span><span class="nn">markdown_it</span><span class="w"> </span><span class="kn">import</span> <span class="n">MarkdownIt</span>
|
||
|
||
<span class="kn">from</span><span class="w"> </span><span class="nn">lxml</span><span class="w"> </span><span class="kn">import</span> <span class="n">html</span>
|
||
<span class="kn">from</span><span class="w"> </span><span class="nn">lxml.etree</span><span class="w"> </span><span class="kn">import</span> <span class="n">XPath</span><span class="p">,</span> <span class="n">XPathError</span><span class="p">,</span> <span class="n">XPathSyntaxError</span>
|
||
<span class="kn">from</span><span class="w"> </span><span class="nn">lxml.etree</span><span class="w"> </span><span class="kn">import</span> <span class="n">ElementBase</span><span class="p">,</span> <span class="n">_Element</span> <span class="c1"># pyright: ignore[reportPrivateUsage]</span>
|
||
|
||
<span class="kn">from</span><span class="w"> </span><span class="nn">searx</span><span class="w"> </span><span class="kn">import</span> <span class="n">settings</span>
|
||
<span class="kn">from</span><span class="w"> </span><span class="nn">searx.data</span><span class="w"> </span><span class="kn">import</span> <span class="n">USER_AGENTS</span><span class="p">,</span> <span class="n">gsa_useragents_loader</span>
|
||
<span class="kn">from</span><span class="w"> </span><span class="nn">searx.version</span><span class="w"> </span><span class="kn">import</span> <span class="n">VERSION_TAG</span>
|
||
<span class="kn">from</span><span class="w"> </span><span class="nn">searx.exceptions</span><span class="w"> </span><span class="kn">import</span> <span class="n">SearxXPathSyntaxException</span><span class="p">,</span> <span class="n">SearxEngineXPathException</span>
|
||
<span class="kn">from</span><span class="w"> </span><span class="nn">searx</span><span class="w"> </span><span class="kn">import</span> <span class="n">logger</span>
|
||
|
||
<span class="n">logger</span> <span class="o">=</span> <span class="n">logger</span><span class="o">.</span><span class="n">getChild</span><span class="p">(</span><span class="s1">'utils'</span><span class="p">)</span>
|
||
|
||
<span class="n">XPathSpecType</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">TypeAlias</span> <span class="o">=</span> <span class="nb">str</span> <span class="o">|</span> <span class="n">XPath</span>
|
||
<span class="sd">"""Type alias used by :py:obj:`searx.utils.get_xpath`,</span>
|
||
<span class="sd">:py:obj:`searx.utils.eval_xpath` and other XPath selectors."""</span>
|
||
|
||
<span class="n">ElementType</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">TypeAlias</span> <span class="o">=</span> <span class="n">ElementBase</span> <span class="o">|</span> <span class="n">_Element</span>
|
||
|
||
|
||
<span class="n">_BLOCKED_TAGS</span> <span class="o">=</span> <span class="p">(</span><span class="s1">'script'</span><span class="p">,</span> <span class="s1">'style'</span><span class="p">)</span>
|
||
|
||
<span class="n">_ECMA_UNESCAPE4_RE</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s1">'</span><span class="si">%u</span><span class="s1">([0-9a-fA-F]</span><span class="si">{4}</span><span class="s1">)'</span><span class="p">,</span> <span class="n">re</span><span class="o">.</span><span class="n">UNICODE</span><span class="p">)</span>
|
||
<span class="n">_ECMA_UNESCAPE2_RE</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s1">'%([0-9a-fA-F]</span><span class="si">{2}</span><span class="s1">)'</span><span class="p">,</span> <span class="n">re</span><span class="o">.</span><span class="n">UNICODE</span><span class="p">)</span>
|
||
|
||
<span class="n">_JS_STRING_DELIMITERS</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s1">'(["</span><span class="se">\'</span><span class="s1">`])'</span><span class="p">)</span>
|
||
<span class="n">_JS_QUOTE_KEYS_RE</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s1">'([\{\s,])([\$_\w][\$_\w0-9]*)(:)'</span><span class="p">)</span>
|
||
<span class="n">_JS_VOID_OR_UNDEFINED_RE</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s1">'void\s+[0-9]+|void\s*\([0-9]+\)|undefined'</span><span class="p">)</span>
|
||
<span class="n">_JS_DECIMAL_RE</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s2">"([\[\,:])\s*(\-?)\s*([0-9_]*)\.([0-9_]*)"</span><span class="p">)</span>
|
||
<span class="n">_JS_DECIMAL2_RE</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s2">"([\[\,:])\s*(\-?)\s*([0-9_]+)"</span><span class="p">)</span>
|
||
<span class="n">_JS_EXTRA_COMA_RE</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s2">"\s*,\s*([\]\}])"</span><span class="p">)</span>
|
||
<span class="n">_JS_STRING_ESCAPE_RE</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s1">'</span><span class="se">\\</span><span class="s1">(.)'</span><span class="p">)</span>
|
||
<span class="n">_JSON_PASSTHROUGH_ESCAPES</span> <span class="o">=</span> <span class="sa">R</span><span class="s1">'"\bfnrtu'</span>
|
||
|
||
<span class="n">_XPATH_CACHE</span><span class="p">:</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">XPath</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
|
||
<span class="n">_LANG_TO_LC_CACHE</span><span class="p">:</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="p">{}</span>
|
||
|
||
|
||
<span class="k">class</span><span class="w"> </span><span class="nc">_NotSetClass</span><span class="p">:</span> <span class="c1"># pylint: disable=too-few-public-methods</span>
|
||
<span class="w"> </span><span class="sd">"""Internal class for this module, do not create instance of this class.</span>
|
||
<span class="sd"> Replace the None value, allow explicitly pass None as a function argument"""</span>
|
||
|
||
|
||
<span class="n">_NOTSET</span> <span class="o">=</span> <span class="n">_NotSetClass</span><span class="p">()</span>
|
||
|
||
|
||
<div class="viewcode-block" id="searxng_useragent">
|
||
<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.searxng_useragent">[docs]</a>
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">searxng_useragent</span><span class="p">()</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span>
|
||
<span class="w"> </span><span class="sd">"""Return the SearXNG User Agent"""</span>
|
||
<span class="k">return</span> <span class="sa">f</span><span class="s2">"SearXNG/</span><span class="si">{</span><span class="n">VERSION_TAG</span><span class="si">}</span><span class="s2"> </span><span class="si">{</span><span class="n">settings</span><span class="p">[</span><span class="s1">'outgoing'</span><span class="p">][</span><span class="s1">'useragent_suffix'</span><span class="p">]</span><span class="si">}</span><span class="s2">"</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span></div>
|
||
|
||
|
||
|
||
<div class="viewcode-block" id="gen_useragent">
|
||
<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.gen_useragent">[docs]</a>
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">gen_useragent</span><span class="p">(</span><span class="n">os_string</span><span class="p">:</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span>
|
||
<span class="w"> </span><span class="sd">"""Return a random browser User Agent</span>
|
||
|
||
<span class="sd"> See searx/data/useragents.json</span>
|
||
<span class="sd"> """</span>
|
||
<span class="k">return</span> <span class="n">USER_AGENTS</span><span class="p">[</span><span class="s1">'ua'</span><span class="p">]</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
|
||
<span class="n">os</span><span class="o">=</span><span class="n">os_string</span> <span class="ow">or</span> <span class="n">choice</span><span class="p">(</span><span class="n">USER_AGENTS</span><span class="p">[</span><span class="s1">'os'</span><span class="p">]),</span>
|
||
<span class="n">version</span><span class="o">=</span><span class="n">choice</span><span class="p">(</span><span class="n">USER_AGENTS</span><span class="p">[</span><span class="s1">'versions'</span><span class="p">]),</span>
|
||
<span class="p">)</span></div>
|
||
|
||
|
||
|
||
<div class="viewcode-block" id="gen_gsa_useragent">
|
||
<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.gen_gsa_useragent">[docs]</a>
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">gen_gsa_useragent</span><span class="p">()</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span>
|
||
<span class="w"> </span><span class="sd">"""Return a random "Google Go App" User Agent suitable for Google</span>
|
||
|
||
<span class="sd"> See searx/data/gsa_useragents.txt</span>
|
||
<span class="sd"> """</span>
|
||
<span class="k">return</span> <span class="n">choice</span><span class="p">(</span><span class="n">gsa_useragents_loader</span><span class="p">())</span> <span class="o">+</span> <span class="s2">" NSTNWV"</span></div>
|
||
|
||
|
||
|
||
<div class="viewcode-block" id="HTMLTextExtractor">
|
||
<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.HTMLTextExtractor">[docs]</a>
|
||
<span class="k">class</span><span class="w"> </span><span class="nc">HTMLTextExtractor</span><span class="p">(</span><span class="n">HTMLParser</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""Internal class to extract text from HTML"""</span>
|
||
|
||
<span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||
<span class="n">HTMLParser</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">result</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">tags</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
|
||
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">handle_starttag</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tag</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">attrs</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="nb">tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">]])</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">tags</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">tag</span><span class="p">)</span>
|
||
<span class="k">if</span> <span class="n">tag</span> <span class="o">==</span> <span class="s1">'br'</span><span class="p">:</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">result</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s1">' '</span><span class="p">)</span>
|
||
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">handle_endtag</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tag</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">tags</span><span class="p">:</span>
|
||
<span class="k">return</span>
|
||
|
||
<span class="k">if</span> <span class="n">tag</span> <span class="o">!=</span> <span class="bp">self</span><span class="o">.</span><span class="n">tags</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]:</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">result</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="sa">f</span><span class="s2">"&lt;/</span><span class="si">{</span><span class="n">tag</span><span class="si">}</span><span class="s2">&gt;"</span><span class="p">)</span>
|
||
<span class="k">return</span>
|
||
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">tags</span><span class="o">.</span><span class="n">pop</span><span class="p">()</span>
|
||
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">is_valid_tag</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||
<span class="k">return</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">tags</span> <span class="ow">or</span> <span class="bp">self</span><span class="o">.</span><span class="n">tags</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">_BLOCKED_TAGS</span>
|
||
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">handle_data</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_valid_tag</span><span class="p">():</span>
|
||
<span class="k">return</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">result</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
|
||
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">handle_charref</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_valid_tag</span><span class="p">():</span>
|
||
<span class="k">return</span>
|
||
<span class="k">if</span> <span class="n">name</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="ow">in</span> <span class="p">(</span><span class="s1">'x'</span><span class="p">,</span> <span class="s1">'X'</span><span class="p">):</span>
|
||
<span class="n">codepoint</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">name</span><span class="p">[</span><span class="mi">1</span><span class="p">:],</span> <span class="mi">16</span><span class="p">)</span>
|
||
<span class="k">else</span><span class="p">:</span>
|
||
<span class="n">codepoint</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">name</span><span class="p">)</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">result</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="nb">chr</span><span class="p">(</span><span class="n">codepoint</span><span class="p">))</span>
|
||
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">handle_entityref</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">is_valid_tag</span><span class="p">():</span>
|
||
<span class="k">return</span>
|
||
<span class="c1"># codepoint = htmlentitydefs.name2codepoint[name]</span>
|
||
<span class="c1"># self.result.append(chr(codepoint))</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">result</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">name</span><span class="p">)</span>
|
||
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">get_text</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||
<span class="k">return</span> <span class="s1">''</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">result</span><span class="p">)</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span>
|
||
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">error</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">message</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="c1"># error handle is needed in &lt;py3.10</span>
|
||
<span class="c1"># https://github.com/python/cpython/pull/8562/files</span>
|
||
<span class="k">raise</span> <span class="ne">AssertionError</span><span class="p">(</span><span class="n">message</span><span class="p">)</span></div>
|
||
|
||
|
||
|
||
<div class="viewcode-block" id="html_to_text">
|
||
<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.html_to_text">[docs]</a>
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">html_to_text</span><span class="p">(</span><span class="n">html_str</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span>
|
||
<span class="w"> </span><span class="sd">"""Extract text from a HTML string</span>
|
||
|
||
<span class="sd"> Args:</span>
|
||
<span class="sd"> * html_str (str): string HTML</span>
|
||
|
||
<span class="sd"> Returns:</span>
|
||
<span class="sd"> * str: extracted text</span>
|
||
|
||
<span class="sd"> Examples:</span>
|
||
<span class="sd"> >>> html_to_text('Example &lt;span id="42"&gt;#2&lt;/span&gt;')</span>
|
||
<span class="sd"> 'Example #2'</span>
|
||
|
||
<span class="sd"> >>> html_to_text('&lt;style&gt;.span { color: red; }&lt;/style&gt;&lt;span&gt;Example&lt;/span&gt;')</span>
|
||
<span class="sd"> 'Example'</span>
|
||
|
||
<span class="sd"> >>> html_to_text(r'regexp: (?&amp;lt;![a-zA-Z]')</span>
|
||
<span class="sd"> 'regexp: (?&lt;![a-zA-Z]'</span>
|
||
|
||
<span class="sd"> >>> html_to_text(r'&lt;p&gt;&lt;b&gt;Lorem ipsum &lt;/i&gt;dolor sit amet&lt;/p&gt;')</span>
|
||
<span class="sd"> 'Lorem ipsum &lt;/i&gt;dolor sit amet&lt;/p&gt;'</span>
|
||
|
||
<span class="sd"> >>> html_to_text(r'&amp;#x3e &amp;#x3c &amp;#97')</span>
|
||
<span class="sd"> '&gt; &lt; a'</span>
|
||
|
||
<span class="sd"> """</span>
|
||
<span class="k">if</span> <span class="ow">not</span> <span class="n">html_str</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="s2">""</span>
|
||
<span class="n">html_str</span> <span class="o">=</span> <span class="n">html_str</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="se">\n</span><span class="s1">'</span><span class="p">,</span> <span class="s1">' '</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="se">\r</span><span class="s1">'</span><span class="p">,</span> <span class="s1">' '</span><span class="p">)</span>
|
||
<span class="n">html_str</span> <span class="o">=</span> <span class="s1">' '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">html_str</span><span class="o">.</span><span class="n">split</span><span class="p">())</span>
|
||
<span class="n">s</span> <span class="o">=</span> <span class="n">HTMLTextExtractor</span><span class="p">()</span>
|
||
<span class="k">try</span><span class="p">:</span>
|
||
<span class="n">s</span><span class="o">.</span><span class="n">feed</span><span class="p">(</span><span class="n">html_str</span><span class="p">)</span>
|
||
<span class="n">s</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||
<span class="k">except</span> <span class="ne">AssertionError</span><span class="p">:</span>
|
||
<span class="n">s</span> <span class="o">=</span> <span class="n">HTMLTextExtractor</span><span class="p">()</span>
|
||
<span class="n">s</span><span class="o">.</span><span class="n">feed</span><span class="p">(</span><span class="n">escape</span><span class="p">(</span><span class="n">html_str</span><span class="p">,</span> <span class="n">quote</span><span class="o">=</span><span class="kc">True</span><span class="p">))</span>
|
||
<span class="n">s</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
|
||
<span class="k">return</span> <span class="n">s</span><span class="o">.</span><span class="n">get_text</span><span class="p">()</span></div>
|
||
|
||
|
||
|
||
<div class="viewcode-block" id="markdown_to_text">
|
||
<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.markdown_to_text">[docs]</a>
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">markdown_to_text</span><span class="p">(</span><span class="n">markdown_str</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span>
|
||
<span class="w"> </span><span class="sd">"""Extract text from a Markdown string</span>
|
||
|
||
<span class="sd"> Args:</span>
|
||
<span class="sd"> * markdown_str (str): string Markdown</span>
|
||
|
||
<span class="sd"> Returns:</span>
|
||
<span class="sd"> * str: extracted text</span>
|
||
|
||
<span class="sd"> Examples:</span>
|
||
<span class="sd"> >>> markdown_to_text('[example](https://example.com)')</span>
|
||
<span class="sd"> 'example'</span>
|
||
|
||
<span class="sd"> >>> markdown_to_text('## Headline')</span>
|
||
<span class="sd"> 'Headline'</span>
|
||
<span class="sd"> """</span>
|
||
|
||
<span class="n">html_str</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="p">(</span>
|
||
<span class="n">MarkdownIt</span><span class="p">(</span><span class="s2">"commonmark"</span><span class="p">,</span> <span class="p">{</span><span class="s2">"typographer"</span><span class="p">:</span> <span class="kc">True</span><span class="p">})</span><span class="o">.</span><span class="n">enable</span><span class="p">([</span><span class="s2">"replacements"</span><span class="p">,</span> <span class="s2">"smartquotes"</span><span class="p">])</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="n">markdown_str</span><span class="p">)</span>
|
||
<span class="p">)</span>
|
||
<span class="k">return</span> <span class="n">html_to_text</span><span class="p">(</span><span class="n">html_str</span><span class="p">)</span></div>
|
||
|
||
|
||
|
||
<div class="viewcode-block" id="extract_text">
|
||
<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.extract_text">[docs]</a>
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">extract_text</span><span class="p">(</span>
|
||
<span class="n">xpath_results</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="n">ElementType</span><span class="p">]</span> <span class="o">|</span> <span class="n">ElementType</span> <span class="o">|</span> <span class="nb">str</span> <span class="o">|</span> <span class="n">Number</span> <span class="o">|</span> <span class="nb">bool</span> <span class="o">|</span> <span class="kc">None</span><span class="p">,</span>
|
||
<span class="n">allow_none</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
|
||
<span class="p">)</span> <span class="o">-></span> <span class="nb">str</span> <span class="o">|</span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="w"> </span><span class="sd">"""Extract text from a lxml result</span>
|
||
|
||
<span class="sd"> - If ``xpath_results`` is a list of :py:obj:`ElementType` objects, extract</span>
|
||
<span class="sd"> the text from each result and concatenate the list in a string.</span>
|
||
|
||
<span class="sd"> - If ``xpath_results`` is a :py:obj:`ElementType` object, extract all the</span>
|
||
<span class="sd">    text nodes from it ( :py:obj:`lxml.html.tostring`, ``method="text"`` )</span>
|
||
|
||
<span class="sd"> - If ``xpath_results`` is of type :py:obj:`str` or :py:obj:`Number`,</span>
|
||
<span class="sd"> :py:obj:`bool` the string value is returned.</span>
|
||
|
||
<span class="sd"> - If ``xpath_results`` is of type ``None`` a :py:obj:`ValueError` is raised,</span>
|
||
<span class="sd">    unless ``allow_none`` is ``True``, in which case ``None`` is returned.</span>
|
||
|
||
<span class="sd"> """</span>
|
||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">xpath_results</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
|
||
<span class="c1"># it's a list of results: concatenate everything using recursive calls</span>
|
||
<span class="n">result</span> <span class="o">=</span> <span class="s1">''</span>
|
||
<span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="n">xpath_results</span><span class="p">:</span>
|
||
<span class="n">result</span> <span class="o">=</span> <span class="n">result</span> <span class="o">+</span> <span class="p">(</span><span class="n">extract_text</span><span class="p">(</span><span class="n">e</span><span class="p">)</span> <span class="ow">or</span> <span class="s1">''</span><span class="p">)</span>
|
||
<span class="k">return</span> <span class="n">result</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span>
|
||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">xpath_results</span><span class="p">,</span> <span class="n">ElementType</span><span class="p">):</span>
|
||
<span class="c1"># it's an element</span>
|
||
<span class="n">text</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="n">html</span><span class="o">.</span><span class="n">tostring</span><span class="p">(</span> <span class="c1"># type: ignore</span>
|
||
<span class="n">xpath_results</span><span class="p">,</span> <span class="c1"># pyright: ignore[reportArgumentType]</span>
|
||
<span class="n">encoding</span><span class="o">=</span><span class="s1">'unicode'</span><span class="p">,</span>
|
||
<span class="n">method</span><span class="o">=</span><span class="s1">'text'</span><span class="p">,</span>
|
||
<span class="n">with_tail</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
|
||
<span class="p">)</span>
|
||
<span class="n">text</span> <span class="o">=</span> <span class="n">text</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="se">\n</span><span class="s1">'</span><span class="p">,</span> <span class="s1">' '</span><span class="p">)</span> <span class="c1"># type: ignore</span>
|
||
<span class="k">return</span> <span class="s1">' '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">text</span><span class="o">.</span><span class="n">split</span><span class="p">())</span> <span class="c1"># type: ignore</span>
|
||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">xpath_results</span><span class="p">,</span> <span class="p">(</span><span class="nb">str</span><span class="p">,</span> <span class="n">Number</span><span class="p">,</span> <span class="nb">bool</span><span class="p">)):</span>
|
||
<span class="k">return</span> <span class="nb">str</span><span class="p">(</span><span class="n">xpath_results</span><span class="p">)</span>
|
||
<span class="k">if</span> <span class="n">xpath_results</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">allow_none</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="kc">None</span>
|
||
<span class="k">if</span> <span class="n">xpath_results</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">allow_none</span><span class="p">:</span>
|
||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'extract_text(None, allow_none=False)'</span><span class="p">)</span>
|
||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'unsupported type'</span><span class="p">)</span></div>
|
||
|
||
|
||
|
||
<div class="viewcode-block" id="normalize_url">
|
||
<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.normalize_url">[docs]</a>
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">normalize_url</span><span class="p">(</span><span class="n">url</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">base_url</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span>
|
||
<span class="w"> </span><span class="sd">"""Normalize URL: add protocol, join URL with base_url, add trailing slash if there is no path</span>
|
||
|
||
<span class="sd"> Args:</span>
|
||
<span class="sd"> * url (str): Relative URL</span>
|
||
<span class="sd"> * base_url (str): Base URL, it must be an absolute URL.</span>
|
||
|
||
<span class="sd"> Example:</span>
|
||
<span class="sd"> >>> normalize_url('https://example.com', 'http://example.com/')</span>
|
||
<span class="sd"> 'https://example.com/'</span>
|
||
<span class="sd"> >>> normalize_url('//example.com', 'http://example.com/')</span>
|
||
<span class="sd"> 'http://example.com/'</span>
|
||
<span class="sd"> >>> normalize_url('//example.com', 'https://example.com/')</span>
|
||
<span class="sd"> 'https://example.com/'</span>
|
||
<span class="sd"> >>> normalize_url('/path?a=1', 'https://example.com')</span>
|
||
<span class="sd"> 'https://example.com/path?a=1'</span>
|
||
<span class="sd"> >>> normalize_url('', 'https://example.com')</span>
|
||
<span class="sd"> 'https://example.com/'</span>
|
||
<span class="sd"> >>> normalize_url('/test', '/path')</span>
|
||
<span class="sd"> raise ValueError</span>
|
||
|
||
<span class="sd"> Raises:</span>
|
||
<span class="sd">        * ValueError</span>
|
||
|
||
<span class="sd"> Returns:</span>
|
||
<span class="sd"> * str: normalized URL</span>
|
||
<span class="sd"> """</span>
|
||
<span class="k">if</span> <span class="n">url</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'//'</span><span class="p">):</span>
|
||
<span class="c1"># add http or https to this kind of url //example.com/</span>
|
||
<span class="n">parsed_search_url</span> <span class="o">=</span> <span class="n">urlparse</span><span class="p">(</span><span class="n">base_url</span><span class="p">)</span>
|
||
<span class="n">url</span> <span class="o">=</span> <span class="s1">'</span><span class="si">{0}</span><span class="s1">:</span><span class="si">{1}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">parsed_search_url</span><span class="o">.</span><span class="n">scheme</span> <span class="ow">or</span> <span class="s1">'http'</span><span class="p">,</span> <span class="n">url</span><span class="p">)</span>
|
||
<span class="k">elif</span> <span class="n">url</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'/'</span><span class="p">):</span>
|
||
<span class="c1"># fix relative url to the search engine</span>
|
||
<span class="n">url</span> <span class="o">=</span> <span class="n">urljoin</span><span class="p">(</span><span class="n">base_url</span><span class="p">,</span> <span class="n">url</span><span class="p">)</span>
|
||
|
||
<span class="c1"># fix relative urls that fall through the crack</span>
|
||
<span class="k">if</span> <span class="s1">'://'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">url</span><span class="p">:</span>
|
||
<span class="n">url</span> <span class="o">=</span> <span class="n">urljoin</span><span class="p">(</span><span class="n">base_url</span><span class="p">,</span> <span class="n">url</span><span class="p">)</span>
|
||
|
||
<span class="n">parsed_url</span> <span class="o">=</span> <span class="n">urlparse</span><span class="p">(</span><span class="n">url</span><span class="p">)</span>
|
||
|
||
<span class="c1"># add a / at the end of the url if there is no path</span>
|
||
<span class="k">if</span> <span class="ow">not</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">netloc</span><span class="p">:</span>
|
||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'Cannot parse url'</span><span class="p">)</span>
|
||
<span class="k">if</span> <span class="ow">not</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">path</span><span class="p">:</span>
|
||
<span class="n">url</span> <span class="o">+=</span> <span class="s1">'/'</span>
|
||
|
||
<span class="k">return</span> <span class="n">url</span></div>
|
||
|
||
|
||
|
||
<div class="viewcode-block" id="extract_url">
|
||
<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.extract_url">[docs]</a>
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">extract_url</span><span class="p">(</span><span class="n">xpath_results</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="n">ElementType</span><span class="p">]</span> <span class="o">|</span> <span class="n">ElementType</span> <span class="o">|</span> <span class="nb">str</span> <span class="o">|</span> <span class="n">Number</span> <span class="o">|</span> <span class="nb">bool</span> <span class="o">|</span> <span class="kc">None</span><span class="p">,</span> <span class="n">base_url</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span>
|
||
<span class="w"> </span><span class="sd">"""Extract and normalize URL from lxml Element</span>
|
||
|
||
<span class="sd"> Example:</span>
|
||
<span class="sd"> >>> def f(s, search_url):</span>
|
||
<span class="sd"> >>> return searx.utils.extract_url(html.fromstring(s), search_url)</span>
|
||
<span class="sd"> >>> f('<span id="42">https://example.com</span>', 'http://example.com/')</span>
|
||
<span class="sd"> 'https://example.com/'</span>
|
||
<span class="sd"> >>> f('https://example.com', 'http://example.com/')</span>
|
||
<span class="sd"> 'https://example.com/'</span>
|
||
<span class="sd"> >>> f('//example.com', 'http://example.com/')</span>
|
||
<span class="sd"> 'http://example.com/'</span>
|
||
<span class="sd"> >>> f('//example.com', 'https://example.com/')</span>
|
||
<span class="sd"> 'https://example.com/'</span>
|
||
<span class="sd"> >>> f('/path?a=1', 'https://example.com')</span>
|
||
<span class="sd"> 'https://example.com/path?a=1'</span>
|
||
<span class="sd"> >>> f('', 'https://example.com')</span>
|
||
<span class="sd"> raise lxml.etree.ParserError</span>
|
||
<span class="sd"> >>> searx.utils.extract_url([], 'https://example.com')</span>
|
||
<span class="sd"> raise ValueError</span>
|
||
|
||
<span class="sd"> Raises:</span>
|
||
<span class="sd"> * ValueError</span>
|
||
<span class="sd"> * lxml.etree.ParserError</span>
|
||
|
||
<span class="sd"> Returns:</span>
|
||
<span class="sd"> * str: normalized URL</span>
|
||
<span class="sd"> """</span>
|
||
<span class="k">if</span> <span class="n">xpath_results</span> <span class="o">==</span> <span class="p">[]:</span>
|
||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'Empty url resultset'</span><span class="p">)</span>
|
||
|
||
<span class="n">url</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">xpath_results</span><span class="p">)</span>
|
||
<span class="k">if</span> <span class="n">url</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="n">normalize_url</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="n">base_url</span><span class="p">)</span>
|
||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'URL not found'</span><span class="p">)</span></div>
|
||
|
||
|
||
|
||
<div class="viewcode-block" id="dict_subset">
|
||
<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.dict_subset">[docs]</a>
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">dict_subset</span><span class="p">(</span><span class="n">dictionary</span><span class="p">:</span> <span class="n">MutableMapping</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">],</span> <span class="n">properties</span><span class="p">:</span> <span class="nb">set</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="n">MutableMapping</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">]:</span>
|
||
<span class="w"> </span><span class="sd">"""Extract a subset of a dict</span>
|
||
|
||
<span class="sd"> Examples:</span>
|
||
<span class="sd"> >>> dict_subset({'A': 'a', 'B': 'b', 'C': 'c'}, ['A', 'C'])</span>
|
||
<span class="sd"> {'A': 'a', 'C': 'c'}</span>
|
||
<span class="sd">        >>> dict_subset({'A': 'a', 'B': 'b', 'C': 'c'}, ['A', 'D'])</span>
|
||
<span class="sd"> {'A': 'a'}</span>
|
||
<span class="sd"> """</span>
|
||
<span class="k">return</span> <span class="p">{</span><span class="n">k</span><span class="p">:</span> <span class="n">dictionary</span><span class="p">[</span><span class="n">k</span><span class="p">]</span> <span class="k">for</span> <span class="n">k</span> <span class="ow">in</span> <span class="n">properties</span> <span class="k">if</span> <span class="n">k</span> <span class="ow">in</span> <span class="n">dictionary</span><span class="p">}</span></div>
|
||
|
||
|
||
|
||
<div class="viewcode-block" id="humanize_bytes">
|
||
<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.humanize_bytes">[docs]</a>
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">humanize_bytes</span><span class="p">(</span><span class="n">size</span><span class="p">:</span> <span class="nb">int</span> <span class="o">|</span> <span class="nb">float</span><span class="p">,</span> <span class="n">precision</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">2</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""Determine the *human readable* value of bytes on 1024 base (1KB=1024B)."""</span>
|
||
<span class="n">s</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'B '</span><span class="p">,</span> <span class="s1">'KB'</span><span class="p">,</span> <span class="s1">'MB'</span><span class="p">,</span> <span class="s1">'GB'</span><span class="p">,</span> <span class="s1">'TB'</span><span class="p">]</span>
|
||
|
||
<span class="n">x</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">s</span><span class="p">)</span>
|
||
<span class="n">p</span> <span class="o">=</span> <span class="mi">0</span>
|
||
<span class="k">while</span> <span class="n">size</span> <span class="o">></span> <span class="mi">1024</span> <span class="ow">and</span> <span class="n">p</span> <span class="o"><</span> <span class="n">x</span><span class="p">:</span>
|
||
<span class="n">p</span> <span class="o">+=</span> <span class="mi">1</span>
|
||
<span class="n">size</span> <span class="o">=</span> <span class="n">size</span> <span class="o">/</span> <span class="mf">1024.0</span>
|
||
<span class="k">return</span> <span class="s2">"</span><span class="si">%.*f</span><span class="s2"> </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">precision</span><span class="p">,</span> <span class="n">size</span><span class="p">,</span> <span class="n">s</span><span class="p">[</span><span class="n">p</span><span class="p">])</span></div>
|
||
|
||
|
||
|
||
<div class="viewcode-block" id="humanize_number">
|
||
<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.humanize_number">[docs]</a>
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">humanize_number</span><span class="p">(</span><span class="n">size</span><span class="p">:</span> <span class="nb">int</span> <span class="o">|</span> <span class="nb">float</span><span class="p">,</span> <span class="n">precision</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">0</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""Determine the *human readable* value of a decimal number."""</span>
|
||
<span class="n">s</span> <span class="o">=</span> <span class="p">[</span><span class="s1">''</span><span class="p">,</span> <span class="s1">'K'</span><span class="p">,</span> <span class="s1">'M'</span><span class="p">,</span> <span class="s1">'B'</span><span class="p">,</span> <span class="s1">'T'</span><span class="p">]</span>
|
||
|
||
<span class="n">x</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">s</span><span class="p">)</span>
|
||
<span class="n">p</span> <span class="o">=</span> <span class="mi">0</span>
|
||
<span class="k">while</span> <span class="n">size</span> <span class="o">></span> <span class="mi">1000</span> <span class="ow">and</span> <span class="n">p</span> <span class="o"><</span> <span class="n">x</span><span class="p">:</span>
|
||
<span class="n">p</span> <span class="o">+=</span> <span class="mi">1</span>
|
||
<span class="n">size</span> <span class="o">=</span> <span class="n">size</span> <span class="o">/</span> <span class="mf">1000.0</span>
|
||
<span class="k">return</span> <span class="s2">"</span><span class="si">%.*f%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">precision</span><span class="p">,</span> <span class="n">size</span><span class="p">,</span> <span class="n">s</span><span class="p">[</span><span class="n">p</span><span class="p">])</span></div>
|
||
|
||
|
||
|
||
<div class="viewcode-block" id="convert_str_to_int">
|
||
<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.convert_str_to_int">[docs]</a>
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">convert_str_to_int</span><span class="p">(</span><span class="n">number_str</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span>
|
||
<span class="w"> </span><span class="sd">"""Convert number_str to int or 0 if number_str is not a number."""</span>
|
||
<span class="k">if</span> <span class="n">number_str</span><span class="o">.</span><span class="n">isdigit</span><span class="p">():</span>
|
||
<span class="k">return</span> <span class="nb">int</span><span class="p">(</span><span class="n">number_str</span><span class="p">)</span>
|
||
<span class="k">return</span> <span class="mi">0</span></div>
|
||
|
||
|
||
|
||
<div class="viewcode-block" id="extr">
|
||
<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.extr">[docs]</a>
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">extr</span><span class="p">(</span><span class="n">txt</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">begin</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">end</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">default</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">""</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span>
|
||
<span class="w"> </span><span class="sd">"""Extract the string between ``begin`` and ``end`` from ``txt``</span>
|
||
|
||
<span class="sd"> :param txt: String to search in</span>
|
||
<span class="sd"> :param begin: First string to be searched for</span>
|
||
<span class="sd"> :param end: Second string to be searched for after ``begin``</span>
|
||
<span class="sd"> :param default: Default value if one of ``begin`` or ``end`` is not</span>
|
||
<span class="sd"> found. Defaults to an empty string.</span>
|
||
<span class="sd"> :return: The string between the two search-strings ``begin`` and ``end``.</span>
|
||
<span class="sd"> If at least one of ``begin`` or ``end`` is not found, the value of</span>
|
||
<span class="sd"> ``default`` is returned.</span>
|
||
|
||
<span class="sd"> Examples:</span>
|
||
<span class="sd"> >>> extr("abcde", "a", "e")</span>
|
||
<span class="sd"> "bcd"</span>
|
||
<span class="sd">        >>> extr("abcde", "a", "z", default="nothing")</span>
|
||
<span class="sd"> "nothing"</span>
|
||
|
||
<span class="sd"> """</span>
|
||
|
||
<span class="c1"># From https://github.com/mikf/gallery-dl/blob/master/gallery_dl/text.py#L129</span>
|
||
|
||
<span class="k">try</span><span class="p">:</span>
|
||
<span class="n">first</span> <span class="o">=</span> <span class="n">txt</span><span class="o">.</span><span class="n">index</span><span class="p">(</span><span class="n">begin</span><span class="p">)</span> <span class="o">+</span> <span class="nb">len</span><span class="p">(</span><span class="n">begin</span><span class="p">)</span>
|
||
<span class="k">return</span> <span class="n">txt</span><span class="p">[</span><span class="n">first</span> <span class="p">:</span> <span class="n">txt</span><span class="o">.</span><span class="n">index</span><span class="p">(</span><span class="n">end</span><span class="p">,</span> <span class="n">first</span><span class="p">)]</span>
|
||
<span class="k">except</span> <span class="ne">ValueError</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="n">default</span></div>
|
||
|
||
|
||
|
||
<div class="viewcode-block" id="int_or_zero">
|
||
<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.int_or_zero">[docs]</a>
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">int_or_zero</span><span class="p">(</span><span class="n">num</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">|</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span>
|
||
<span class="w"> </span><span class="sd">"""Convert num to int or 0. num can be either a str or a list.</span>
|
||
<span class="sd"> If num is a list, the first element is converted to int (or return 0 if the list is empty).</span>
|
||
<span class="sd"> If num is a str, see convert_str_to_int</span>
|
||
<span class="sd"> """</span>
|
||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">num</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
|
||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">num</span><span class="p">)</span> <span class="o"><</span> <span class="mi">1</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="mi">0</span>
|
||
<span class="n">num</span> <span class="o">=</span> <span class="n">num</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
|
||
<span class="k">return</span> <span class="n">convert_str_to_int</span><span class="p">(</span><span class="n">num</span><span class="p">)</span></div>
|
||
|
||
|
||
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">load_module</span><span class="p">(</span><span class="n">filename</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">module_dir</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">types</span><span class="o">.</span><span class="n">ModuleType</span><span class="p">:</span>
|
||
<span class="n">modname</span> <span class="o">=</span> <span class="n">splitext</span><span class="p">(</span><span class="n">filename</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
||
<span class="n">modpath</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">module_dir</span><span class="p">,</span> <span class="n">filename</span><span class="p">)</span>
|
||
<span class="c1"># see https://docs.python.org/3/library/importlib.html#importing-a-source-file-directly</span>
|
||
<span class="n">spec</span> <span class="o">=</span> <span class="n">importlib</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">spec_from_file_location</span><span class="p">(</span><span class="n">modname</span><span class="p">,</span> <span class="n">modpath</span><span class="p">)</span>
|
||
<span class="k">if</span> <span class="ow">not</span> <span class="n">spec</span><span class="p">:</span>
|
||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Error loading '</span><span class="si">{</span><span class="n">modpath</span><span class="si">}</span><span class="s2">' module"</span><span class="p">)</span>
|
||
<span class="n">module</span> <span class="o">=</span> <span class="n">importlib</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">module_from_spec</span><span class="p">(</span><span class="n">spec</span><span class="p">)</span>
|
||
<span class="k">if</span> <span class="ow">not</span> <span class="n">spec</span><span class="o">.</span><span class="n">loader</span><span class="p">:</span>
|
||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Error loading '</span><span class="si">{</span><span class="n">modpath</span><span class="si">}</span><span class="s2">' module"</span><span class="p">)</span>
|
||
<span class="n">spec</span><span class="o">.</span><span class="n">loader</span><span class="o">.</span><span class="n">exec_module</span><span class="p">(</span><span class="n">module</span><span class="p">)</span>
|
||
<span class="k">return</span> <span class="n">module</span>
|
||
|
||
|
||
<div class="viewcode-block" id="to_string">
|
||
<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.to_string">[docs]</a>
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">to_string</span><span class="p">(</span><span class="n">obj</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span>
|
||
<span class="w"> </span><span class="sd">"""Convert obj to its string representation."""</span>
|
||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">obj</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
|
||
<span class="k">return</span> <span class="n">obj</span>
|
||
<span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">obj</span><span class="p">,</span> <span class="s1">'__str__'</span><span class="p">):</span>
|
||
<span class="k">return</span> <span class="nb">str</span><span class="p">(</span><span class="n">obj</span><span class="p">)</span>
|
||
<span class="k">return</span> <span class="nb">repr</span><span class="p">(</span><span class="n">obj</span><span class="p">)</span></div>
|
||
|
||
|
||
|
||
<div class="viewcode-block" id="ecma_unescape">
|
||
<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.ecma_unescape">[docs]</a>
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">ecma_unescape</span><span class="p">(</span><span class="n">string</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span>
|
||
<span class="w"> </span><span class="sd">"""Python implementation of the unescape javascript function</span>
|
||
|
||
<span class="sd"> https://www.ecma-international.org/ecma-262/6.0/#sec-unescape-string</span>
|
||
<span class="sd">    https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/unescape</span>
|
||
|
||
<span class="sd"> Examples:</span>
|
||
<span class="sd"> >>> ecma_unescape('%u5409')</span>
|
||
<span class="sd"> '吉'</span>
|
||
<span class="sd"> >>> ecma_unescape('%20')</span>
|
||
<span class="sd"> ' '</span>
|
||
<span class="sd"> >>> ecma_unescape('%F3')</span>
|
||
<span class="sd"> 'ó'</span>
|
||
<span class="sd"> """</span>
|
||
<span class="c1"># "%u5409" becomes "吉"</span>
|
||
<span class="n">string</span> <span class="o">=</span> <span class="n">_ECMA_UNESCAPE4_RE</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="k">lambda</span> <span class="n">e</span><span class="p">:</span> <span class="nb">chr</span><span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="n">e</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="mi">1</span><span class="p">),</span> <span class="mi">16</span><span class="p">)),</span> <span class="n">string</span><span class="p">)</span>
|
||
<span class="c1"># "%20" becomes " ", "%F3" becomes "ó"</span>
|
||
<span class="n">string</span> <span class="o">=</span> <span class="n">_ECMA_UNESCAPE2_RE</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="k">lambda</span> <span class="n">e</span><span class="p">:</span> <span class="nb">chr</span><span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="n">e</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="mi">1</span><span class="p">),</span> <span class="mi">16</span><span class="p">)),</span> <span class="n">string</span><span class="p">)</span>
|
||
<span class="k">return</span> <span class="n">string</span></div>
|
||
|
||
|
||
|
||
<div class="viewcode-block" id="remove_pua_from_str">
|
||
<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.remove_pua_from_str">[docs]</a>
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">remove_pua_from_str</span><span class="p">(</span><span class="n">string</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""Removes unicode's "PRIVATE USE CHARACTER"s (PUA_) from a string.</span>
|
||
|
||
<span class="sd"> .. _PUA: https://en.wikipedia.org/wiki/Private_Use_Areas</span>
|
||
<span class="sd"> """</span>
|
||
<span class="n">pua_ranges</span> <span class="o">=</span> <span class="p">((</span><span class="mh">0xE000</span><span class="p">,</span> <span class="mh">0xF8FF</span><span class="p">),</span> <span class="p">(</span><span class="mh">0xF0000</span><span class="p">,</span> <span class="mh">0xFFFFD</span><span class="p">),</span> <span class="p">(</span><span class="mh">0x100000</span><span class="p">,</span> <span class="mh">0x10FFFD</span><span class="p">))</span>
|
||
<span class="n">s</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
|
||
<span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">string</span><span class="p">:</span>
|
||
<span class="n">i</span> <span class="o">=</span> <span class="nb">ord</span><span class="p">(</span><span class="n">c</span><span class="p">)</span>
|
||
<span class="k">if</span> <span class="nb">any</span><span class="p">(</span><span class="n">a</span> <span class="o"><=</span> <span class="n">i</span> <span class="o"><=</span> <span class="n">b</span> <span class="k">for</span> <span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">)</span> <span class="ow">in</span> <span class="n">pua_ranges</span><span class="p">):</span>
|
||
<span class="k">continue</span>
|
||
<span class="n">s</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">c</span><span class="p">)</span>
|
||
<span class="k">return</span> <span class="s2">""</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">s</span><span class="p">)</span></div>
|
||
|
||
|
||
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">get_string_replaces_function</span><span class="p">(</span><span class="n">replaces</span><span class="p">:</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="n">Callable</span><span class="p">[[</span><span class="nb">str</span><span class="p">],</span> <span class="nb">str</span><span class="p">]:</span>
|
||
<span class="n">rep</span> <span class="o">=</span> <span class="p">{</span><span class="n">re</span><span class="o">.</span><span class="n">escape</span><span class="p">(</span><span class="n">k</span><span class="p">):</span> <span class="n">v</span> <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">replaces</span><span class="o">.</span><span class="n">items</span><span class="p">()}</span>
|
||
<span class="n">pattern</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="s2">"|"</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">rep</span><span class="o">.</span><span class="n">keys</span><span class="p">()))</span>
|
||
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">func</span><span class="p">(</span><span class="n">text</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span>
|
||
<span class="k">return</span> <span class="n">pattern</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="k">lambda</span> <span class="n">m</span><span class="p">:</span> <span class="n">rep</span><span class="p">[</span><span class="n">re</span><span class="o">.</span><span class="n">escape</span><span class="p">(</span><span class="n">m</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="mi">0</span><span class="p">))],</span> <span class="n">text</span><span class="p">)</span>
|
||
|
||
<span class="k">return</span> <span class="n">func</span>
|
||
|
||
|
||
<div class="viewcode-block" id="get_engine_from_settings">
|
||
<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.get_engine_from_settings">[docs]</a>
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">get_engine_from_settings</span><span class="p">(</span><span class="n">name</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]:</span>
|
||
<span class="w"> </span><span class="sd">"""Return engine configuration from settings.yml of a given engine name"""</span>
|
||
|
||
<span class="k">if</span> <span class="s1">'engines'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">settings</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="p">{}</span>
|
||
|
||
<span class="k">for</span> <span class="n">engine</span> <span class="ow">in</span> <span class="n">settings</span><span class="p">[</span><span class="s1">'engines'</span><span class="p">]:</span>
|
||
<span class="k">if</span> <span class="s1">'name'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">engine</span><span class="p">:</span>
|
||
<span class="k">continue</span>
|
||
<span class="k">if</span> <span class="n">name</span> <span class="o">==</span> <span class="n">engine</span><span class="p">[</span><span class="s1">'name'</span><span class="p">]:</span>
|
||
<span class="k">return</span> <span class="n">engine</span>
|
||
|
||
<span class="k">return</span> <span class="p">{}</span></div>
|
||
|
||
|
||
|
||
<div class="viewcode-block" id="get_xpath">
|
||
<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.get_xpath">[docs]</a>
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">get_xpath</span><span class="p">(</span><span class="n">xpath_spec</span><span class="p">:</span> <span class="n">XPathSpecType</span><span class="p">)</span> <span class="o">-></span> <span class="n">XPath</span><span class="p">:</span>
|
||
<span class="w"> </span><span class="sd">"""Return cached compiled :py:obj:`lxml.etree.XPath` object.</span>
|
||
|
||
<span class="sd"> ``TypeError``:</span>
|
||
<span class="sd"> Raised when ``xpath_spec`` is neither a :py:obj:`str` nor a</span>
|
||
<span class="sd"> :py:obj:`lxml.etree.XPath`.</span>
|
||
|
||
<span class="sd"> ``SearxXPathSyntaxException``:</span>
|
||
<span class="sd"> Raised when there is a syntax error in the *XPath* selector (``str``).</span>
|
||
<span class="sd"> """</span>
|
||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">xpath_spec</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
|
||
<span class="n">result</span> <span class="o">=</span> <span class="n">_XPATH_CACHE</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">xpath_spec</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
|
||
<span class="k">if</span> <span class="n">result</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="k">try</span><span class="p">:</span>
|
||
<span class="n">result</span> <span class="o">=</span> <span class="n">XPath</span><span class="p">(</span><span class="n">xpath_spec</span><span class="p">)</span>
|
||
<span class="k">except</span> <span class="n">XPathSyntaxError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
|
||
<span class="k">raise</span> <span class="n">SearxXPathSyntaxException</span><span class="p">(</span><span class="n">xpath_spec</span><span class="p">,</span> <span class="nb">str</span><span class="p">(</span><span class="n">e</span><span class="o">.</span><span class="n">msg</span><span class="p">))</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span>
|
||
<span class="n">_XPATH_CACHE</span><span class="p">[</span><span class="n">xpath_spec</span><span class="p">]</span> <span class="o">=</span> <span class="n">result</span>
|
||
<span class="k">return</span> <span class="n">result</span>
|
||
|
||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">xpath_spec</span><span class="p">,</span> <span class="n">XPath</span><span class="p">):</span>
|
||
<span class="k">return</span> <span class="n">xpath_spec</span>
|
||
|
||
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s1">'xpath_spec must be either a str or a lxml.etree.XPath'</span><span class="p">)</span> <span class="c1"># pyright: ignore[reportUnreachable]</span></div>
|
||
|
||
|
||
|
||
<div class="viewcode-block" id="eval_xpath">
|
||
<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.eval_xpath">[docs]</a>
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">eval_xpath</span><span class="p">(</span><span class="n">element</span><span class="p">:</span> <span class="n">ElementType</span><span class="p">,</span> <span class="n">xpath_spec</span><span class="p">:</span> <span class="n">XPathSpecType</span><span class="p">)</span> <span class="o">-></span> <span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">:</span>
|
||
<span class="w"> </span><span class="sd">"""Equivalent of ``element.xpath(xpath_str)`` but compile ``xpath_str`` into</span>
|
||
<span class="sd">    a :py:obj:`lxml.etree.XPath` object once and for all.  The return value of</span>
|
||
<span class="sd"> ``xpath(..)`` is complex, read `XPath return values`_ for more details.</span>
|
||
|
||
<span class="sd"> .. _XPath return values:</span>
|
||
<span class="sd"> https://lxml.de/xpathxslt.html#xpath-return-values</span>
|
||
|
||
<span class="sd"> ``TypeError``:</span>
|
||
<span class="sd"> Raised when ``xpath_spec`` is neither a :py:obj:`str` nor a</span>
|
||
<span class="sd"> :py:obj:`lxml.etree.XPath`.</span>
|
||
|
||
<span class="sd"> ``SearxXPathSyntaxException``:</span>
|
||
<span class="sd"> Raised when there is a syntax error in the *XPath* selector (``str``).</span>
|
||
|
||
<span class="sd">    ``SearxEngineXPathException``:</span>
|
||
<span class="sd"> Raised when the XPath can't be evaluated (masked</span>
|
||
<span class="sd">        :py:obj:`lxml.etree.XPathError`).</span>
|
||
<span class="sd"> """</span>
|
||
<span class="n">xpath</span><span class="p">:</span> <span class="n">XPath</span> <span class="o">=</span> <span class="n">get_xpath</span><span class="p">(</span><span class="n">xpath_spec</span><span class="p">)</span>
|
||
<span class="k">try</span><span class="p">:</span>
|
||
<span class="c1"># https://lxml.de/xpathxslt.html#xpath-return-values</span>
|
||
<span class="k">return</span> <span class="n">xpath</span><span class="p">(</span><span class="n">element</span><span class="p">)</span>
|
||
<span class="k">except</span> <span class="n">XPathError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
|
||
<span class="n">arg</span> <span class="o">=</span> <span class="s1">' '</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="nb">str</span><span class="p">(</span><span class="n">i</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="n">e</span><span class="o">.</span><span class="n">args</span><span class="p">])</span>
|
||
<span class="k">raise</span> <span class="n">SearxEngineXPathException</span><span class="p">(</span><span class="n">xpath_spec</span><span class="p">,</span> <span class="n">arg</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span></div>
|
||
|
||
|
||
|
||
<div class="viewcode-block" id="eval_xpath_list">
|
||
<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.eval_xpath_list">[docs]</a>
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">eval_xpath_list</span><span class="p">(</span><span class="n">element</span><span class="p">:</span> <span class="n">ElementType</span><span class="p">,</span> <span class="n">xpath_spec</span><span class="p">:</span> <span class="n">XPathSpecType</span><span class="p">,</span> <span class="n">min_len</span><span class="p">:</span> <span class="nb">int</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="nb">list</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">]:</span>
|
||
<span class="w"> </span><span class="sd">"""Same as :py:obj:`searx.utils.eval_xpath`, but additionally ensures the</span>
|
||
<span class="sd"> return value is a :py:obj:`list`. The minimum length of the list is also</span>
|
||
<span class="sd"> checked (if ``min_len`` is set)."""</span>
|
||
|
||
<span class="n">result</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">element</span><span class="p">,</span> <span class="n">xpath_spec</span><span class="p">)</span>
|
||
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">result</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
|
||
<span class="k">raise</span> <span class="n">SearxEngineXPathException</span><span class="p">(</span><span class="n">xpath_spec</span><span class="p">,</span> <span class="s1">'the result is not a list'</span><span class="p">)</span>
|
||
<span class="k">if</span> <span class="n">min_len</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">min_len</span> <span class="o">></span> <span class="nb">len</span><span class="p">(</span><span class="n">result</span><span class="p">):</span>
|
||
<span class="k">raise</span> <span class="n">SearxEngineXPathException</span><span class="p">(</span><span class="n">xpath_spec</span><span class="p">,</span> <span class="s1">'len(xpath_str) < '</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">min_len</span><span class="p">))</span>
|
||
<span class="k">return</span> <span class="n">result</span></div>
|
||
|
||
|
||
|
||
<div class="viewcode-block" id="eval_xpath_getindex">
|
||
<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.eval_xpath_getindex">[docs]</a>
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">eval_xpath_getindex</span><span class="p">(</span>
|
||
<span class="n">element</span><span class="p">:</span> <span class="n">ElementType</span><span class="p">,</span>
|
||
<span class="n">xpath_spec</span><span class="p">:</span> <span class="n">XPathSpecType</span><span class="p">,</span>
|
||
<span class="n">index</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span>
|
||
<span class="n">default</span><span class="p">:</span> <span class="n">t</span><span class="o">.</span><span class="n">Any</span> <span class="o">=</span> <span class="n">_NOTSET</span><span class="p">,</span>
|
||
<span class="p">)</span> <span class="o">-></span> <span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">:</span>
|
||
<span class="w"> </span><span class="sd">"""Same as :py:obj:`searx.utils.eval_xpath_list`, but returns item on</span>
|
||
<span class="sd"> position ``index`` from the list (index starts with ``0``).</span>
|
||
|
||
<span class="sd"> The exceptions known from :py:obj:`searx.utils.eval_xpath` are thrown. If a</span>
|
||
<span class="sd"> default is specified, this is returned if an element at position ``index``</span>
|
||
<span class="sd"> could not be determined.</span>
|
||
<span class="sd"> """</span>
|
||
|
||
<span class="n">result</span> <span class="o">=</span> <span class="n">eval_xpath_list</span><span class="p">(</span><span class="n">element</span><span class="p">,</span> <span class="n">xpath_spec</span><span class="p">)</span>
|
||
<span class="k">if</span> <span class="o">-</span><span class="nb">len</span><span class="p">(</span><span class="n">result</span><span class="p">)</span> <span class="o"><=</span> <span class="n">index</span> <span class="o"><</span> <span class="nb">len</span><span class="p">(</span><span class="n">result</span><span class="p">):</span>
|
||
<span class="k">return</span> <span class="n">result</span><span class="p">[</span><span class="n">index</span><span class="p">]</span>
|
||
<span class="k">if</span> <span class="n">default</span> <span class="o">==</span> <span class="n">_NOTSET</span><span class="p">:</span>
|
||
        <span class="c1"># raise a SearxEngineXPathException instead of IndexError to record</span>
|
||
<span class="c1"># xpath_spec</span>
|
||
<span class="k">raise</span> <span class="n">SearxEngineXPathException</span><span class="p">(</span><span class="n">xpath_spec</span><span class="p">,</span> <span class="s1">'index '</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">index</span><span class="p">)</span> <span class="o">+</span> <span class="s1">' not found'</span><span class="p">)</span>
|
||
<span class="k">return</span> <span class="n">default</span></div>
|
||
|
||
|
||
|
||
<div class="viewcode-block" id="get_embeded_stream_url">
|
||
<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.get_embeded_stream_url">[docs]</a>
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">get_embeded_stream_url</span><span class="p">(</span><span class="n">url</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""</span>
|
||
<span class="sd">    Converts a standard video URL into its embed format. Supported services include YouTube,</span>
|
||
<span class="sd"> Facebook, Instagram, TikTok, Dailymotion, and Bilibili.</span>
|
||
<span class="sd"> """</span>
|
||
<span class="n">parsed_url</span> <span class="o">=</span> <span class="n">urlparse</span><span class="p">(</span><span class="n">url</span><span class="p">)</span>
|
||
<span class="n">iframe_src</span> <span class="o">=</span> <span class="kc">None</span>
|
||
|
||
<span class="c1"># YouTube</span>
|
||
<span class="k">if</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">netloc</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">'www.youtube.com'</span><span class="p">,</span> <span class="s1">'youtube.com'</span><span class="p">]</span> <span class="ow">and</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">path</span> <span class="o">==</span> <span class="s1">'/watch'</span> <span class="ow">and</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">query</span><span class="p">:</span>
|
||
<span class="n">video_id</span> <span class="o">=</span> <span class="n">parse_qs</span><span class="p">(</span><span class="n">parsed_url</span><span class="o">.</span><span class="n">query</span><span class="p">)</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'v'</span><span class="p">,</span> <span class="p">[])</span>
|
||
<span class="k">if</span> <span class="n">video_id</span><span class="p">:</span>
|
||
<span class="n">iframe_src</span> <span class="o">=</span> <span class="s1">'https://www.youtube-nocookie.com/embed/'</span> <span class="o">+</span> <span class="n">video_id</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
|
||
|
||
<span class="c1"># Facebook</span>
|
||
<span class="k">elif</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">netloc</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">'www.facebook.com'</span><span class="p">,</span> <span class="s1">'facebook.com'</span><span class="p">]:</span>
|
||
<span class="n">encoded_href</span> <span class="o">=</span> <span class="n">urlencode</span><span class="p">({</span><span class="s1">'href'</span><span class="p">:</span> <span class="n">url</span><span class="p">})</span>
|
||
<span class="n">iframe_src</span> <span class="o">=</span> <span class="s1">'https://www.facebook.com/plugins/video.php?allowfullscreen=true&'</span> <span class="o">+</span> <span class="n">encoded_href</span>
|
||
|
||
<span class="c1"># Instagram</span>
|
||
<span class="k">elif</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">netloc</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">'www.instagram.com'</span><span class="p">,</span> <span class="s1">'instagram.com'</span><span class="p">]</span> <span class="ow">and</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'/p/'</span><span class="p">):</span>
|
||
<span class="k">if</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">endswith</span><span class="p">(</span><span class="s1">'/'</span><span class="p">):</span>
|
||
<span class="n">iframe_src</span> <span class="o">=</span> <span class="n">url</span> <span class="o">+</span> <span class="s1">'embed'</span>
|
||
<span class="k">else</span><span class="p">:</span>
|
||
<span class="n">iframe_src</span> <span class="o">=</span> <span class="n">url</span> <span class="o">+</span> <span class="s1">'/embed'</span>
|
||
|
||
<span class="c1"># TikTok</span>
|
||
<span class="k">elif</span> <span class="p">(</span>
|
||
<span class="n">parsed_url</span><span class="o">.</span><span class="n">netloc</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">'www.tiktok.com'</span><span class="p">,</span> <span class="s1">'tiktok.com'</span><span class="p">]</span>
|
||
<span class="ow">and</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'/@'</span><span class="p">)</span>
|
||
<span class="ow">and</span> <span class="s1">'/video/'</span> <span class="ow">in</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">path</span>
|
||
<span class="p">):</span>
|
||
<span class="n">path_parts</span> <span class="o">=</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'/video/'</span><span class="p">)</span>
|
||
<span class="n">video_id</span> <span class="o">=</span> <span class="n">path_parts</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
|
||
<span class="n">iframe_src</span> <span class="o">=</span> <span class="s1">'https://www.tiktok.com/embed/'</span> <span class="o">+</span> <span class="n">video_id</span>
|
||
|
||
<span class="c1"># Dailymotion</span>
|
||
<span class="k">elif</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">netloc</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">'www.dailymotion.com'</span><span class="p">,</span> <span class="s1">'dailymotion.com'</span><span class="p">]</span> <span class="ow">and</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'/video/'</span><span class="p">):</span>
|
||
<span class="n">path_parts</span> <span class="o">=</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'/'</span><span class="p">)</span>
|
||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">path_parts</span><span class="p">)</span> <span class="o">==</span> <span class="mi">3</span><span class="p">:</span>
|
||
<span class="n">video_id</span> <span class="o">=</span> <span class="n">path_parts</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span>
|
||
<span class="n">iframe_src</span> <span class="o">=</span> <span class="s1">'https://www.dailymotion.com/embed/video/'</span> <span class="o">+</span> <span class="n">video_id</span>
|
||
|
||
<span class="c1"># Bilibili</span>
|
||
<span class="k">elif</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">netloc</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">'www.bilibili.com'</span><span class="p">,</span> <span class="s1">'bilibili.com'</span><span class="p">]</span> <span class="ow">and</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'/video/'</span><span class="p">):</span>
|
||
<span class="n">path_parts</span> <span class="o">=</span> <span class="n">parsed_url</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'/'</span><span class="p">)</span>
|
||
|
||
<span class="n">video_id</span> <span class="o">=</span> <span class="n">path_parts</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span>
|
||
<span class="n">param_key</span> <span class="o">=</span> <span class="kc">None</span>
|
||
<span class="k">if</span> <span class="n">video_id</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'av'</span><span class="p">):</span>
|
||
<span class="n">video_id</span> <span class="o">=</span> <span class="n">video_id</span><span class="p">[</span><span class="mi">2</span><span class="p">:]</span>
|
||
<span class="n">param_key</span> <span class="o">=</span> <span class="s1">'aid'</span>
|
||
<span class="k">elif</span> <span class="n">video_id</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'BV'</span><span class="p">):</span>
|
||
<span class="n">param_key</span> <span class="o">=</span> <span class="s1">'bvid'</span>
|
||
|
||
<span class="n">iframe_src</span> <span class="o">=</span> <span class="p">(</span>
|
||
<span class="sa">f</span><span class="s1">'https://player.bilibili.com/player.html?</span><span class="si">{</span><span class="n">param_key</span><span class="si">}</span><span class="s1">=</span><span class="si">{</span><span class="n">video_id</span><span class="si">}</span><span class="s1">&high_quality=1&autoplay=false&danmaku=0'</span>
|
||
<span class="p">)</span>
|
||
|
||
<span class="k">return</span> <span class="n">iframe_src</span></div>
|
||
|
||
|
||
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">_j2p_process_escape</span><span class="p">(</span><span class="n">match</span><span class="p">:</span> <span class="n">re</span><span class="o">.</span><span class="n">Match</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span>
|
||
<span class="c1"># deal with ECMA escape characters</span>
|
||
<span class="n">_escape</span> <span class="o">=</span> <span class="n">match</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span> <span class="ow">or</span> <span class="n">match</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span>
|
||
<span class="k">return</span> <span class="p">(</span>
|
||
<span class="sa">Rf</span><span class="s1">'\</span><span class="si">{</span><span class="n">_escape</span><span class="si">}</span><span class="s1">'</span>
|
||
<span class="k">if</span> <span class="n">_escape</span> <span class="ow">in</span> <span class="n">_JSON_PASSTHROUGH_ESCAPES</span>
|
||
<span class="k">else</span> <span class="sa">R</span><span class="s1">'\u00'</span> <span class="k">if</span> <span class="n">_escape</span> <span class="o">==</span> <span class="s1">'x'</span> <span class="k">else</span> <span class="s1">''</span> <span class="k">if</span> <span class="n">_escape</span> <span class="o">==</span> <span class="s1">'</span><span class="se">\n</span><span class="s1">'</span> <span class="k">else</span> <span class="n">_escape</span>
|
||
<span class="p">)</span>
|
||
|
||
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">_j2p_decimal</span><span class="p">(</span><span class="n">match</span><span class="p">:</span> <span class="n">re</span><span class="o">.</span><span class="n">Match</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="p">(</span>
|
||
<span class="k">match</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
|
||
<span class="o">+</span> <span class="n">match</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span>
|
||
<span class="o">+</span> <span class="p">(</span><span class="n">match</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="mi">3</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">"_"</span><span class="p">,</span> <span class="s2">""</span><span class="p">)</span> <span class="ow">or</span> <span class="s2">"0"</span><span class="p">)</span>
|
||
<span class="o">+</span> <span class="s2">"."</span>
|
||
<span class="o">+</span> <span class="p">(</span><span class="n">match</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="mi">4</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">"_"</span><span class="p">,</span> <span class="s2">""</span><span class="p">)</span> <span class="ow">or</span> <span class="s2">"0"</span><span class="p">)</span>
|
||
<span class="p">)</span>
|
||
|
||
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">_j2p_decimal2</span><span class="p">(</span><span class="n">match</span><span class="p">:</span> <span class="n">re</span><span class="o">.</span><span class="n">Match</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="n">match</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span> <span class="o">+</span> <span class="n">match</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span> <span class="o">+</span> <span class="n">match</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="mi">3</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">"_"</span><span class="p">,</span> <span class="s2">""</span><span class="p">)</span>
|
||
|
||
|
||
<div class="viewcode-block" id="js_obj_str_to_python">
|
||
<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.js_obj_str_to_python">[docs]</a>
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">js_obj_str_to_python</span><span class="p">(</span><span class="n">js_obj_str</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">:</span>
|
||
<span class="w"> </span><span class="sd">"""Convert a javascript variable into JSON and then load the value</span>
|
||
|
||
<span class="sd"> It does not deal with all cases, but it is good enough for now.</span>
|
||
<span class="sd"> chompjs has a better implementation.</span>
|
||
<span class="sd"> """</span>
|
||
<span class="n">s</span> <span class="o">=</span> <span class="n">js_obj_str_to_json_str</span><span class="p">(</span><span class="n">js_obj_str</span><span class="p">)</span>
|
||
<span class="c1"># load the JSON and return the result</span>
|
||
<span class="k">if</span> <span class="n">s</span> <span class="o">==</span> <span class="s2">""</span><span class="p">:</span>
|
||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"js_obj_str can't be an empty string"</span><span class="p">)</span>
|
||
<span class="k">try</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">s</span><span class="p">)</span>
|
||
<span class="k">except</span> <span class="n">json</span><span class="o">.</span><span class="n">JSONDecodeError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
|
||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"Internal error: js_obj_str_to_python creates invalid JSON:</span><span class="se">\n</span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">s</span><span class="p">)</span>
|
||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"js_obj_str_to_python creates invalid JSON"</span><span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">e</span></div>
|
||
|
||
|
||
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">js_obj_str_to_json_str</span><span class="p">(</span><span class="n">js_obj_str</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span>
|
||
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">js_obj_str</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
|
||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"js_obj_str must be of type str"</span><span class="p">)</span>
|
||
<span class="k">if</span> <span class="n">js_obj_str</span> <span class="o">==</span> <span class="s2">""</span><span class="p">:</span>
|
||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"js_obj_str can't be an empty string"</span><span class="p">)</span>
|
||
|
||
<span class="c1"># when in_string is not None, it contains the character that has opened the string</span>
|
||
<span class="c1"># either simple quote or double quote</span>
|
||
<span class="n">in_string</span> <span class="o">=</span> <span class="kc">None</span>
|
||
<span class="c1"># cut the string:</span>
|
||
<span class="c1"># r"""{ a:"f\"irst", c:'sec"ond'}"""</span>
|
||
<span class="c1"># becomes</span>
|
||
<span class="c1"># ['{ a:', '"', 'f\\', '"', 'irst', '"', ', c:', "'", 'sec', '"', 'ond', "'", '}']</span>
|
||
<span class="n">parts</span> <span class="o">=</span> <span class="n">_JS_STRING_DELIMITERS</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">js_obj_str</span><span class="p">)</span>
|
||
<span class="c1"># does the previous part end with a backslash?</span>
|
||
<span class="n">blackslash_just_before</span> <span class="o">=</span> <span class="kc">False</span>
|
||
<span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">p</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">parts</span><span class="p">):</span>
|
||
<span class="k">if</span> <span class="n">p</span> <span class="o">==</span> <span class="n">in_string</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">blackslash_just_before</span><span class="p">:</span>
|
||
<span class="c1"># * the current part matches the character which has opened the string</span>
|
||
<span class="c1"># * there is no backslash just before</span>
|
||
<span class="c1"># --> the current part closes the current string</span>
|
||
<span class="n">in_string</span> <span class="o">=</span> <span class="kc">None</span>
|
||
<span class="c1"># replace simple quote and ` by double quote</span>
|
||
<span class="c1"># since JSON supports only double quote for string</span>
|
||
<span class="n">parts</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'"'</span>
|
||
|
||
<span class="k">elif</span> <span class="n">in_string</span><span class="p">:</span>
|
||
<span class="c1"># --> we are in a JS string</span>
|
||
<span class="c1"># replace the colon by a temporary character</span>
|
||
<span class="c1"># so _JS_QUOTE_KEYS_RE doesn't have to deal with colon inside the JS strings</span>
|
||
<span class="n">p</span> <span class="o">=</span> <span class="n">p</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">':'</span><span class="p">,</span> <span class="nb">chr</span><span class="p">(</span><span class="mi">1</span><span class="p">))</span>
|
||
<span class="c1"># replace JS escape sequences by JSON escape sequences</span>
|
||
<span class="n">p</span> <span class="o">=</span> <span class="n">_JS_STRING_ESCAPE_RE</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="n">_j2p_process_escape</span><span class="p">,</span> <span class="n">p</span><span class="p">)</span>
|
||
<span class="c1"># the JS string is delimited by simple quote.</span>
|
||
<span class="c1"># This is not supported by JSON.</span>
|
||
<span class="c1"># single-quote delimited strings are converted to double-quote delimited strings</span>
|
||
<span class="c1"># here, inside a JS string, we escape the double quote</span>
|
||
<span class="k">if</span> <span class="n">in_string</span> <span class="o">==</span> <span class="s2">"'"</span><span class="p">:</span>
|
||
<span class="n">p</span> <span class="o">=</span> <span class="n">p</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'"'</span><span class="p">,</span> <span class="sa">r</span><span class="s1">'\"'</span><span class="p">)</span>
|
||
<span class="n">parts</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">=</span> <span class="n">p</span>
|
||
<span class="c1"># deal with the sequence backslash then quote</span>
|
||
<span class="c1"># since js_obj_str splits on quote, we detect this case:</span>
|
||
<span class="c1"># * the previous part ends with a backslash</span>
|
||
<span class="c1"># * the current part is a single quote</span>
|
||
<span class="c1"># when detected, the backslash is removed from the previous part</span>
|
||
<span class="k">if</span> <span class="n">blackslash_just_before</span> <span class="ow">and</span> <span class="n">p</span><span class="p">[:</span><span class="mi">1</span><span class="p">]</span> <span class="o">==</span> <span class="s2">"'"</span><span class="p">:</span>
|
||
<span class="n">parts</span><span class="p">[</span><span class="n">i</span> <span class="o">-</span> <span class="mi">1</span><span class="p">]</span> <span class="o">=</span> <span class="n">parts</span><span class="p">[</span><span class="n">i</span> <span class="o">-</span> <span class="mi">1</span><span class="p">][:</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span>
|
||
|
||
<span class="k">elif</span> <span class="n">in_string</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">p</span> <span class="ow">in</span> <span class="p">(</span><span class="s1">'"'</span><span class="p">,</span> <span class="s2">"'"</span><span class="p">,</span> <span class="s2">"`"</span><span class="p">):</span>
|
||
<span class="c1"># we are not in string but p is string delimiter</span>
|
||
<span class="c1"># --> that's the start of a new string</span>
|
||
<span class="n">in_string</span> <span class="o">=</span> <span class="n">p</span>
|
||
<span class="c1"># replace simple quote by double quote</span>
|
||
<span class="c1"># since JSON supports only double quote for string</span>
|
||
<span class="n">parts</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'"'</span>
|
||
|
||
<span class="k">elif</span> <span class="n">in_string</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="c1"># we are not in a string</span>
|
||
<span class="c1"># replace by null these values:</span>
|
||
<span class="c1"># * void 0</span>
|
||
<span class="c1"># * void(0)</span>
|
||
<span class="c1"># * undefined</span>
|
||
<span class="c1"># https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/void</span>
|
||
<span class="n">p</span> <span class="o">=</span> <span class="n">_JS_VOID_OR_UNDEFINED_RE</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="s2">"null"</span><span class="p">,</span> <span class="n">p</span><span class="p">)</span>
|
||
<span class="c1"># make sure there is a leading zero in front of float</span>
|
||
<span class="n">p</span> <span class="o">=</span> <span class="n">_JS_DECIMAL_RE</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="n">_j2p_decimal</span><span class="p">,</span> <span class="n">p</span><span class="p">)</span>
|
||
<span class="n">p</span> <span class="o">=</span> <span class="n">_JS_DECIMAL2_RE</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="n">_j2p_decimal2</span><span class="p">,</span> <span class="n">p</span><span class="p">)</span>
|
||
<span class="c1"># remove extra comma in a list or an object</span>
|
||
<span class="c1"># for example [1,2,3,] becomes [1,2,3]</span>
|
||
<span class="n">p</span> <span class="o">=</span> <span class="n">_JS_EXTRA_COMA_RE</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="k">lambda</span> <span class="n">match</span><span class="p">:</span> <span class="n">match</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="mi">1</span><span class="p">),</span> <span class="n">p</span><span class="p">)</span>
|
||
<span class="n">parts</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">=</span> <span class="n">p</span>
|
||
|
||
<span class="c1"># update for the next iteration</span>
|
||
<span class="n">blackslash_just_before</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">p</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span> <span class="ow">and</span> <span class="n">p</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="o">==</span> <span class="s1">'</span><span class="se">\\</span><span class="s1">'</span>
|
||
|
||
<span class="c1"># join the string</span>
|
||
<span class="n">s</span> <span class="o">=</span> <span class="s1">''</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">parts</span><span class="p">)</span>
|
||
<span class="c1"># add quotes around the key</span>
|
||
<span class="c1"># { a: 12 }</span>
|
||
<span class="c1"># becomes</span>
|
||
<span class="c1"># { "a": 12 }</span>
|
||
<span class="n">s</span> <span class="o">=</span> <span class="n">_JS_QUOTE_KEYS_RE</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="sa">r</span><span class="s1">'\1"\2"\3'</span><span class="p">,</span> <span class="n">s</span><span class="p">)</span>
|
||
<span class="c1"># replace the temporary placeholder character with a colon and strip whitespace</span>
|
||
<span class="n">s</span> <span class="o">=</span> <span class="n">s</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="nb">chr</span><span class="p">(</span><span class="mi">1</span><span class="p">),</span> <span class="s1">':'</span><span class="p">)</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span>
|
||
<span class="k">return</span> <span class="n">s</span>
|
||
|
||
|
||
<div class="viewcode-block" id="parse_duration_string">
|
||
<a class="viewcode-back" href="../../src/searx.utils.html#searx.utils.parse_duration_string">[docs]</a>
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">parse_duration_string</span><span class="p">(</span><span class="n">duration_str</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">timedelta</span> <span class="o">|</span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="w"> </span><span class="sd">"""Parse a time string in format MM:SS or HH:MM:SS and convert it to a `timedelta` object.</span>
|
||
|
||
<span class="sd"> Returns None if the provided string doesn't match any of the formats.</span>
|
||
<span class="sd"> """</span>
|
||
<span class="n">duration_str</span> <span class="o">=</span> <span class="n">duration_str</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span>
|
||
|
||
<span class="k">if</span> <span class="ow">not</span> <span class="n">duration_str</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="kc">None</span>
|
||
|
||
<span class="k">try</span><span class="p">:</span>
|
||
<span class="c1"># prepending ["00"] here inits hours to 0 if they are not provided</span>
|
||
<span class="n">time_parts</span> <span class="o">=</span> <span class="p">([</span><span class="s2">"00"</span><span class="p">]</span> <span class="o">+</span> <span class="n">duration_str</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">":"</span><span class="p">))[:</span><span class="mi">3</span><span class="p">]</span>
|
||
<span class="n">hours</span><span class="p">,</span> <span class="n">minutes</span><span class="p">,</span> <span class="n">seconds</span> <span class="o">=</span> <span class="nb">map</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">time_parts</span><span class="p">)</span>
|
||
<span class="k">return</span> <span class="n">timedelta</span><span class="p">(</span><span class="n">hours</span><span class="o">=</span><span class="n">hours</span><span class="p">,</span> <span class="n">minutes</span><span class="o">=</span><span class="n">minutes</span><span class="p">,</span> <span class="n">seconds</span><span class="o">=</span><span class="n">seconds</span><span class="p">)</span>
|
||
|
||
<span class="k">except</span> <span class="p">(</span><span class="ne">ValueError</span><span class="p">,</span> <span class="ne">TypeError</span><span class="p">):</span>
|
||
<span class="k">pass</span>
|
||
|
||
<span class="k">return</span> <span class="kc">None</span></div>
|
||
|
||
</pre></div>
|
||
</article>
|
||
</div>
|
||
<footer>
|
||
|
||
<div class="related-pages">
|
||
|
||
|
||
</div>
|
||
<div class="bottom-of-page">
|
||
<div class="left-details">
|
||
<div class="copyright">
|
||
Copyright © SearXNG team
|
||
</div>
|
||
Made with
|
||
<a href="https://github.com/pradyunsg/furo">Furo</a>
|
||
|
||
</div>
|
||
<div class="right-details">
|
||
<div class="icons">
|
||
<a class="muted-link " href="https://github.com/searxng/searxng/" aria-label="GitHub">💾</a>
|
||
<a class="muted-link " href="https://searx.space/" aria-label="searx.space">🌐</a>
|
||
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
</footer>
|
||
</div>
|
||
<aside class="toc-drawer no-toc">
|
||
|
||
|
||
|
||
</aside>
|
||
</div>
|
||
</div><script src="../../_static/documentation_options.js?v=d004a72a"></script>
|
||
<script src="../../_static/doctools.js?v=fd6eb6e6"></script>
|
||
<script src="../../_static/sphinx_highlight.js?v=6ffebe34"></script>
|
||
<script src="../../_static/scripts/furo.js?v=46bd48cc"></script>
|
||
</body>
|
||
</html> |