<!--
  mirror of https://github.com/searxng/searxng.git
  synced 2026-05-10 13:05:46 +02:00
  1009 lines, 103 KiB, HTML
-->
<!doctype html>
<html class="no-js" lang="en" data-content_root="../../../">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width,initial-scale=1">
  <meta name="color-scheme" content="light dark">
  <link rel="index" title="Index" href="../../../genindex.html">
  <link rel="search" title="Search" href="../../../search.html">
  <link rel="prefetch" href="../../../_static/searxng-wordmark.svg" as="image">

  <!-- Generated with Sphinx 9.1.0 and Furo 2025.12.19 -->
  <title>searx.engines.startpage - SearXNG Documentation (2026.5.9+0cba32c15)</title>
  <link rel="stylesheet" href="../../../_static/pygments.css?v=d111a655">
  <link rel="stylesheet" href="../../../_static/styles/furo.css?v=7bdb33bb">
  <link rel="stylesheet" href="../../../_static/styles/furo-extensions.css?v=8dab3a3b">
  <link rel="stylesheet" href="../../../_static/searxng.css?v=4b1b1f10">

  <style>
    /* Default values of the --color-code-* custom properties. This is the only
       rule that applies when printing, because the overrides below are all
       wrapped in "@media not print". */
    body {
      --color-code-background: #f2f2f2;
      --color-code-foreground: #1e1e1e;
    }

    @media not print {
      /* Dark values when the page theme is set to "dark" via data-theme. */
      body[data-theme="dark"] {
        --color-code-background: #202020;
        --color-code-foreground: #d0d0d0;
      }

      /* Dark values when the user agent prefers a dark scheme, unless the
         page theme is set to "light" via data-theme. */
      @media (prefers-color-scheme: dark) {
        body:not([data-theme="light"]) {
          --color-code-background: #202020;
          --color-code-foreground: #d0d0d0;
        }
      }
    }
  </style>
</head>
<body>
|
||
|
||
<script>
  // Copy the stored "theme" value onto <body data-theme="…">, which the
  // data-theme selectors in the page's inline <style> read. Falls back to
  // "auto" when nothing is stored, or when touching localStorage throws
  // (e.g. storage is blocked by the browser's privacy settings).
  try {
    document.body.dataset.theme = localStorage.getItem("theme") || "auto";
  } catch (err) {
    document.body.dataset.theme = "auto";
  }
</script>
<!-- Hidden SVG sprite sheet: nothing here is drawn directly (display: none).
     Each <symbol> is pulled in by id through <use href="#…">, e.g. by the
     icons in the mobile header. -->
<svg xmlns="http://www.w3.org/2000/svg" style="display: none;">
  <symbol id="svg-toc" viewBox="0 0 24 24">
    <title>Contents</title>
    <svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 1024 1024">
      <path d="M408 442h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8zm-8 204c0 4.4 3.6 8 8 8h480c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8H408c-4.4 0-8 3.6-8 8v56zm504-486H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 632H120c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zM115.4 518.9L271.7 642c5.8 4.6 14.4.5 14.4-6.9V388.9c0-7.4-8.5-11.5-14.4-6.9L115.4 505.1a8.74 8.74 0 0 0 0 13.8z"/>
    </svg>
  </symbol>
  <symbol id="svg-menu" viewBox="0 0 24 24">
    <title>Menu</title>
    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
      stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-menu">
      <line x1="3" y1="12" x2="21" y2="12"></line>
      <line x1="3" y1="6" x2="21" y2="6"></line>
      <line x1="3" y1="18" x2="21" y2="18"></line>
    </svg>
  </symbol>
  <symbol id="svg-arrow-right" viewBox="0 0 24 24">
    <title>Expand</title>
    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
      stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather-chevron-right">
      <polyline points="9 18 15 12 9 6"></polyline>
    </svg>
  </symbol>
  <symbol id="svg-sun" viewBox="0 0 24 24">
    <title>Light mode</title>
    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
      stroke-width="1" stroke-linecap="round" stroke-linejoin="round" class="feather-sun">
      <circle cx="12" cy="12" r="5"></circle>
      <line x1="12" y1="1" x2="12" y2="3"></line>
      <line x1="12" y1="21" x2="12" y2="23"></line>
      <line x1="4.22" y1="4.22" x2="5.64" y2="5.64"></line>
      <line x1="18.36" y1="18.36" x2="19.78" y2="19.78"></line>
      <line x1="1" y1="12" x2="3" y2="12"></line>
      <line x1="21" y1="12" x2="23" y2="12"></line>
      <line x1="4.22" y1="19.78" x2="5.64" y2="18.36"></line>
      <line x1="18.36" y1="5.64" x2="19.78" y2="4.22"></line>
    </svg>
  </symbol>
  <symbol id="svg-moon" viewBox="0 0 24 24">
    <title>Dark mode</title>
    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
      stroke-width="1" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-moon">
      <path stroke="none" d="M0 0h24v24H0z" fill="none" />
      <path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z" />
    </svg>
  </symbol>
  <symbol id="svg-sun-with-moon" viewBox="0 0 24 24">
    <title>Auto light/dark, in light mode</title>
    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
      stroke-width="1" stroke-linecap="round" stroke-linejoin="round"
      class="icon-custom-derived-from-feather-sun-and-tabler-moon">
      <path style="opacity: 50%" d="M 5.411 14.504 C 5.471 14.504 5.532 14.504 5.591 14.504 C 3.639 16.319 4.383 19.569 6.931 20.352 C 7.693 20.586 8.512 20.551 9.25 20.252 C 8.023 23.207 4.056 23.725 2.11 21.184 C 0.166 18.642 1.702 14.949 4.874 14.536 C 5.051 14.512 5.231 14.5 5.411 14.5 L 5.411 14.504 Z"/>
      <line x1="14.5" y1="3.25" x2="14.5" y2="1.25"/>
      <line x1="14.5" y1="15.85" x2="14.5" y2="17.85"/>
      <line x1="10.044" y1="5.094" x2="8.63" y2="3.68"/>
      <line x1="19" y1="14.05" x2="20.414" y2="15.464"/>
      <line x1="8.2" y1="9.55" x2="6.2" y2="9.55"/>
      <line x1="20.8" y1="9.55" x2="22.8" y2="9.55"/>
      <line x1="10.044" y1="14.006" x2="8.63" y2="15.42"/>
      <line x1="19" y1="5.05" x2="20.414" y2="3.636"/>
      <circle cx="14.5" cy="9.55" r="3.6"/>
    </svg>
  </symbol>
  <symbol id="svg-moon-with-sun" viewBox="0 0 24 24">
    <title>Auto light/dark, in dark mode</title>
    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
      stroke-width="1" stroke-linecap="round" stroke-linejoin="round"
      class="icon-custom-derived-from-feather-sun-and-tabler-moon">
      <path d="M 8.282 7.007 C 8.385 7.007 8.494 7.007 8.595 7.007 C 5.18 10.184 6.481 15.869 10.942 17.24 C 12.275 17.648 13.706 17.589 15 17.066 C 12.851 22.236 5.91 23.143 2.505 18.696 C -0.897 14.249 1.791 7.786 7.342 7.063 C 7.652 7.021 7.965 7 8.282 7 L 8.282 7.007 Z"/>
      <line style="opacity: 50%" x1="18" y1="3.705" x2="18" y2="2.5"/>
      <line style="opacity: 50%" x1="18" y1="11.295" x2="18" y2="12.5"/>
      <line style="opacity: 50%" x1="15.316" y1="4.816" x2="14.464" y2="3.964"/>
      <line style="opacity: 50%" x1="20.711" y1="10.212" x2="21.563" y2="11.063"/>
      <line style="opacity: 50%" x1="14.205" y1="7.5" x2="13.001" y2="7.5"/>
      <line style="opacity: 50%" x1="21.795" y1="7.5" x2="23" y2="7.5"/>
      <line style="opacity: 50%" x1="15.316" y1="10.184" x2="14.464" y2="11.036"/>
      <line style="opacity: 50%" x1="20.711" y1="4.789" x2="21.563" y2="3.937"/>
      <circle style="opacity: 50%" cx="18" cy="7.5" r="2.169"/>
    </svg>
  </symbol>
  <symbol id="svg-pencil" viewBox="0 0 24 24">
    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
      stroke-width="1" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-pencil-code">
      <path d="M4 20h4l10.5 -10.5a2.828 2.828 0 1 0 -4 -4l-10.5 10.5v4" />
      <path d="M13.5 6.5l4 4" />
      <path d="M20 21l2 -2l-2 -2" />
      <path d="M17 17l-2 2l2 2" />
    </svg>
  </symbol>
  <symbol id="svg-eye" viewBox="0 0 24 24">
    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor"
      stroke-width="1" stroke-linecap="round" stroke-linejoin="round" class="icon-tabler-eye-code">
      <path stroke="none" d="M0 0h24v24H0z" fill="none" />
      <path d="M10 12a2 2 0 1 0 4 0a2 2 0 0 0 -4 0" />
      <path
        d="M11.11 17.958c-3.209 -.307 -5.91 -2.293 -8.11 -5.958c2.4 -4 5.4 -6 9 -6c3.6 0 6.6 2 9 6c-.21 .352 -.427 .688 -.647 1.008" />
      <path d="M20 21l2 -2l-2 -2" />
      <path d="M17 17l-2 2l2 2" />
    </svg>
  </symbol>
</svg>
<!-- Checkboxes for the navigation and table-of-contents sidebars. Each
     <label for="…"> below (and the icon labels in the mobile header) flips
     the checkbox with the matching id when activated; presumably the
     stylesheet keys sidebar visibility off the checked state. -->
<input type="checkbox" class="sidebar-toggle" name="__navigation" id="__navigation" aria-label="Toggle site navigation sidebar">
<input type="checkbox" class="sidebar-toggle" name="__toc" id="__toc" aria-label="Toggle table of contents sidebar">
<label class="overlay sidebar-overlay" for="__navigation"></label>
<label class="overlay toc-overlay" for="__toc"></label>

<!-- In-page link to the element with id "furo-main-content". -->
<a class="skip-to-content muted-link" href="#furo-main-content">Skip to content</a>

<div class="page">
|
||
<!-- Mobile header bar: navigation toggle on the left, site title in the
     centre, theme switcher and table-of-contents toggle on the right. -->
<header class="mobile-header">
  <div class="header-left">
    <!-- Activating this label flips the #__navigation checkbox. -->
    <label class="nav-overlay-icon" for="__navigation">
      <span class="icon"><svg><use href="#svg-menu"></use></svg></span>
    </label>
  </div>
  <div class="header-center">
    <a href="../../../index.html"><div class="brand">SearXNG Documentation (2026.5.9+0cba32c15)</div></a>
  </div>
  <div class="header-right">
    <div class="theme-toggle-container theme-toggle-header">
      <!-- Explicit type="button": this control runs an in-page action and
           must never behave as a submit button. One icon per theme state is
           present; presumably the stylesheet shows only the matching one. -->
      <button class="theme-toggle" type="button" aria-label="Toggle Light / Dark / Auto color theme">
        <svg class="theme-icon-when-auto-light"><use href="#svg-sun-with-moon"></use></svg>
        <svg class="theme-icon-when-auto-dark"><use href="#svg-moon-with-sun"></use></svg>
        <svg class="theme-icon-when-dark"><use href="#svg-moon"></use></svg>
        <svg class="theme-icon-when-light"><use href="#svg-sun"></use></svg>
      </button>
    </div>
    <!-- Activating this label flips the #__toc checkbox. -->
    <label class="toc-overlay-icon toc-header-icon no-toc" for="__toc">
      <span class="icon"><svg><use href="#svg-toc"></use></svg></span>
    </label>
  </div>
</header>
<aside class="sidebar-drawer">
|
||
<div class="sidebar-container">
|
||
|
||
<div class="sidebar-sticky"><div class="sidebar-scroll"><a class="sidebar-brand" href="../../../index.html">
|
||
<div class="sidebar-logo-container">
|
||
<img class="sidebar-logo" src="../../../_static/searxng-wordmark.svg" alt="Logo"/>
|
||
</div>
|
||
|
||
<span class="sidebar-brand-text">SearXNG Documentation (2026.5.9+0cba32c15)</span>
|
||
|
||
</a><form class="sidebar-search-container" method="get" action="../../../search.html" role="search">
|
||
<input class="sidebar-search" placeholder="Search" name="q" aria-label="Search">
|
||
<input type="hidden" name="check_keywords" value="yes">
|
||
<input type="hidden" name="area" value="default">
|
||
</form>
|
||
<div id="searchbox"></div><div class="sidebar-tree">
|
||
<ul>
|
||
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../user/index.html">User information</a><input aria-label="Toggle navigation of User information" class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" role="switch" type="checkbox"/><label for="toctree-checkbox-1"><span class="icon"><svg><use href="#svg-arrow-right"></use></svg></span></label><ul>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../user/search-syntax.html">Search syntax</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../user/configured_engines.html">Configured Engines</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../user/about.html">About SearXNG</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../../own-instance.html">Why use a private instance?</a></li>
|
||
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../admin/index.html">Administrator documentation</a><input aria-label="Toggle navigation of Administrator documentation" class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" role="switch" type="checkbox"/><label for="toctree-checkbox-2"><span class="icon"><svg><use href="#svg-arrow-right"></use></svg></span></label><ul>
|
||
<li class="toctree-l2 has-children"><a class="reference internal" href="../../../admin/settings/index.html">Settings</a><input aria-label="Toggle navigation of Settings" class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" role="switch" type="checkbox"/><label for="toctree-checkbox-3"><span class="icon"><svg><use href="#svg-arrow-right"></use></svg></span></label><ul>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../admin/settings/settings.html"><code class="docutils literal notranslate"><span class="pre">settings.yml</span></code></a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../admin/settings/settings_engines.html"><code class="docutils literal notranslate"><span class="pre">engines:</span></code></a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../admin/settings/settings_brand.html"><code class="docutils literal notranslate"><span class="pre">brand:</span></code></a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../admin/settings/settings_general.html"><code class="docutils literal notranslate"><span class="pre">general:</span></code></a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../admin/settings/settings_search.html"><code class="docutils literal notranslate"><span class="pre">search:</span></code></a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../admin/settings/settings_server.html"><code class="docutils literal notranslate"><span class="pre">server:</span></code></a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../admin/settings/settings_ui.html"><code class="docutils literal notranslate"><span class="pre">ui:</span></code></a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../admin/settings/settings_redis.html"><code class="docutils literal notranslate"><span class="pre">redis:</span></code></a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../admin/settings/settings_valkey.html"><code class="docutils literal notranslate"><span class="pre">valkey:</span></code></a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../admin/settings/settings_outgoing.html"><code class="docutils literal notranslate"><span class="pre">outgoing:</span></code></a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../admin/settings/settings_categories_as_tabs.html"><code class="docutils literal notranslate"><span class="pre">categories_as_tabs:</span></code></a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../admin/settings/settings_plugins.html"><code class="docutils literal notranslate"><span class="pre">plugins:</span></code></a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../admin/installation.html">Installation</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../admin/installation-docker.html">Installation container</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../admin/installation-scripts.html">Installation Script</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../admin/installation-searxng.html">Step by step installation</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../admin/installation-granian.html">Granian</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../admin/installation-uwsgi.html">uWSGI</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../admin/installation-nginx.html">NGINX</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../admin/installation-apache.html">Apache</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../admin/update-searxng.html">SearXNG maintenance</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../admin/answer-captcha.html">Answer CAPTCHA from server’s IP</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../admin/searx.favicons.html">Favicons</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../admin/searx.limiter.html">Limiter</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../admin/api.html">Administration API</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../admin/architecture.html">Architecture</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../admin/plugins.html">List of plugins</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../admin/buildhosts.html">Buildhosts</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../dev/index.html">Developer documentation</a><input aria-label="Toggle navigation of Developer documentation" class="toctree-checkbox" id="toctree-checkbox-4" name="toctree-checkbox-4" role="switch" type="checkbox"/><label for="toctree-checkbox-4"><span class="icon"><svg><use href="#svg-arrow-right"></use></svg></span></label><ul>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../dev/quickstart.html">Development Quickstart</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../dev/commits.html">Git Commits & Change Management</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../dev/contribution_guide.html">How to contribute</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../dev/extended_types.html">Extended Types</a></li>
|
||
<li class="toctree-l2 has-children"><a class="reference internal" href="../../../dev/engines/index.html">Engine Implementations</a><input aria-label="Toggle navigation of Engine Implementations" class="toctree-checkbox" id="toctree-checkbox-5" name="toctree-checkbox-5" role="switch" type="checkbox"/><label for="toctree-checkbox-5"><span class="icon"><svg><use href="#svg-arrow-right"></use></svg></span></label><ul>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/enginelib.html">Engine Library</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/engines.html">SearXNG’s engines loader</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/engine_overview.html">Engine Overview</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/demo/demo_online.html">Demo Online Engine</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/xpath.html">XPath Engine</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/mediawiki.html">MediaWiki Engine</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/json_engine.html">JSON Engine</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/adobe_stock.html">Adobe Stock</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/alpinelinux.html">Alpine Linux Packages</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/annas_archive.html">Anna’s Archive</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/aol.html">AOL</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/archlinux.html">Arch Linux</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/arxiv.html">arXiv</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/astrophysics_data_system.html">Astrophysics Data System (ADS)</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/azure.html">Azure Resources</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/bing.html">Bing Engines</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/bpb.html">Bpb</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/brave.html">Brave Engines</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/bt4g.html">BT4G</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/chinaso.html">ChinaSo</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/core.html">CORE</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/crossref.html">Crossref</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/dailymotion.html">Dailymotion</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/discourse.html">Discourse Forums</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/duckduckgo.html">DuckDuckGo Engines</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/geizhals.html">Geizhals</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/gitea.html">Gitea</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/github_code.html">Github Code</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/gitlab.html">GitLab</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/google.html">Google Engines</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/huggingface.html">Hugging Face</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/karmasearch.html">Karmasearch</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/lemmy.html">Lemmy</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/loc.html">Library of Congress</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/marginalia.html">Marginalia Search</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/mastodon.html">Mastodon</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/moviepilot.html">Moviepilot</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/mrs.html">Matrix Rooms Search (MRS)</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/mwmbl.html">Mwmbl Engine</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/odysee.html">Odysee</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/openalex.html">OpenAlex</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/openlibrary.html">Open Library</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/peertube.html">Peertube Engines</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/piped.html">Piped</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/presearch.html">Presearch Engine</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/pubmed.html">PubMed</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/qwant.html">Qwant</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/radio_browser.html">RadioBrowser</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/recoll.html">Recoll Engine</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/repology.html">Repology</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/reuters.html">Reuters</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/semantic_scholar.html">Semantic Scholar</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/soundcloud.html">Soundcloud</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/sourcehut.html">Sourcehut</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/springer.html">Springer Nature</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/startpage.html">Startpage Engines</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/tagesschau.html">Tagesschau API</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/torznab.html">Torznab WebAPI</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/tubearchivist.html">Tube Archivist</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/void.html">Void Linux binary packages</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/wallhaven.html">Wallhaven</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/wikipedia.html">Wikimedia</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/yacy.html">Yacy</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/yahoo.html">Yahoo Engine</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online/zlibrary.html">Z-Library</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/offline_concept.html">Offline Concept</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/demo/demo_offline.html">Demo Offline Engine</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/offline/command-line-engines.html">Command Line Engines</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/offline/nosql-engines.html">NoSQL databases</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/offline/search-indexer-engines.html">Local Search APIs</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/offline/sql-engines.html">SQL Engines</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/engines/online_url_search/tineye.html">Tineye</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l2 has-children"><a class="reference internal" href="../../../dev/result_types/index.html">Result Types</a><input aria-label="Toggle navigation of Result Types" class="toctree-checkbox" id="toctree-checkbox-6" name="toctree-checkbox-6" role="switch" type="checkbox"/><label for="toctree-checkbox-6"><span class="icon"><svg><use href="#svg-arrow-right"></use></svg></span></label><ul>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/result_types/base_result.html">Result</a></li>
|
||
<li class="toctree-l3 has-children"><a class="reference internal" href="../../../dev/result_types/main_result.html">Main Search Results</a><input aria-label="Toggle navigation of Main Search Results" class="toctree-checkbox" id="toctree-checkbox-7" name="toctree-checkbox-7" role="switch" type="checkbox"/><label for="toctree-checkbox-7"><span class="icon"><svg><use href="#svg-arrow-right"></use></svg></span></label><ul>
|
||
<li class="toctree-l4"><a class="reference internal" href="../../../dev/result_types/main/mainresult.html"><code class="docutils literal notranslate"><span class="pre">MainResult</span></code></a></li>
|
||
<li class="toctree-l4"><a class="reference internal" href="../../../dev/result_types/main/keyvalue.html">Key-Value Results</a></li>
|
||
<li class="toctree-l4"><a class="reference internal" href="../../../dev/result_types/main/code.html">Code Results</a></li>
|
||
<li class="toctree-l4"><a class="reference internal" href="../../../dev/result_types/main/paper.html">Paper Results</a></li>
|
||
<li class="toctree-l4"><a class="reference internal" href="../../../dev/result_types/main/file.html">File Results</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/result_types/answer.html">Answer Results</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/result_types/correction.html">Correction Results</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/result_types/suggestion.html">Suggestion Results</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/result_types/infobox.html">Infobox Results</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../dev/templates.html">Simple Theme Templates</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../dev/search_api.html">Search API</a></li>
|
||
<li class="toctree-l2 has-children"><a class="reference internal" href="../../../dev/plugins/index.html">Plugins</a><input aria-label="Toggle navigation of Plugins" class="toctree-checkbox" id="toctree-checkbox-8" name="toctree-checkbox-8" role="switch" type="checkbox"/><label for="toctree-checkbox-8"><span class="icon"><svg><use href="#svg-arrow-right"></use></svg></span></label><ul>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/plugins/development.html">Plugin Development</a></li>
|
||
<li class="toctree-l3 has-children"><a class="reference internal" href="../../../dev/plugins/builtins.html">Built-in Plugins</a><input aria-label="Toggle navigation of Built-in Plugins" class="toctree-checkbox" id="toctree-checkbox-9" name="toctree-checkbox-9" role="switch" type="checkbox"/><label for="toctree-checkbox-9"><span class="icon"><svg><use href="#svg-arrow-right"></use></svg></span></label><ul>
|
||
<li class="toctree-l4"><a class="reference internal" href="../../../dev/plugins/calculator.html">Calculator</a></li>
|
||
<li class="toctree-l4"><a class="reference internal" href="../../../dev/plugins/hash_plugin.html">Hash Values</a></li>
|
||
<li class="toctree-l4"><a class="reference internal" href="../../../dev/plugins/hostnames.html">Hostnames</a></li>
|
||
<li class="toctree-l4"><a class="reference internal" href="../../../dev/plugins/infinite_scroll.html">Infinite scroll</a></li>
|
||
<li class="toctree-l4"><a class="reference internal" href="../../../dev/plugins/self_info.html">Self-Info</a></li>
|
||
<li class="toctree-l4"><a class="reference internal" href="../../../dev/plugins/tor_check.html">Tor check</a></li>
|
||
<li class="toctree-l4"><a class="reference internal" href="../../../dev/plugins/unit_converter.html">Unit Converter</a></li>
|
||
<li class="toctree-l4"><a class="reference internal" href="../../../dev/plugins/time_zone.html">Time Zone</a></li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l2 has-children"><a class="reference internal" href="../../../dev/answerers/index.html">Answerers</a><input aria-label="Toggle navigation of Answerers" class="toctree-checkbox" id="toctree-checkbox-10" name="toctree-checkbox-10" role="switch" type="checkbox"/><label for="toctree-checkbox-10"><span class="icon"><svg><use href="#svg-arrow-right"></use></svg></span></label><ul>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/answerers/development.html">Answerer Development</a></li>
|
||
<li class="toctree-l3 has-children"><a class="reference internal" href="../../../dev/answerers/builtins.html">Built-in Answerers</a><input aria-label="Toggle navigation of Built-in Answerers" class="toctree-checkbox" id="toctree-checkbox-11" name="toctree-checkbox-11" role="switch" type="checkbox"/><label for="toctree-checkbox-11"><span class="icon"><svg><use href="#svg-arrow-right"></use></svg></span></label><ul>
|
||
<li class="toctree-l4"><a class="reference internal" href="../../../dev/answerers/random.html">Random</a></li>
|
||
<li class="toctree-l4"><a class="reference internal" href="../../../dev/answerers/statistics.html">Statistics</a></li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../dev/translation.html">Translation</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../dev/makefile.html">Makefile & <code class="docutils literal notranslate"><span class="pre">./manage</span></code></a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../dev/reST.html">reST primer</a></li>
|
||
<li class="toctree-l2 has-children"><a class="reference internal" href="../../../dev/searxng_extra/index.html">Tooling box <code class="docutils literal notranslate"><span class="pre">searxng_extra</span></code></a><input aria-label="Toggle navigation of Tooling box searxng_extra" class="toctree-checkbox" id="toctree-checkbox-12" name="toctree-checkbox-12" role="switch" type="checkbox"/><label for="toctree-checkbox-12"><span class="icon"><svg><use href="#svg-arrow-right"></use></svg></span></label><ul>
|
||
<li class="toctree-l3"><a class="reference internal" href="../../../dev/searxng_extra/update.html"><code class="docutils literal notranslate"><span class="pre">searxng_extra/update/</span></code></a></li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../utils/index.html">DevOps tooling box</a><input aria-label="Toggle navigation of DevOps tooling box" class="toctree-checkbox" id="toctree-checkbox-13" name="toctree-checkbox-13" role="switch" type="checkbox"/><label for="toctree-checkbox-13"><span class="icon"><svg><use href="#svg-arrow-right"></use></svg></span></label><ul>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../utils/searxng.sh.html"><code class="docutils literal notranslate"><span class="pre">utils/searxng.sh</span></code></a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l1 has-children"><a class="reference internal" href="../../../src/index.html">Source-Code</a><input aria-label="Toggle navigation of Source-Code" class="toctree-checkbox" id="toctree-checkbox-14" name="toctree-checkbox-14" role="switch" type="checkbox"/><label for="toctree-checkbox-14"><span class="icon"><svg><use href="#svg-arrow-right"></use></svg></span></label><ul>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../src/searx.babel_extract.html">Custom message extractor (i18n)</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../src/searx.botdetection.html">Bot Detection</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../src/searx.cache.html">Caches</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../src/searx.exceptions.html">SearXNG Exceptions</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../src/searx.favicons.html">Favicons (source)</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../src/searx.infopage.html">Online <code class="docutils literal notranslate"><span class="pre">/info</span></code></a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../src/searx.locales.html">Locales</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../src/searx.search.html">Search</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../src/searx.search.processors.html">Search processors</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../src/searx.settings.html">Settings Loader</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../src/searx.sqlitedb.html">SQLite DB</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../src/searx.utils.html">Utility functions for the engines</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../src/searx.valkeydb.html">Valkey DB</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../src/searx.valkeylib.html">Valkey Library</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../../src/searx.weather.html">Weather</a></li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
</div>
|
||
</aside>
|
||
<div class="main">
|
||
<div class="content">
|
||
<div class="article-container">
|
||
<!-- Back-to-top shortcut: in-page link to the document start. The arrow icon is
     decorative (the <span> carries the link text), so it is hidden from
     assistive technology. -->
<a href="#" class="back-to-top muted-link">
  <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" aria-hidden="true">
    <path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12z"></path>
  </svg>
  <span>Back to top</span>
</a>
|
||
<!-- Icon bar of the content area: the colour-theme toggle button and the label
     bound to the control with id "__toc" (that control is defined outside this
     fragment). -->
<div class="content-icon-container">
  <div class="theme-toggle-container theme-toggle-content">
    <!-- type="button" makes the in-page action explicit (the default type is
         "submit"). The accessible name comes from aria-label, so the state
         icons below are decorative and hidden from assistive technology. -->
    <button class="theme-toggle" type="button" aria-label="Toggle Light / Dark / Auto color theme">
      <svg class="theme-icon-when-auto-light" aria-hidden="true"><use href="#svg-sun-with-moon"></use></svg>
      <svg class="theme-icon-when-auto-dark" aria-hidden="true"><use href="#svg-moon-with-sun"></use></svg>
      <svg class="theme-icon-when-dark" aria-hidden="true"><use href="#svg-moon"></use></svg>
      <svg class="theme-icon-when-light" aria-hidden="true"><use href="#svg-sun"></use></svg>
    </button>
  </div>
  <label class="toc-overlay-icon toc-content-icon no-toc" for="__toc">
    <span class="icon"><svg><use href="#svg-toc"></use></svg></span>
  </label>
</div>
|
||
<article role="main" id="furo-main-content">
|
||
<h1>Source code for searx.engines.startpage</h1><div class="highlight"><pre>
|
||
<span></span><span class="c1"># SPDX-License-Identifier: AGPL-3.0-or-later</span>
|
||
<span class="sd">"""Startpage's language & region selectors are a mess ..</span>
|
||
|
||
<span class="sd">.. _startpage regions:</span>
|
||
|
||
<span class="sd">Startpage regions</span>
|
||
<span class="sd">=================</span>
|
||
|
||
<span class="sd">In the list of regions there are tags we need to map to common region tags::</span>
|
||
|
||
<span class="sd"> pt-BR_BR --> pt_BR</span>
|
||
<span class="sd"> zh-CN_CN --> zh_Hans_CN</span>
|
||
<span class="sd"> zh-TW_TW --> zh_Hant_TW</span>
|
||
<span class="sd"> zh-TW_HK --> zh_Hant_HK</span>
|
||
<span class="sd"> en-GB_GB --> en_GB</span>
|
||
|
||
<span class="sd">and there is at least one tag with a three letter language tag (ISO 639-2)::</span>
|
||
|
||
<span class="sd"> fil_PH --> fil_PH</span>
|
||
|
||
<span class="sd">The locale code ``no_NO`` from Startpage does not exist and is mapped to</span>
|
||
<span class="sd">``nb-NO``::</span>
|
||
|
||
<span class="sd"> babel.core.UnknownLocaleError: unknown locale 'no_NO'</span>
|
||
|
||
<span class="sd">For reference see languages-subtag at iana; ``no`` is the macrolanguage [1]_ and</span>
|
||
<span class="sd">W3C recommends subtag over macrolanguage [2]_.</span>
|
||
|
||
<span class="sd">.. [1] `iana: language-subtag-registry</span>
|
||
<span class="sd"> &lt;https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry&gt;`_ ::</span>
|
||
|
||
<span class="sd"> type: language</span>
|
||
<span class="sd"> Subtag: nb</span>
|
||
<span class="sd"> Description: Norwegian Bokmål</span>
|
||
<span class="sd"> Added: 2005-10-16</span>
|
||
<span class="sd"> Suppress-Script: Latn</span>
|
||
<span class="sd"> Macrolanguage: no</span>
|
||
|
||
<span class="sd">.. [2]</span>
|
||
<span class="sd"> Use macrolanguages with care. Some language subtags have a Scope field set to</span>
|
||
<span class="sd"> macrolanguage, i.e. this primary language subtag encompasses a number of more</span>
|
||
<span class="sd"> specific primary language subtags in the registry. ... As we recommended for</span>
|
||
<span class="sd"> the collection subtags mentioned above, in most cases you should try to use</span>
|
||
<span class="sd"> the more specific subtags ... `W3: The primary language subtag</span>
|
||
<span class="sd"> &lt;https://www.w3.org/International/questions/qa-choosing-language-tags#langsubtag&gt;`_</span>
|
||
|
||
<span class="sd">.. _startpage languages:</span>
|
||
|
||
<span class="sd">Startpage languages</span>
|
||
<span class="sd">===================</span>
|
||
|
||
<span class="sd">HTTP ``Accept-Language`` header (``send_accept_language_header``):</span>
|
||
<span class="sd"> The displayed name in Startpage's settings page depends on the location of the</span>
|
||
<span class="sd"> IP when ``Accept-Language`` HTTP header is unset.</span>
|
||
|
||
<span class="sd"> Startpage tries to guess user's language and territory from the HTTP</span>
|
||
<span class="sd"> ``Accept-Language``. Optionally the user can select a search-language (can be</span>
|
||
<span class="sd"> different to the UI language) and a region filter.</span>
|
||
|
||
<span class="sd"> In :py:obj:`fetch_traits` we use::</span>
|
||
|
||
<span class="sd"> 'Accept-Language': "en-US,en;q=0.5",</span>
|
||
<span class="sd"> ..</span>
|
||
|
||
<span class="sd"> to get uniform names independent from the IP.</span>
|
||
|
||
<span class="sd">.. _startpage categories:</span>
|
||
|
||
<span class="sd">Startpage categories</span>
|
||
<span class="sd">====================</span>
|
||
|
||
<span class="sd">Startpage's category (for Web-search, News, Videos, ..) is set by</span>
|
||
<span class="sd">:py:obj:`startpage_categ` in settings.yml::</span>
|
||
|
||
<span class="sd"> - name: startpage</span>
|
||
<span class="sd"> engine: startpage</span>
|
||
<span class="sd"> startpage_categ: web</span>
|
||
<span class="sd"> ...</span>
|
||
|
||
<span class="sd">.. hint::</span>
|
||
|
||
<span class="sd"> Supported categories are ``web``, ``news`` and ``images``.</span>
|
||
|
||
<span class="sd">"""</span>
|
||
<span class="c1"># pylint: disable=too-many-statements</span>
|
||
|
||
<span class="kn">import</span><span class="w"> </span><span class="nn">re</span>
|
||
<span class="kn">import</span><span class="w"> </span><span class="nn">typing</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">t</span>
|
||
<span class="kn">from</span><span class="w"> </span><span class="nn">collections</span><span class="w"> </span><span class="kn">import</span> <span class="n">OrderedDict</span>
|
||
<span class="kn">from</span><span class="w"> </span><span class="nn">datetime</span><span class="w"> </span><span class="kn">import</span> <span class="n">datetime</span><span class="p">,</span> <span class="n">timedelta</span>
|
||
<span class="kn">from</span><span class="w"> </span><span class="nn">json</span><span class="w"> </span><span class="kn">import</span> <span class="n">loads</span>
|
||
<span class="kn">from</span><span class="w"> </span><span class="nn">unicodedata</span><span class="w"> </span><span class="kn">import</span> <span class="n">combining</span><span class="p">,</span> <span class="n">normalize</span>
|
||
|
||
<span class="kn">import</span><span class="w"> </span><span class="nn">babel.localedata</span>
|
||
<span class="kn">import</span><span class="w"> </span><span class="nn">dateutil.parser</span>
|
||
<span class="kn">import</span><span class="w"> </span><span class="nn">lxml.html</span>
|
||
|
||
<span class="kn">from</span><span class="w"> </span><span class="nn">searx.enginelib</span><span class="w"> </span><span class="kn">import</span> <span class="n">EngineCache</span>
|
||
<span class="kn">from</span><span class="w"> </span><span class="nn">searx.enginelib.traits</span><span class="w"> </span><span class="kn">import</span> <span class="n">EngineTraits</span>
|
||
<span class="kn">from</span><span class="w"> </span><span class="nn">searx.exceptions</span><span class="w"> </span><span class="kn">import</span> <span class="n">SearxEngineCaptchaException</span>
|
||
<span class="kn">from</span><span class="w"> </span><span class="nn">searx.locales</span><span class="w"> </span><span class="kn">import</span> <span class="n">region_tag</span>
|
||
<span class="kn">from</span><span class="w"> </span><span class="nn">searx.network</span><span class="w"> </span><span class="kn">import</span> <span class="n">get</span> <span class="c1"># see https://github.com/searxng/searxng/issues/762</span>
|
||
<span class="kn">from</span><span class="w"> </span><span class="nn">searx.utils</span><span class="w"> </span><span class="kn">import</span> <span class="p">(</span>
|
||
<span class="n">eval_xpath</span><span class="p">,</span>
|
||
<span class="n">extr</span><span class="p">,</span>
|
||
<span class="n">extract_text</span><span class="p">,</span>
|
||
<span class="n">gen_useragent</span><span class="p">,</span>
|
||
<span class="n">html_to_text</span><span class="p">,</span>
|
||
<span class="n">humanize_bytes</span><span class="p">,</span>
|
||
<span class="n">remove_pua_from_str</span><span class="p">,</span>
|
||
<span class="p">)</span>
|
||
|
||
<span class="c1"># about</span>
|
||
<span class="n">about</span> <span class="o">=</span> <span class="p">{</span>
|
||
<span class="s2">"website"</span><span class="p">:</span> <span class="s2">"https://startpage.com"</span><span class="p">,</span>
|
||
<span class="s2">"wikidata_id"</span><span class="p">:</span> <span class="s2">"Q2333295"</span><span class="p">,</span>
|
||
<span class="s2">"official_api_documentation"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
|
||
<span class="s2">"use_official_api"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||
<span class="s2">"require_api_key"</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
|
||
<span class="s2">"results"</span><span class="p">:</span> <span class="s2">"HTML"</span><span class="p">,</span>
|
||
<span class="p">}</span>
|
||
|
||
<span class="n">startpage_categ</span> <span class="o">=</span> <span class="s2">"web"</span>
|
||
<span class="sd">"""Startpage's category, visit :ref:`startpage categories`.</span>
|
||
<span class="sd">"""</span>
|
||
|
||
<span class="c1"># engine dependent config</span>
|
||
<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s2">"general"</span><span class="p">,</span> <span class="s2">"web"</span><span class="p">]</span>
|
||
<span class="n">paging</span> <span class="o">=</span> <span class="kc">True</span>
|
||
<span class="n">max_page</span> <span class="o">=</span> <span class="mi">18</span>
|
||
<span class="sd">"""Tested 18 pages maximum (argument ``page``), so the max is set to 18."""</span>
|
||
|
||
<span class="n">time_range_support</span> <span class="o">=</span> <span class="kc">True</span>
|
||
<span class="n">safesearch</span> <span class="o">=</span> <span class="kc">True</span>
|
||
|
||
<span class="n">time_range_dict</span> <span class="o">=</span> <span class="p">{</span><span class="s2">"day"</span><span class="p">:</span> <span class="s2">"d"</span><span class="p">,</span> <span class="s2">"week"</span><span class="p">:</span> <span class="s2">"w"</span><span class="p">,</span> <span class="s2">"month"</span><span class="p">:</span> <span class="s2">"m"</span><span class="p">,</span> <span class="s2">"year"</span><span class="p">:</span> <span class="s2">"y"</span><span class="p">}</span>
|
||
<span class="n">safesearch_dict</span> <span class="o">=</span> <span class="p">{</span><span class="mi">0</span><span class="p">:</span> <span class="s2">"1"</span><span class="p">,</span> <span class="mi">1</span><span class="p">:</span> <span class="s2">"0"</span><span class="p">,</span> <span class="mi">2</span><span class="p">:</span> <span class="s2">"0"</span><span class="p">}</span>
|
||
|
||
<span class="c1"># search-url</span>
|
||
<span class="n">base_url</span> <span class="o">=</span> <span class="s2">"https://www.startpage.com"</span>
|
||
<span class="n">search_url</span> <span class="o">=</span> <span class="n">base_url</span> <span class="o">+</span> <span class="s2">"/sp/search"</span>
|
||
|
||
<span class="c1"># specific xpath variables</span>
|
||
<span class="c1"># ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"]</span>
|
||
<span class="c1"># not ads: div[@class="result"] are the direct children of div[@id="results"]</span>
|
||
<span class="n">search_form_xpath</span> <span class="o">=</span> <span class="s1">'//form[@id="search"]'</span>
|
||
<span class="sd">"""XPath of Startpage's origin search form</span>
|
||
|
||
<span class="sd">.. code:: html</span>
|
||
|
||
<span class="sd"> &lt;form action="/sp/search" method="post"&gt;</span>
|
||
<span class="sd"> &lt;input type="text" name="query" value="" ..&gt;</span>
|
||
<span class="sd"> &lt;input type="hidden" name="t" value="device"&gt;</span>
|
||
<span class="sd"> &lt;input type="hidden" name="lui" value="english"&gt;</span>
|
||
<span class="sd"> &lt;input type="hidden" name="sc" value="Q7Mt5TRqowKB00"&gt;</span>
|
||
<span class="sd"> &lt;input type="hidden" name="cat" value="web"&gt;</span>
|
||
<span class="sd"> &lt;input type="hidden" class="abp" id="abp-input" name="abp" value="1"&gt;</span>
|
||
<span class="sd"> &lt;/form&gt;</span>
|
||
<span class="sd">"""</span>
|
||
|
||
|
||
<span class="n">CACHE</span><span class="p">:</span> <span class="n">EngineCache</span>
|
||
<span class="sd">"""Persistent (SQLite) key/value cache that deletes its values after ``expire``</span>
|
||
<span class="sd">seconds."""</span>
|
||
|
||
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">init</span><span class="p">(</span><span class="n">_</span><span class="p">):</span>
|
||
<span class="k">global</span> <span class="n">CACHE</span> <span class="c1"># pylint: disable=global-statement</span>
|
||
|
||
<span class="c1"># hint: all three startpage engines (WEB, Images & News) can/should use the</span>
|
||
<span class="c1"># same sc_code ..</span>
|
||
<span class="n">CACHE</span> <span class="o">=</span> <span class="n">EngineCache</span><span class="p">(</span><span class="s2">"startpage"</span><span class="p">)</span>
|
||
|
||
|
||
<span class="n">sc_code_cache_sec</span> <span class="o">=</span> <span class="mi">3600</span>
|
||
<span class="sd">"""Time in seconds the sc-code is kept in the persistent cache, see :py:obj:`get_sc_code`."""</span>
|
||
|
||
|
||
<div class="viewcode-block" id="get_sc_code">
|
||
<a class="viewcode-back" href="../../../dev/engines/online/startpage.html#searx.engines.startpage.get_sc_code">[docs]</a>
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">get_sc_code</span><span class="p">(</span><span class="n">params</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""Get an actual ``sc`` argument from Startpage's search form (HTML page).</span>
|
||
|
||
<span class="sd"> Startpage puts a ``sc`` argument on every HTML :py:obj:`search form</span>
|
||
<span class="sd"> &lt;search_form_xpath&gt;`. Without this argument Startpage considers the request</span>
|
||
<span class="sd"> is from a bot. We do not know what is encoded in the value of the ``sc``</span>
|
||
<span class="sd"> argument, but it seems to be a kind of a *timestamp*.</span>
|
||
|
||
<span class="sd"> Startpage's search form generates a new sc-code on each request. This</span>
|
||
<span class="sd"> function scrapes a new sc-code from Startpage's home page every</span>
|
||
<span class="sd"> :py:obj:`sc_code_cache_sec` seconds."""</span>
|
||
|
||
<span class="n">sc_code</span> <span class="o">=</span> <span class="n">CACHE</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"SC_CODE"</span><span class="p">)</span>
|
||
<span class="k">if</span> <span class="n">sc_code</span><span class="p">:</span>
|
||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"get_sc_code: using cached value: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">sc_code</span><span class="p">)</span>
|
||
<span class="k">return</span> <span class="n">sc_code</span>
|
||
|
||
<span class="n">get_sc_url</span> <span class="o">=</span> <span class="n">base_url</span> <span class="o">+</span> <span class="s2">"/"</span>
|
||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"get_sc_code: querying new sc timestamp @ </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">get_sc_url</span><span class="p">)</span>
|
||
|
||
<span class="n">headers</span> <span class="o">=</span> <span class="p">{</span><span class="o">**</span><span class="n">params</span><span class="p">[</span><span class="s2">"headers"</span><span class="p">]}</span>
|
||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"get_sc_code: request headers: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">headers</span><span class="p">)</span>
|
||
<span class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span><span class="n">get_sc_url</span><span class="p">,</span> <span class="n">headers</span><span class="o">=</span><span class="n">headers</span><span class="p">)</span>
|
||
|
||
<span class="c1"># ?? x = network.get('https://www.startpage.com/sp/cdn/images/filter-chevron.svg', headers=headers)</span>
|
||
<span class="c1"># ?? https://www.startpage.com/sp/cdn/images/filter-chevron.svg</span>
|
||
<span class="c1"># ?? ping-back URL: https://www.startpage.com/sp/pb?sc=TLsB0oITjZ8F21</span>
|
||
|
||
<span class="k">if</span> <span class="nb">str</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">url</span><span class="p">)</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s2">"https://www.startpage.com/sp/captcha"</span><span class="p">):</span>
|
||
<span class="k">raise</span> <span class="n">SearxEngineCaptchaException</span><span class="p">(</span>
|
||
<span class="n">message</span><span class="o">=</span><span class="s2">"get_sc_code: got redirected to https://www.startpage.com/sp/captcha"</span><span class="p">,</span>
|
||
<span class="p">)</span>
|
||
|
||
<span class="n">dom</span> <span class="o">=</span> <span class="n">lxml</span><span class="o">.</span><span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span>
|
||
|
||
<span class="k">try</span><span class="p">:</span>
|
||
<span class="n">sc_code</span> <span class="o">=</span> <span class="n">eval_xpath</span><span class="p">(</span><span class="n">dom</span><span class="p">,</span> <span class="n">search_form_xpath</span> <span class="o">+</span> <span class="s1">'//input[@name="sc"]/@value'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
||
<span class="k">except</span> <span class="ne">IndexError</span> <span class="k">as</span> <span class="n">exc</span><span class="p">:</span>
|
||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"suspend startpage API --> https://github.com/searxng/searxng/pull/695"</span><span class="p">)</span>
|
||
<span class="k">raise</span> <span class="n">SearxEngineCaptchaException</span><span class="p">(</span>
|
||
<span class="n">message</span><span class="o">=</span><span class="s2">"get_sc_code: [PR-695] querying new sc timestamp failed! (</span><span class="si">%s</span><span class="s2">)"</span> <span class="o">%</span> <span class="n">resp</span><span class="o">.</span><span class="n">url</span><span class="p">,</span>
|
||
<span class="p">)</span> <span class="kn">from</span><span class="w"> </span><span class="nn">exc</span>
|
||
|
||
<span class="n">sc_code</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">sc_code</span><span class="p">)</span>
|
||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"get_sc_code: new value is: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">sc_code</span><span class="p">)</span>
|
||
<span class="n">CACHE</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="n">key</span><span class="o">=</span><span class="s2">"SC_CODE"</span><span class="p">,</span> <span class="n">value</span><span class="o">=</span><span class="n">sc_code</span><span class="p">,</span> <span class="n">expire</span><span class="o">=</span><span class="n">sc_code_cache_sec</span><span class="p">)</span>
|
||
<span class="k">return</span> <span class="n">sc_code</span></div>
|
||
|
||
|
||
|
||
<div class="viewcode-block" id="request">
|
||
<a class="viewcode-back" href="../../../dev/engines/online/startpage.html#searx.engines.startpage.request">[docs]</a>
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">request</span><span class="p">(</span><span class="n">query</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""Assemble a Startpage request.</span>
|
||
|
||
<span class="sd"> To avoid CAPTCHAs we need to send a well formed HTTP POST request with a</span>
|
||
<span class="sd"> cookie. We need to form a request that is identical to the request built by</span>
|
||
<span class="sd"> Startpage's search form:</span>
|
||
|
||
<span class="sd"> - in the cookie the **region** is selected</span>
|
||
<span class="sd"> - in the HTTP POST data the **language** is selected</span>
|
||
|
||
<span class="sd"> Additionally the arguments from Startpage's search form need to be set in the</span>
|
||
<span class="sd"> HTTP POST data / compare ``&lt;input&gt;`` elements: :py:obj:`search_form_xpath`.</span>
|
||
<span class="sd"> """</span>
|
||
<span class="n">engine_region</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_region</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s2">"searxng_locale"</span><span class="p">],</span> <span class="s2">"en-US"</span><span class="p">)</span>
|
||
<span class="n">engine_language</span> <span class="o">=</span> <span class="n">traits</span><span class="o">.</span><span class="n">get_language</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s2">"searxng_locale"</span><span class="p">],</span> <span class="s2">"en"</span><span class="p">)</span>
|
||
|
||
<span class="n">params</span><span class="p">[</span><span class="s2">"headers"</span><span class="p">][</span><span class="s2">"Origin"</span><span class="p">]</span> <span class="o">=</span> <span class="n">base_url</span>
|
||
<span class="n">params</span><span class="p">[</span><span class="s2">"headers"</span><span class="p">][</span><span class="s2">"Referer"</span><span class="p">]</span> <span class="o">=</span> <span class="n">base_url</span> <span class="o">+</span> <span class="s2">"/"</span>
|
||
|
||
<span class="c1"># Build form data</span>
|
||
<span class="n">args</span> <span class="o">=</span> <span class="p">{</span>
|
||
<span class="s2">"query"</span><span class="p">:</span> <span class="n">query</span><span class="p">,</span>
|
||
<span class="s2">"cat"</span><span class="p">:</span> <span class="n">startpage_categ</span><span class="p">,</span>
|
||
<span class="s2">"t"</span><span class="p">:</span> <span class="s2">"device"</span><span class="p">,</span>
|
||
<span class="s2">"sc"</span><span class="p">:</span> <span class="n">get_sc_code</span><span class="p">(</span><span class="n">params</span><span class="p">),</span>
|
||
<span class="s2">"with_date"</span><span class="p">:</span> <span class="n">time_range_dict</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="s2">"time_range"</span><span class="p">],</span> <span class="s2">""</span><span class="p">),</span>
|
||
<span class="s2">"abp"</span><span class="p">:</span> <span class="s2">"1"</span><span class="p">,</span>
|
||
<span class="s2">"abd"</span><span class="p">:</span> <span class="s2">"1"</span><span class="p">,</span>
|
||
<span class="s2">"abe"</span><span class="p">:</span> <span class="s2">"1"</span><span class="p">,</span>
|
||
<span class="p">}</span>
|
||
|
||
<span class="k">if</span> <span class="n">engine_language</span><span class="p">:</span>
|
||
<span class="n">args</span><span class="p">[</span><span class="s2">"language"</span><span class="p">]</span> <span class="o">=</span> <span class="n">engine_language</span>
|
||
<span class="n">args</span><span class="p">[</span><span class="s2">"lui"</span><span class="p">]</span> <span class="o">=</span> <span class="n">engine_language</span>
|
||
|
||
<span class="k">if</span> <span class="n">params</span><span class="p">[</span><span class="s2">"pageno"</span><span class="p">]</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span>
|
||
<span class="n">args</span><span class="p">[</span><span class="s2">"page"</span><span class="p">]</span> <span class="o">=</span> <span class="n">params</span><span class="p">[</span><span class="s2">"pageno"</span><span class="p">]</span>
|
||
<span class="n">args</span><span class="p">[</span><span class="s2">"segment"</span><span class="p">]</span> <span class="o">=</span> <span class="s2">"startpage.udog"</span>
|
||
|
||
<span class="c1"># Build cookie</span>
|
||
<span class="n">lang_homepage</span> <span class="o">=</span> <span class="s2">"en"</span>
|
||
<span class="n">cookie</span> <span class="o">=</span> <span class="n">OrderedDict</span><span class="p">()</span>
|
||
<span class="n">cookie</span><span class="p">[</span><span class="s2">"date_time"</span><span class="p">]</span> <span class="o">=</span> <span class="s2">"world"</span>
|
||
<span class="n">cookie</span><span class="p">[</span><span class="s2">"disable_family_filter"</span><span class="p">]</span> <span class="o">=</span> <span class="n">safesearch_dict</span><span class="p">[</span><span class="n">params</span><span class="p">[</span><span class="s2">"safesearch"</span><span class="p">]]</span>
|
||
<span class="n">cookie</span><span class="p">[</span><span class="s2">"disable_open_in_new_window"</span><span class="p">]</span> <span class="o">=</span> <span class="s2">"0"</span>
|
||
<span class="n">cookie</span><span class="p">[</span><span class="s2">"enable_post_method"</span><span class="p">]</span> <span class="o">=</span> <span class="s2">"1"</span> <span class="c1"># hint: POST</span>
|
||
<span class="n">cookie</span><span class="p">[</span><span class="s2">"enable_proxy_safety_suggest"</span><span class="p">]</span> <span class="o">=</span> <span class="s2">"1"</span>
|
||
<span class="n">cookie</span><span class="p">[</span><span class="s2">"enable_stay_control"</span><span class="p">]</span> <span class="o">=</span> <span class="s2">"1"</span>
|
||
<span class="n">cookie</span><span class="p">[</span><span class="s2">"instant_answers"</span><span class="p">]</span> <span class="o">=</span> <span class="s2">"1"</span>
|
||
<span class="n">cookie</span><span class="p">[</span><span class="s2">"lang_homepage"</span><span class="p">]</span> <span class="o">=</span> <span class="s2">"s/device/</span><span class="si">%s</span><span class="s2">/"</span> <span class="o">%</span> <span class="n">lang_homepage</span>
|
||
<span class="n">cookie</span><span class="p">[</span><span class="s2">"num_of_results"</span><span class="p">]</span> <span class="o">=</span> <span class="s2">"10"</span>
|
||
<span class="n">cookie</span><span class="p">[</span><span class="s2">"suggestions"</span><span class="p">]</span> <span class="o">=</span> <span class="s2">"1"</span>
|
||
<span class="n">cookie</span><span class="p">[</span><span class="s2">"wt_unit"</span><span class="p">]</span> <span class="o">=</span> <span class="s2">"celsius"</span>
|
||
|
||
<span class="k">if</span> <span class="n">engine_language</span><span class="p">:</span>
|
||
<span class="n">cookie</span><span class="p">[</span><span class="s2">"language"</span><span class="p">]</span> <span class="o">=</span> <span class="n">engine_language</span>
|
||
<span class="n">cookie</span><span class="p">[</span><span class="s2">"language_ui"</span><span class="p">]</span> <span class="o">=</span> <span class="n">engine_language</span>
|
||
|
||
<span class="k">if</span> <span class="n">engine_region</span><span class="p">:</span>
|
||
<span class="n">cookie</span><span class="p">[</span><span class="s2">"search_results_region"</span><span class="p">]</span> <span class="o">=</span> <span class="n">engine_region</span>
|
||
|
||
<span class="n">params</span><span class="p">[</span><span class="s2">"cookies"</span><span class="p">][</span><span class="s2">"preferences"</span><span class="p">]</span> <span class="o">=</span> <span class="s2">"N1N"</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="s2">"</span><span class="si">%s</span><span class="s2">EEE</span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="n">x</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">cookie</span><span class="o">.</span><span class="n">items</span><span class="p">()])</span>
|
||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"cookie preferences: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">params</span><span class="p">[</span><span class="s2">"cookies"</span><span class="p">][</span><span class="s2">"preferences"</span><span class="p">])</span>
|
||
|
||
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"data: </span><span class="si">%s</span><span class="s2">"</span><span class="p">,</span> <span class="n">args</span><span class="p">)</span>
|
||
<span class="n">params</span><span class="p">[</span><span class="s2">"data"</span><span class="p">]</span> <span class="o">=</span> <span class="n">args</span>
|
||
<span class="n">params</span><span class="p">[</span><span class="s2">"method"</span><span class="p">]</span> <span class="o">=</span> <span class="s2">"POST"</span>
|
||
<span class="n">params</span><span class="p">[</span><span class="s2">"url"</span><span class="p">]</span> <span class="o">=</span> <span class="n">search_url</span>
|
||
|
||
<span class="k">return</span> <span class="n">params</span></div>
|
||
|
||
|
||
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">_parse_published_date</span><span class="p">(</span><span class="n">content</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">datetime</span> <span class="o">|</span> <span class="kc">None</span><span class="p">]:</span>
|
||
<span class="n">published_date</span> <span class="o">=</span> <span class="kc">None</span>
|
||
|
||
<span class="c1"># check if search result starts with something like: "2 Sep 2014 ... "</span>
|
||
<span class="k">if</span> <span class="n">re</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="sa">r</span><span class="s2">"^([1-9]|[1-2][0-9]|3[0-1]) [A-Z][a-z]</span><span class="si">{2}</span><span class="s2"> [0-9]</span><span class="si">{4}</span><span class="s2"> \.\.\. "</span><span class="p">,</span> <span class="n">content</span><span class="p">):</span>
|
||
<span class="n">date_pos</span> <span class="o">=</span> <span class="n">content</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s2">"..."</span><span class="p">)</span> <span class="o">+</span> <span class="mi">4</span>
|
||
<span class="n">date_string</span> <span class="o">=</span> <span class="n">content</span><span class="p">[</span><span class="mi">0</span> <span class="p">:</span> <span class="n">date_pos</span> <span class="o">-</span> <span class="mi">5</span><span class="p">]</span>
|
||
<span class="c1"># fix content string</span>
|
||
<span class="n">content</span> <span class="o">=</span> <span class="n">content</span><span class="p">[</span><span class="n">date_pos</span><span class="p">:]</span>
|
||
|
||
<span class="k">try</span><span class="p">:</span>
|
||
<span class="n">published_date</span> <span class="o">=</span> <span class="n">dateutil</span><span class="o">.</span><span class="n">parser</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">date_string</span><span class="p">,</span> <span class="n">dayfirst</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||
<span class="k">except</span> <span class="ne">ValueError</span><span class="p">:</span>
|
||
<span class="k">pass</span>
|
||
|
||
<span class="c1"># check if search result starts with something like: "5 days ago ... "</span>
|
||
<span class="k">elif</span> <span class="n">re</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="sa">r</span><span class="s2">"^[0-9]+ days? ago \.\.\. "</span><span class="p">,</span> <span class="n">content</span><span class="p">):</span>
|
||
<span class="n">date_pos</span> <span class="o">=</span> <span class="n">content</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s2">"..."</span><span class="p">)</span> <span class="o">+</span> <span class="mi">4</span>
|
||
<span class="n">date_string</span> <span class="o">=</span> <span class="n">content</span><span class="p">[</span><span class="mi">0</span> <span class="p">:</span> <span class="n">date_pos</span> <span class="o">-</span> <span class="mi">5</span><span class="p">]</span>
|
||
|
||
<span class="c1"># calculate datetime</span>
|
||
<span class="n">published_date</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span> <span class="o">-</span> <span class="n">timedelta</span><span class="p">(</span><span class="n">days</span><span class="o">=</span><span class="nb">int</span><span class="p">(</span><span class="n">re</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="sa">r</span><span class="s2">"\d+"</span><span class="p">,</span> <span class="n">date_string</span><span class="p">)</span><span class="o">.</span><span class="n">group</span><span class="p">()))</span> <span class="c1"># type: ignore</span>
|
||
|
||
<span class="c1"># fix content string</span>
|
||
<span class="n">content</span> <span class="o">=</span> <span class="n">content</span><span class="p">[</span><span class="n">date_pos</span><span class="p">:]</span>
|
||
|
||
<span class="k">return</span> <span class="n">content</span><span class="p">,</span> <span class="n">published_date</span>
|
||
|
||
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">_get_web_result</span><span class="p">(</span><span class="n">result</span><span class="p">):</span>
|
||
<span class="n">content</span> <span class="o">=</span> <span class="n">html_to_text</span><span class="p">(</span><span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"description"</span><span class="p">))</span>
|
||
<span class="n">content</span><span class="p">,</span> <span class="n">publishedDate</span> <span class="o">=</span> <span class="n">_parse_published_date</span><span class="p">(</span><span class="n">content</span><span class="p">)</span>
|
||
|
||
<span class="k">return</span> <span class="p">{</span>
|
||
<span class="s2">"url"</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s2">"clickUrl"</span><span class="p">],</span>
|
||
<span class="s2">"title"</span><span class="p">:</span> <span class="n">html_to_text</span><span class="p">(</span><span class="n">result</span><span class="p">[</span><span class="s2">"title"</span><span class="p">]),</span>
|
||
<span class="s2">"content"</span><span class="p">:</span> <span class="n">content</span><span class="p">,</span>
|
||
<span class="s2">"publishedDate"</span><span class="p">:</span> <span class="n">publishedDate</span><span class="p">,</span>
|
||
<span class="p">}</span>
|
||
|
||
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">_get_news_result</span><span class="p">(</span><span class="n">result</span><span class="p">):</span>
|
||
|
||
<span class="n">title</span> <span class="o">=</span> <span class="n">remove_pua_from_str</span><span class="p">(</span><span class="n">html_to_text</span><span class="p">(</span><span class="n">result</span><span class="p">[</span><span class="s2">"title"</span><span class="p">]))</span>
|
||
<span class="n">content</span> <span class="o">=</span> <span class="n">remove_pua_from_str</span><span class="p">(</span><span class="n">html_to_text</span><span class="p">(</span><span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"description"</span><span class="p">)))</span>
|
||
|
||
<span class="n">publishedDate</span> <span class="o">=</span> <span class="kc">None</span>
|
||
<span class="k">if</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"date"</span><span class="p">):</span>
|
||
<span class="k">try</span><span class="p">:</span>
|
||
<span class="n">publishedDate</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">fromtimestamp</span><span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="n">result</span><span class="p">[</span><span class="s2">"date"</span><span class="p">])</span> <span class="o">/</span> <span class="mi">1000</span><span class="p">)</span>
|
||
<span class="k">except</span> <span class="p">(</span><span class="ne">TypeError</span><span class="p">,</span> <span class="ne">ValueError</span><span class="p">):</span>
|
||
<span class="k">pass</span>
|
||
|
||
<span class="n">thumbnailUrl</span> <span class="o">=</span> <span class="kc">None</span>
|
||
<span class="k">if</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"thumbnailUrl"</span><span class="p">):</span>
|
||
<span class="n">thumbnailUrl</span> <span class="o">=</span> <span class="n">base_url</span> <span class="o">+</span> <span class="n">result</span><span class="p">[</span><span class="s2">"thumbnailUrl"</span><span class="p">]</span>
|
||
|
||
<span class="k">return</span> <span class="p">{</span>
|
||
<span class="s2">"url"</span><span class="p">:</span> <span class="n">result</span><span class="p">[</span><span class="s2">"clickUrl"</span><span class="p">],</span>
|
||
<span class="s2">"title"</span><span class="p">:</span> <span class="n">title</span><span class="p">,</span>
|
||
<span class="s2">"content"</span><span class="p">:</span> <span class="n">content</span><span class="p">,</span>
|
||
<span class="s2">"publishedDate"</span><span class="p">:</span> <span class="n">publishedDate</span><span class="p">,</span>
|
||
<span class="s2">"thumbnail"</span><span class="p">:</span> <span class="n">thumbnailUrl</span><span class="p">,</span>
|
||
<span class="p">}</span>
|
||
|
||
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">_get_image_result</span><span class="p">(</span><span class="n">result</span><span class="p">)</span> <span class="o">-></span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">t</span><span class="o">.</span><span class="n">Any</span><span class="p">]</span> <span class="o">|</span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="n">url</span> <span class="o">=</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"altClickUrl"</span><span class="p">)</span>
|
||
<span class="k">if</span> <span class="ow">not</span> <span class="n">url</span><span class="p">:</span>
|
||
<span class="k">return</span> <span class="kc">None</span>
|
||
|
||
<span class="n">thumbnailUrl</span> <span class="o">=</span> <span class="kc">None</span>
|
||
<span class="k">if</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"thumbnailUrl"</span><span class="p">):</span>
|
||
<span class="n">thumbnailUrl</span> <span class="o">=</span> <span class="n">base_url</span> <span class="o">+</span> <span class="n">result</span><span class="p">[</span><span class="s2">"thumbnailUrl"</span><span class="p">]</span>
|
||
|
||
<span class="n">resolution</span> <span class="o">=</span> <span class="kc">None</span>
|
||
<span class="k">if</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"width"</span><span class="p">)</span> <span class="ow">and</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"height"</span><span class="p">):</span>
|
||
<span class="n">resolution</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">result</span><span class="p">[</span><span class="s1">'width'</span><span class="p">]</span><span class="si">}</span><span class="s2">x</span><span class="si">{</span><span class="n">result</span><span class="p">[</span><span class="s1">'height'</span><span class="p">]</span><span class="si">}</span><span class="s2">"</span>
|
||
|
||
<span class="n">filesize</span> <span class="o">=</span> <span class="kc">None</span>
|
||
<span class="k">if</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"filesize"</span><span class="p">):</span>
|
||
<span class="n">size_str</span> <span class="o">=</span> <span class="s2">""</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="nb">filter</span><span class="p">(</span><span class="nb">str</span><span class="o">.</span><span class="n">isdigit</span><span class="p">,</span> <span class="n">result</span><span class="p">[</span><span class="s2">"filesize"</span><span class="p">]))</span>
|
||
<span class="n">filesize</span> <span class="o">=</span> <span class="n">humanize_bytes</span><span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="n">size_str</span><span class="p">))</span>
|
||
|
||
<span class="k">return</span> <span class="p">{</span>
|
||
<span class="s2">"template"</span><span class="p">:</span> <span class="s2">"images.html"</span><span class="p">,</span>
|
||
<span class="s2">"url"</span><span class="p">:</span> <span class="n">url</span><span class="p">,</span>
|
||
<span class="s2">"title"</span><span class="p">:</span> <span class="n">html_to_text</span><span class="p">(</span><span class="n">result</span><span class="p">[</span><span class="s2">"title"</span><span class="p">]),</span>
|
||
<span class="s2">"content"</span><span class="p">:</span> <span class="s2">""</span><span class="p">,</span>
|
||
<span class="s2">"img_src"</span><span class="p">:</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"rawImageUrl"</span><span class="p">),</span>
|
||
<span class="s2">"thumbnail_src"</span><span class="p">:</span> <span class="n">thumbnailUrl</span><span class="p">,</span>
|
||
<span class="s2">"resolution"</span><span class="p">:</span> <span class="n">resolution</span><span class="p">,</span>
|
||
<span class="s2">"img_format"</span><span class="p">:</span> <span class="n">result</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"format"</span><span class="p">),</span>
|
||
<span class="s2">"filesize"</span><span class="p">:</span> <span class="n">filesize</span><span class="p">,</span>
|
||
<span class="p">}</span>
|
||
|
||
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">response</span><span class="p">(</span><span class="n">resp</span><span class="p">):</span>
|
||
<span class="n">categ</span> <span class="o">=</span> <span class="n">startpage_categ</span><span class="o">.</span><span class="n">capitalize</span><span class="p">()</span>
|
||
<span class="n">results_raw</span> <span class="o">=</span> <span class="s2">"{"</span> <span class="o">+</span> <span class="n">extr</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">,</span> <span class="sa">f</span><span class="s2">"React.createElement(UIStartpage.AppSerp</span><span class="si">{</span><span class="n">categ</span><span class="si">}</span><span class="s2">, </span><span class="se">{{</span><span class="s2">"</span><span class="p">,</span> <span class="s2">"}})"</span><span class="p">)</span> <span class="o">+</span> <span class="s2">"}}"</span>
|
||
|
||
<span class="k">if</span> <span class="n">resp</span><span class="o">.</span><span class="n">headers</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"Location"</span><span class="p">,</span> <span class="s2">""</span><span class="p">)</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s2">"https://www.startpage.com/sp/captcha"</span><span class="p">):</span>
|
||
<span class="k">raise</span> <span class="n">SearxEngineCaptchaException</span><span class="p">()</span>
|
||
|
||
<span class="n">results_json</span> <span class="o">=</span> <span class="n">loads</span><span class="p">(</span><span class="n">results_raw</span><span class="p">)</span>
|
||
<span class="n">results_obj</span> <span class="o">=</span> <span class="n">results_json</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"render"</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"presenter"</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"regions"</span><span class="p">,</span> <span class="p">{})</span>
|
||
|
||
<span class="n">results</span> <span class="o">=</span> <span class="p">[]</span>
|
||
<span class="k">for</span> <span class="n">results_categ</span> <span class="ow">in</span> <span class="n">results_obj</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"mainline"</span><span class="p">,</span> <span class="p">[]):</span>
|
||
<span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">results_categ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"results"</span><span class="p">,</span> <span class="p">[]):</span>
|
||
<span class="k">if</span> <span class="n">results_categ</span><span class="p">[</span><span class="s2">"display_type"</span><span class="p">]</span> <span class="o">==</span> <span class="s2">"web-google"</span><span class="p">:</span>
|
||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">_get_web_result</span><span class="p">(</span><span class="n">item</span><span class="p">))</span>
|
||
<span class="k">elif</span> <span class="n">results_categ</span><span class="p">[</span><span class="s2">"display_type"</span><span class="p">]</span> <span class="o">==</span> <span class="s2">"news-bing"</span><span class="p">:</span>
|
||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">_get_news_result</span><span class="p">(</span><span class="n">item</span><span class="p">))</span>
|
||
<span class="k">elif</span> <span class="s2">"images"</span> <span class="ow">in</span> <span class="n">results_categ</span><span class="p">[</span><span class="s2">"display_type"</span><span class="p">]:</span>
|
||
<span class="n">item</span> <span class="o">=</span> <span class="n">_get_image_result</span><span class="p">(</span><span class="n">item</span><span class="p">)</span>
|
||
<span class="k">if</span> <span class="n">item</span><span class="p">:</span>
|
||
<span class="n">results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">item</span><span class="p">)</span>
|
||
|
||
<span class="k">return</span> <span class="n">results</span>
|
||
|
||
|
||
<div class="viewcode-block" id="fetch_traits">
|
||
<a class="viewcode-back" href="../../../dev/engines/online/startpage.html#searx.engines.startpage.fetch_traits">[docs]</a>
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">fetch_traits</span><span class="p">(</span><span class="n">engine_traits</span><span class="p">:</span> <span class="n">EngineTraits</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""Fetch :ref:`languages &lt;startpage languages&gt;` and :ref:`regions &lt;startpage</span>
|
||
<span class="sd"> regions&gt;` from Startpage."""</span>
|
||
<span class="c1"># pylint: disable=too-many-branches</span>
|
||
|
||
<span class="n">headers</span> <span class="o">=</span> <span class="p">{</span>
|
||
<span class="s2">"User-Agent"</span><span class="p">:</span> <span class="n">gen_useragent</span><span class="p">(),</span>
|
||
<span class="s2">"Accept-Language"</span><span class="p">:</span> <span class="s2">"en-US,en;q=0.5"</span><span class="p">,</span> <span class="c1"># bing needs to set the English language</span>
|
||
<span class="p">}</span>
|
||
|
||
<span class="n">resp</span> <span class="o">=</span> <span class="n">get</span><span class="p">(</span>
|
||
<span class="s2">"https://www.startpage.com/do/settings"</span><span class="p">,</span>
|
||
<span class="n">headers</span><span class="o">=</span><span class="n">headers</span><span class="p">,</span>
|
||
<span class="n">timeout</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span>
|
||
<span class="p">)</span>
|
||
<span class="k">if</span> <span class="ow">not</span> <span class="n">resp</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span>
|
||
<span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="s2">"Response from Startpage is not OK."</span><span class="p">)</span>
|
||
|
||
<span class="n">dom</span> <span class="o">=</span> <span class="n">lxml</span><span class="o">.</span><span class="n">html</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="n">resp</span><span class="o">.</span><span class="n">text</span><span class="p">)</span>
|
||
|
||
<span class="c1"># regions</span>
|
||
|
||
<span class="n">sp_region_names</span> <span class="o">=</span> <span class="p">[]</span>
|
||
<span class="k">for</span> <span class="n">option</span> <span class="ow">in</span> <span class="n">dom</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'//form[@name="settings"]//select[@name="search_results_region"]/option'</span><span class="p">):</span>
|
||
<span class="n">sp_region_names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">option</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"value"</span><span class="p">))</span>
|
||
|
||
<span class="k">for</span> <span class="n">eng_tag</span> <span class="ow">in</span> <span class="n">sp_region_names</span><span class="p">:</span>
|
||
<span class="k">if</span> <span class="n">eng_tag</span> <span class="o">==</span> <span class="s2">"all"</span><span class="p">:</span>
|
||
<span class="k">continue</span>
|
||
<span class="n">babel_region_tag</span> <span class="o">=</span> <span class="p">{</span><span class="s2">"no_NO"</span><span class="p">:</span> <span class="s2">"nb_NO"</span><span class="p">}</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">eng_tag</span><span class="p">,</span> <span class="n">eng_tag</span><span class="p">)</span> <span class="c1"># norway</span>
|
||
|
||
<span class="k">if</span> <span class="s2">"-"</span> <span class="ow">in</span> <span class="n">babel_region_tag</span><span class="p">:</span> <span class="c1"># pyright: ignore[reportOperatorIssue]</span>
|
||
<span class="n">l</span><span class="p">,</span> <span class="n">r</span> <span class="o">=</span> <span class="n">babel_region_tag</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">"-"</span><span class="p">)</span>
|
||
<span class="n">r</span> <span class="o">=</span> <span class="n">r</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">"_"</span><span class="p">)[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span>
|
||
<span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">region_tag</span><span class="p">(</span><span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">l</span> <span class="o">+</span> <span class="s2">"_"</span> <span class="o">+</span> <span class="n">r</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s2">"_"</span><span class="p">))</span>
|
||
|
||
<span class="k">else</span><span class="p">:</span>
|
||
<span class="k">try</span><span class="p">:</span>
|
||
<span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">region_tag</span><span class="p">(</span><span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">babel_region_tag</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s2">"_"</span><span class="p">))</span>
|
||
|
||
<span class="k">except</span> <span class="n">babel</span><span class="o">.</span><span class="n">UnknownLocaleError</span><span class="p">:</span>
|
||
<span class="nb">print</span><span class="p">(</span><span class="s2">"IGNORE: can't determine babel locale of startpage's locale </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="n">eng_tag</span><span class="p">)</span>
|
||
<span class="k">continue</span>
|
||
|
||
<span class="n">conflict</span> <span class="o">=</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">regions</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_tag</span><span class="p">)</span>
|
||
<span class="k">if</span> <span class="n">conflict</span><span class="p">:</span>
|
||
<span class="k">if</span> <span class="n">conflict</span> <span class="o">!=</span> <span class="n">eng_tag</span><span class="p">:</span>
|
||
<span class="nb">print</span><span class="p">(</span><span class="s2">"CONFLICT: babel </span><span class="si">%s</span><span class="s2"> --> </span><span class="si">%s</span><span class="s2">, </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">sxng_tag</span><span class="p">,</span> <span class="n">conflict</span><span class="p">,</span> <span class="n">eng_tag</span><span class="p">))</span>
|
||
<span class="k">continue</span>
|
||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">regions</span><span class="p">[</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_tag</span>
|
||
|
||
<span class="c1"># languages</span>
|
||
|
||
<span class="n">catalog_engine2code</span> <span class="o">=</span> <span class="p">{</span><span class="n">name</span><span class="o">.</span><span class="n">lower</span><span class="p">():</span> <span class="n">lang_code</span> <span class="k">for</span> <span class="n">lang_code</span><span class="p">,</span> <span class="n">name</span> <span class="ow">in</span> <span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="p">(</span><span class="s2">"en"</span><span class="p">)</span><span class="o">.</span><span class="n">languages</span><span class="o">.</span><span class="n">items</span><span class="p">()}</span>
|
||
|
||
<span class="c1"># get the native name of every language known by babel</span>
|
||
|
||
<span class="k">for</span> <span class="n">lang_code</span> <span class="ow">in</span> <span class="nb">filter</span><span class="p">(</span>
|
||
<span class="k">lambda</span> <span class="n">lang_code</span><span class="p">:</span> <span class="n">lang_code</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s2">"_"</span><span class="p">)</span> <span class="o">==</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span>
|
||
<span class="n">babel</span><span class="o">.</span><span class="n">localedata</span><span class="o">.</span><span class="n">locale_identifiers</span><span class="p">(),</span>
|
||
<span class="p">):</span>
|
||
<span class="n">native_name</span> <span class="o">=</span> <span class="n">babel</span><span class="o">.</span><span class="n">Locale</span><span class="p">(</span><span class="n">lang_code</span><span class="p">)</span><span class="o">.</span><span class="n">get_language_name</span><span class="p">()</span>
|
||
<span class="k">if</span> <span class="ow">not</span> <span class="n">native_name</span><span class="p">:</span>
|
||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"IGNORE: language name of startpage's language </span><span class="si">{</span><span class="n">lang_code</span><span class="si">}</span><span class="s2"> is unknown by babel"</span><span class="p">)</span>
|
||
<span class="k">continue</span>
|
||
<span class="n">native_name</span> <span class="o">=</span> <span class="n">native_name</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span>
|
||
<span class="c1"># add native name exactly as it is</span>
|
||
<span class="n">catalog_engine2code</span><span class="p">[</span><span class="n">native_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">lang_code</span>
|
||
|
||
<span class="c1"># add "normalized" language name (i.e. français becomes francais and español becomes espanol)</span>
|
||
<span class="n">unaccented_name</span> <span class="o">=</span> <span class="s2">""</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="nb">filter</span><span class="p">(</span><span class="k">lambda</span> <span class="n">c</span><span class="p">:</span> <span class="ow">not</span> <span class="n">combining</span><span class="p">(</span><span class="n">c</span><span class="p">),</span> <span class="n">normalize</span><span class="p">(</span><span class="s2">"NFKD"</span><span class="p">,</span> <span class="n">native_name</span><span class="p">)))</span>
|
||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">unaccented_name</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span><span class="n">unaccented_name</span><span class="o">.</span><span class="n">encode</span><span class="p">()):</span>
|
||
<span class="c1"># add only if result is ascii (otherwise "normalization" didn't work)</span>
|
||
<span class="n">catalog_engine2code</span><span class="p">[</span><span class="n">unaccented_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">lang_code</span>
|
||
|
||
<span class="c1"># values that can't be determined by babel's languages names</span>
|
||
|
||
<span class="n">catalog_engine2code</span><span class="o">.</span><span class="n">update</span><span class="p">(</span>
|
||
<span class="p">{</span>
|
||
<span class="c1"># Brazilian Portuguese</span>
|
||
<span class="s2">"brazilian"</span><span class="p">:</span> <span class="s2">"pt_BR"</span><span class="p">,</span>
|
||
<span class="c1"># traditional chinese used in ..</span>
|
||
<span class="s2">"fantizhengwen"</span><span class="p">:</span> <span class="s2">"zh_Hant"</span><span class="p">,</span>
|
||
<span class="c1"># Korean alphabet</span>
|
||
<span class="s2">"hangul"</span><span class="p">:</span> <span class="s2">"ko"</span><span class="p">,</span>
|
||
<span class="c1"># Malayalam is one of 22 scheduled languages of India.</span>
|
||
<span class="s2">"malayam"</span><span class="p">:</span> <span class="s2">"ml"</span><span class="p">,</span>
|
||
<span class="s2">"norsk"</span><span class="p">:</span> <span class="s2">"nb"</span><span class="p">,</span>
|
||
<span class="s2">"sinhalese"</span><span class="p">:</span> <span class="s2">"si"</span><span class="p">,</span>
|
||
<span class="p">}</span>
|
||
<span class="p">)</span>
|
||
|
||
<span class="n">skip_eng_tags</span> <span class="o">=</span> <span class="p">{</span>
|
||
<span class="s2">"english_uk"</span><span class="p">,</span> <span class="c1"># SearXNG lang 'en' already maps to 'english'</span>
|
||
<span class="p">}</span>
|
||
|
||
<span class="k">for</span> <span class="n">option</span> <span class="ow">in</span> <span class="n">dom</span><span class="o">.</span><span class="n">xpath</span><span class="p">(</span><span class="s1">'//form[@name="settings"]//select[@name="language"]/option'</span><span class="p">):</span>
|
||
<span class="n">eng_tag</span> <span class="o">=</span> <span class="n">option</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"value"</span><span class="p">)</span>
|
||
<span class="k">if</span> <span class="n">eng_tag</span> <span class="ow">in</span> <span class="n">skip_eng_tags</span><span class="p">:</span>
|
||
<span class="k">continue</span>
|
||
<span class="n">name</span> <span class="o">=</span> <span class="n">extract_text</span><span class="p">(</span><span class="n">option</span><span class="p">)</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> <span class="c1"># type: ignore</span>
|
||
|
||
<span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">catalog_engine2code</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">eng_tag</span><span class="p">)</span>
|
||
<span class="k">if</span> <span class="n">sxng_tag</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="n">sxng_tag</span> <span class="o">=</span> <span class="n">catalog_engine2code</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">name</span><span class="p">)</span>
|
||
<span class="k">if</span> <span class="n">sxng_tag</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||
<span class="c1"># silently ignore unknown languages</span>
|
||
<span class="k">continue</span>
|
||
|
||
<span class="n">conflict</span> <span class="o">=</span> <span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sxng_tag</span><span class="p">)</span>
|
||
<span class="k">if</span> <span class="n">conflict</span><span class="p">:</span>
|
||
<span class="k">if</span> <span class="n">conflict</span> <span class="o">!=</span> <span class="n">eng_tag</span><span class="p">:</span>
|
||
<span class="nb">print</span><span class="p">(</span><span class="s2">"CONFLICT: babel </span><span class="si">%s</span><span class="s2"> --> </span><span class="si">%s</span><span class="s2">, </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="p">(</span><span class="n">sxng_tag</span><span class="p">,</span> <span class="n">conflict</span><span class="p">,</span> <span class="n">eng_tag</span><span class="p">))</span>
|
||
<span class="k">continue</span>
|
||
<span class="n">engine_traits</span><span class="o">.</span><span class="n">languages</span><span class="p">[</span><span class="n">sxng_tag</span><span class="p">]</span> <span class="o">=</span> <span class="n">eng_tag</span></div>
|
||
|
||
</pre></div>
|
||
</article>
|
||
</div>
|
||
<!-- Page footer: a related-pages container followed by the bottom-of-page credits and icon links. -->
<footer>
|
||
|
||
<!-- Related-pages container; it holds no child elements on this page. -->
<div class="related-pages">
|
||
|
||
|
||
</div>
|
||
<!-- Bottom strip: copyright and theme credit on the left, external icon links on the right. -->
<div class="bottom-of-page">
|
||
<div class="left-details">
|
||
<div class="copyright">
|
||
Copyright © SearXNG team
|
||
</div>
|
||
Made with
|
||
<a href="https://github.com/pradyunsg/furo">Furo</a>
|
||
|
||
</div>
|
||
<div class="right-details">
|
||
<div class="icons">
|
||
<!-- The visible content of each icon link is an emoji glyph; the aria-label attribute carries the link's accessible name. -->
<a class="muted-link " href="https://github.com/searxng/searxng/" aria-label="GitHub">💾</a>
|
||
<a class="muted-link " href="https://searx.space/" aria-label="searx.space">🌐</a>
|
||
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
</footer>
|
||
</div>
|
||
<!-- Drawer sidebar (presumably the table-of-contents drawer, going by the class name; confirm against the theme). It carries the no-toc class and has no child elements on this page. -->
<aside class="toc-drawer no-toc">
|
||
|
||
|
||
|
||
</aside>
|
||
</div>
|
||
<!-- Closes an outer wrapper, then loads four static scripts just before the closing body tag: documentation_options.js, doctools.js, sphinx_highlight.js and scripts/furo.js. None has defer or async. The ?v= query values are presumably cache-busting hashes (confirm with the build tooling). -->
</div><script src="../../../_static/documentation_options.js?v=9e1305a3"></script>
|
||
<script src="../../../_static/doctools.js?v=fd6eb6e6"></script>
|
||
<script src="../../../_static/sphinx_highlight.js?v=6ffebe34"></script>
|
||
<script src="../../../_static/scripts/furo.js?v=46bd48cc"></script>
|
||
</body>
|
||
</html> |