mirror of
https://github.com/searxng/searxng.git
synced 2026-06-04 00:47:17 +02:00
[enh] data: traits population
Job failing since October 2025. enh: always raise and reuse data fix: brave unknown locale fix: startpage add "brazilian"
This commit is contained in:
committed by
Markus Heiser
parent
a9f3baefe6
commit
2b03a61832
+26
-30
@@ -166,30 +166,27 @@ Terms / phrases that you keep coming across:
|
||||
"""
|
||||
# pylint: disable=global-statement
|
||||
|
||||
import typing as t
|
||||
import json
|
||||
import re
|
||||
import typing as t
|
||||
|
||||
import babel
|
||||
import lxml.html
|
||||
|
||||
from searx import locales
|
||||
|
||||
from searx.enginelib import EngineCache
|
||||
from searx.enginelib.traits import EngineTraits
|
||||
from searx.exceptions import SearxEngineCaptchaException
|
||||
from searx.external_bang import EXTERNAL_BANGS, get_node # type: ignore
|
||||
|
||||
from searx.result_types import EngineResults
|
||||
from searx.utils import (
|
||||
ElementType,
|
||||
eval_xpath,
|
||||
eval_xpath_getindex,
|
||||
extr,
|
||||
extract_text,
|
||||
ElementType,
|
||||
gen_useragent,
|
||||
)
|
||||
from searx.network import get # see https://github.com/searxng/searxng/issues/762
|
||||
from searx.enginelib.traits import EngineTraits
|
||||
from searx.enginelib import EngineCache
|
||||
from searx.exceptions import SearxEngineCaptchaException
|
||||
from searx.result_types import EngineResults
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
from searx.extended_types import SXNG_Response
|
||||
@@ -355,7 +352,7 @@ def quote_ddg_bangs(query: str) -> str:
|
||||
if not val.strip():
|
||||
continue
|
||||
|
||||
if val.startswith('!') and get_node(EXTERNAL_BANGS, val[1:]):
|
||||
if val.startswith("!") and get_node(EXTERNAL_BANGS, val[1:]):
|
||||
val = f"'{val}'"
|
||||
_q.append(val)
|
||||
return " ".join(_q)
|
||||
@@ -412,7 +409,8 @@ def request(query: str, params: "OnlineParams") -> None:
|
||||
# reputation of the SearXNG IP and DDG starts to activate CAPTCHAs.
|
||||
# set suspend time to zero is OK --> ddg does not block the IP
|
||||
raise SearxEngineCaptchaException(
|
||||
suspended_time=0, message=f"VQD missed (page: {params['pageno']}, locale: {params['searxng_locale']})"
|
||||
suspended_time=0,
|
||||
message=f"VQD missed (page: {params['pageno']}, locale: {params['searxng_locale']})",
|
||||
)
|
||||
|
||||
if params["searxng_locale"].startswith("zh"):
|
||||
@@ -536,34 +534,34 @@ def fetch_traits(engine_traits: EngineTraits):
|
||||
|
||||
"""
|
||||
# pylint: disable=too-many-branches, too-many-statements, disable=import-outside-toplevel
|
||||
|
||||
from searx.network import get # see https://github.com/searxng/searxng/issues/762
|
||||
from searx.utils import js_obj_str_to_python
|
||||
|
||||
# fetch regions
|
||||
|
||||
engine_traits.all_locale = 'wt-wt'
|
||||
engine_traits.all_locale = "wt-wt"
|
||||
|
||||
# updated from u661.js to u.7669f071a13a7daa57cb / should be updated automatically?
|
||||
resp = get('https://duckduckgo.com/dist/util/u.7669f071a13a7daa57cb.js')
|
||||
|
||||
resp = get("https://duckduckgo.com/dist/util/u.7669f071a13a7daa57cb.js", timeout=5)
|
||||
if not resp.ok:
|
||||
print("ERROR: response from DuckDuckGo is not OK.")
|
||||
raise RuntimeError("Response from DuckDuckGo regions is not OK.")
|
||||
|
||||
js_code = extr(resp.text, 'regions:', ',snippetLengths')
|
||||
js_code = extr(resp.text, "regions:", ",snippetLengths")
|
||||
|
||||
regions = json.loads(js_code)
|
||||
for eng_tag, name in regions.items():
|
||||
|
||||
if eng_tag == 'wt-wt':
|
||||
engine_traits.all_locale = 'wt-wt'
|
||||
if eng_tag == "wt-wt":
|
||||
engine_traits.all_locale = "wt-wt"
|
||||
continue
|
||||
|
||||
region = ddg_reg_map.get(eng_tag)
|
||||
if region == 'skip':
|
||||
if region == "skip":
|
||||
continue
|
||||
|
||||
if not region:
|
||||
eng_territory, eng_lang = eng_tag.split('-')
|
||||
region = eng_lang + '_' + eng_territory.upper()
|
||||
eng_territory, eng_lang = eng_tag.split("-")
|
||||
region = eng_lang + "_" + eng_territory.upper()
|
||||
|
||||
try:
|
||||
sxng_tag = locales.region_tag(babel.Locale.parse(region))
|
||||
@@ -580,25 +578,23 @@ def fetch_traits(engine_traits: EngineTraits):
|
||||
|
||||
# fetch languages
|
||||
|
||||
engine_traits.custom['lang_region'] = {}
|
||||
engine_traits.custom["lang_region"] = {}
|
||||
|
||||
js_code = extr(resp.text, 'languages:', ',regions')
|
||||
js_code = extr(resp.text, "languages:", ",regions")
|
||||
|
||||
languages: dict[str, str] = js_obj_str_to_python(js_code)
|
||||
for eng_lang, name in languages.items():
|
||||
|
||||
if eng_lang == 'wt_WT':
|
||||
if eng_lang == "wt_WT":
|
||||
continue
|
||||
|
||||
babel_tag = ddg_lang_map.get(eng_lang, eng_lang)
|
||||
if babel_tag == 'skip':
|
||||
if babel_tag == "skip":
|
||||
continue
|
||||
|
||||
try:
|
||||
|
||||
if babel_tag == 'lang_region':
|
||||
if babel_tag == "lang_region":
|
||||
sxng_tag = locales.region_tag(babel.Locale.parse(eng_lang))
|
||||
engine_traits.custom['lang_region'][sxng_tag] = eng_lang
|
||||
engine_traits.custom["lang_region"][sxng_tag] = eng_lang
|
||||
continue
|
||||
|
||||
sxng_tag = locales.language_tag(babel.Locale.parse(babel_tag))
|
||||
|
||||
Reference in New Issue
Block a user