mirror of
https://github.com/searxng/searxng.git
synced 2026-06-03 16:37:16 +02:00
[enh] data: traits population
Job failing since October 2025. enh: always raise and reuse data; fix: brave unknown locale; fix: startpage add "brazilian".
This commit is contained in:
committed by
Markus Heiser
parent
a9f3baefe6
commit
2b03a61832
@@ -10,29 +10,33 @@ Dailymotion (Videos)
|
||||
|
||||
"""
|
||||
|
||||
import time
|
||||
from datetime import datetime, timedelta
|
||||
from urllib.parse import urlencode
|
||||
import time
|
||||
|
||||
import babel
|
||||
|
||||
from searx.network import get, raise_for_httperror # see https://github.com/searxng/searxng/issues/762
|
||||
from searx.utils import html_to_text
|
||||
from searx.exceptions import SearxEngineAPIException
|
||||
from searx.locales import region_tag, language_tag
|
||||
from searx.enginelib.traits import EngineTraits
|
||||
from searx.exceptions import SearxEngineAPIException
|
||||
from searx.locales import language_tag, region_tag
|
||||
from searx.network import ( # see https://github.com/searxng/searxng/issues/762
|
||||
get,
|
||||
raise_for_httperror,
|
||||
)
|
||||
from searx.utils import html_to_text
|
||||
|
||||
# about
|
||||
about = {
|
||||
"website": 'https://www.dailymotion.com',
|
||||
"wikidata_id": 'Q769222',
|
||||
"official_api_documentation": 'https://www.dailymotion.com/developer',
|
||||
"website": "https://www.dailymotion.com",
|
||||
"wikidata_id": "Q769222",
|
||||
"official_api_documentation": "https://www.dailymotion.com/developer",
|
||||
"use_official_api": True,
|
||||
"require_api_key": False,
|
||||
"results": 'JSON',
|
||||
"results": "JSON",
|
||||
}
|
||||
|
||||
# engine dependent config
|
||||
categories = ['videos']
|
||||
categories = ["videos"]
|
||||
paging = True
|
||||
number_of_results = 10
|
||||
|
||||
@@ -46,8 +50,8 @@ time_delta_dict = {
|
||||
|
||||
safesearch = True
|
||||
safesearch_params = {
|
||||
2: {'is_created_for_kids': 'true'},
|
||||
1: {'is_created_for_kids': 'true'},
|
||||
2: {"is_created_for_kids": "true"},
|
||||
1: {"is_created_for_kids": "true"},
|
||||
0: {},
|
||||
}
|
||||
"""True if this video is "Created for Kids" / intends to target an audience
|
||||
@@ -55,9 +59,9 @@ under the age of 16 (``is_created_for_kids`` in `Video filters API`_ )
|
||||
"""
|
||||
|
||||
family_filter_map = {
|
||||
2: 'true',
|
||||
1: 'true',
|
||||
0: 'false',
|
||||
2: "true",
|
||||
1: "true",
|
||||
0: "false",
|
||||
}
|
||||
"""By default, the family filter is turned on. Setting this parameter to
|
||||
``false`` will stop filtering-out explicit content from searches and global
|
||||
@@ -65,21 +69,21 @@ contexts (``family_filter`` in `Global API Parameters`_ ).
|
||||
"""
|
||||
|
||||
result_fields = [
|
||||
'allow_embed',
|
||||
'description',
|
||||
'title',
|
||||
'created_time',
|
||||
'duration',
|
||||
'url',
|
||||
'thumbnail_360_url',
|
||||
'id',
|
||||
"allow_embed",
|
||||
"description",
|
||||
"title",
|
||||
"created_time",
|
||||
"duration",
|
||||
"url",
|
||||
"thumbnail_360_url",
|
||||
"id",
|
||||
]
|
||||
"""`Fields selection`_, by default, a few fields are returned. To request more
|
||||
specific fields, the ``fields`` parameter is used with the list of fields
|
||||
SearXNG needs in the response to build a video result list.
|
||||
"""
|
||||
|
||||
search_url = 'https://api.dailymotion.com/videos?'
|
||||
search_url = "https://api.dailymotion.com/videos?"
|
||||
"""URL to retrieve a list of videos.
|
||||
|
||||
- `REST GET`_
|
||||
@@ -96,42 +100,42 @@ def request(query, params):
|
||||
if not query:
|
||||
return False
|
||||
|
||||
eng_region: str = traits.get_region(params['searxng_locale'], 'en_US') # type: ignore
|
||||
eng_lang = traits.get_language(params['searxng_locale'], 'en')
|
||||
eng_region: str = traits.get_region(params["searxng_locale"], "en_US") # type: ignore
|
||||
eng_lang = traits.get_language(params["searxng_locale"], "en")
|
||||
|
||||
args = {
|
||||
'search': query,
|
||||
'family_filter': family_filter_map.get(params['safesearch'], 'false'),
|
||||
'thumbnail_ratio': 'original', # original|widescreen|square
|
||||
"search": query,
|
||||
"family_filter": family_filter_map.get(params["safesearch"], "false"),
|
||||
"thumbnail_ratio": "original", # original|widescreen|square
|
||||
# https://developers.dailymotion.com/api/#video-filters
|
||||
'languages': eng_lang,
|
||||
'page': params['pageno'],
|
||||
'password_protected': 'false',
|
||||
'private': 'false',
|
||||
'sort': 'relevance',
|
||||
'limit': number_of_results,
|
||||
'fields': ','.join(result_fields),
|
||||
"languages": eng_lang,
|
||||
"page": params["pageno"],
|
||||
"password_protected": "false",
|
||||
"private": "false",
|
||||
"sort": "relevance",
|
||||
"limit": number_of_results,
|
||||
"fields": ",".join(result_fields),
|
||||
}
|
||||
|
||||
args.update(safesearch_params.get(params['safesearch'], {}))
|
||||
args.update(safesearch_params.get(params["safesearch"], {}))
|
||||
|
||||
# Don't add localization and country arguments if the user selects only a
|
||||
# language (:de, :en, ..)
|
||||
|
||||
if len(params['searxng_locale'].split('-')) > 1:
|
||||
if len(params["searxng_locale"].split("-")) > 1:
|
||||
# https://developers.dailymotion.com/api/#global-parameters
|
||||
args['localization'] = eng_region
|
||||
args['country'] = eng_region.split('_')[1]
|
||||
args["localization"] = eng_region
|
||||
args["country"] = eng_region.split("_")[1]
|
||||
# Insufficient rights for the `ams_country' parameter of route `GET /videos'
|
||||
# 'ams_country': eng_region.split('_')[1],
|
||||
|
||||
time_delta = time_delta_dict.get(params["time_range"])
|
||||
if time_delta:
|
||||
created_after = datetime.now() - time_delta
|
||||
args['created_after'] = datetime.timestamp(created_after)
|
||||
args["created_after"] = datetime.timestamp(created_after)
|
||||
|
||||
query_str = urlencode(args)
|
||||
params['url'] = search_url + query_str
|
||||
params["url"] = search_url + query_str
|
||||
|
||||
return params
|
||||
|
||||
@@ -143,46 +147,45 @@ def response(resp):
|
||||
search_res = resp.json()
|
||||
|
||||
# check for an API error
|
||||
if 'error' in search_res:
|
||||
raise SearxEngineAPIException(search_res['error'].get('message'))
|
||||
if "error" in search_res:
|
||||
raise SearxEngineAPIException(search_res["error"].get("message"))
|
||||
|
||||
raise_for_httperror(resp)
|
||||
|
||||
# parse results
|
||||
for res in search_res.get('list', []):
|
||||
for res in search_res.get("list", []):
|
||||
title = res["title"]
|
||||
url = res["url"]
|
||||
|
||||
title = res['title']
|
||||
url = res['url']
|
||||
|
||||
content = html_to_text(res['description'])
|
||||
content = html_to_text(res["description"])
|
||||
if len(content) > 300:
|
||||
content = content[:300] + '...'
|
||||
content = content[:300] + "..."
|
||||
|
||||
publishedDate = datetime.fromtimestamp(res['created_time'], None)
|
||||
publishedDate = datetime.fromtimestamp(res["created_time"], None)
|
||||
|
||||
length = time.gmtime(res.get('duration'))
|
||||
length = time.gmtime(res.get("duration"))
|
||||
if length.tm_hour:
|
||||
length = time.strftime("%H:%M:%S", length)
|
||||
else:
|
||||
length = time.strftime("%M:%S", length)
|
||||
|
||||
thumbnail = res['thumbnail_360_url']
|
||||
thumbnail = res["thumbnail_360_url"]
|
||||
thumbnail = thumbnail.replace("http://", "https://")
|
||||
|
||||
item = {
|
||||
'template': 'videos.html',
|
||||
'url': url,
|
||||
'title': title,
|
||||
'content': content,
|
||||
'publishedDate': publishedDate,
|
||||
'length': length,
|
||||
'thumbnail': thumbnail,
|
||||
"template": "videos.html",
|
||||
"url": url,
|
||||
"title": title,
|
||||
"content": content,
|
||||
"publishedDate": publishedDate,
|
||||
"length": length,
|
||||
"thumbnail": thumbnail,
|
||||
}
|
||||
|
||||
# HINT: no matter what the value is, without an API token videos can't be shown
|
||||
# embedded
|
||||
if res['allow_embed']:
|
||||
item['iframe_src'] = iframe_src.format(video_id=res['id'])
|
||||
if res["allow_embed"]:
|
||||
item["iframe_src"] = iframe_src.format(video_id=res["id"])
|
||||
|
||||
results.append(item)
|
||||
|
||||
@@ -208,13 +211,13 @@ def fetch_traits(engine_traits: EngineTraits):
|
||||
|
||||
"""
|
||||
|
||||
resp = get('https://api.dailymotion.com/locales')
|
||||
if not resp.ok: # type: ignore
|
||||
print("ERROR: response from dailymotion/locales is not OK.")
|
||||
resp = get("https://api.dailymotion.com/locales", timeout=5)
|
||||
if not resp.ok:
|
||||
raise RuntimeError("Response from Dailymotion locales is not OK.")
|
||||
|
||||
for item in resp.json()['list']: # type: ignore
|
||||
eng_tag = item['locale']
|
||||
if eng_tag in ('en_EN', 'ar_AA'):
|
||||
for item in resp.json()["list"]: # type: ignore
|
||||
eng_tag = item["locale"]
|
||||
if eng_tag in ("en_EN", "ar_AA"):
|
||||
continue
|
||||
try:
|
||||
sxng_tag = region_tag(babel.Locale.parse(eng_tag))
|
||||
@@ -229,14 +232,14 @@ def fetch_traits(engine_traits: EngineTraits):
|
||||
continue
|
||||
engine_traits.regions[sxng_tag] = eng_tag
|
||||
|
||||
locale_lang_list = [x.split('_')[0] for x in engine_traits.regions.values()]
|
||||
locale_lang_list = [x.split("_")[0] for x in engine_traits.regions.values()]
|
||||
|
||||
resp = get('https://api.dailymotion.com/languages')
|
||||
if not resp.ok: # type: ignore
|
||||
print("ERROR: response from dailymotion/languages is not OK.")
|
||||
resp = get("https://api.dailymotion.com/languages", timeout=5)
|
||||
if not resp.ok:
|
||||
raise RuntimeError("Response from Dailymotion languages is not OK.")
|
||||
|
||||
for item in resp.json()['list']: # type: ignore
|
||||
eng_tag = item['code']
|
||||
for item in resp.json()["list"]: # type: ignore
|
||||
eng_tag = item["code"]
|
||||
if eng_tag in locale_lang_list:
|
||||
sxng_tag = language_tag(babel.Locale.parse(eng_tag))
|
||||
engine_traits.languages[sxng_tag] = eng_tag
|
||||
|
||||
Reference in New Issue
Block a user