[feat] engines: add luxxle (general, news, images, videos)

Add support for https://luxxle.com

Localization is not yet supported because it doesn't seem to work on their
website either: no matter which language I select, it only returns English web
results.
This commit is contained in:
Bnyro
2026-06-10 23:14:29 +02:00
parent 2e10a2f614
commit a29cda858c
2 changed files with 242 additions and 0 deletions
+210
View File
@@ -0,0 +1,210 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Luxxle_ is an American search engine focusing on providing "unbiased"
results.
.. _Luxxle: https://luxxle.com
"""
from json import dumps
from urllib.parse import quote_plus, unquote_plus
import typing as t
from lxml import html
from searx.result_types import EngineResults
from searx.network import get
from searx.utils import (
extr,
gen_useragent,
eval_xpath_list,
extract_text,
eval_xpath,
parse_duration_string,
ElementType,
)
if t.TYPE_CHECKING:
from searx.search.processors import OnlineParams
from searx.extended_types import SXNG_Response
# Engine metadata: results are scraped from HTML, no official API or key is used.
about = {
    "website": "https://luxxle.com",
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",
}

categories = []
# The safe-search level is forwarded to Luxxle in ``request`` (see
# ``safe_search_map`` below), so the engine declares the capability.
# NOTE: this attribute was previously misspelled as ``safeseach``.
safesearch = True

base_url = "https://luxxle.com"
luxxle_categ = "search"
"""Supported categories: "search", "news", "images", "videos"."""

# otherwise all requests get blocked (http2-fingerprinted probably)
enable_http2 = False

# Maps the numeric safe-search level to the value sent as ``safeSearch``.
safe_search_map = {0: "Off", 1: "Moderate", 2: "Strict"}
def init(_):
    """Validate the configured ``luxxle_categ`` when the engine is set up.

    The single argument is ignored.

    Raises:
        ValueError: if ``luxxle_categ`` is not one of "search", "images",
            "videos" or "news".
    """
    supported = ("search", "images", "videos", "news")
    if luxxle_categ in supported:
        return
    raise ValueError("invalid luxxle category: %s" % luxxle_categ)
def _obtain_telemetry_data(query: str) -> dict[str, str]:
    """Fetch the per-request values that must accompany a search query.

    The regular ``lux<category>`` page is requested for ``query`` and the
    values are read from individual JavaScript variables in its source.

    The luxsearch page (general results) additionally carries a JS dict named
    ``telemetryData`` holding all of the important info.  The other pages do
    not have it, so it is not used here, but it shows which info is needed:

    .. code-block:: javascript

       var telemetryData = {
         errorInformation: errorInformation,
         query: "youapps club",
         ip: "10.10.10.10",
         timeOf: "1781119224",
         authorization: "db889e0ae67d3c320858ad97f51cc4f0a4d8e1913c4f5ebe5d2eafef606521dd",
       };

    This data is only valid for very short times.

    Returns:
        A dict with the keys ``ip``, ``timeOf``, ``authorization`` and
        ``preferencesCookie``; a value is whatever ``extr`` yields when the
        variable is not found in the page.
    """
    page_url = f"{base_url}/lux{luxxle_categ}?q={quote_plus(query)}"
    request_headers = {"User-Agent": gen_useragent(), "Sec-GPC": "1"}
    resp = get(page_url, headers=request_headers)

    def js_string_var(name: str) -> str:
        # Try the double-quoted assignment first, then the single-quoted one.
        double_quoted = extr(resp.text, f"var {name} = \"", "\";")
        if double_quoted:
            return double_quoted
        return extr(resp.text, f"var {name} = '", "';")

    wanted = ("ip", "timeOf", "authorization", "preferencesCookie")
    return {name: js_string_var(name) for name in wanted}
def request(query: str, params: "OnlineParams") -> None:
    """Fill ``params`` with the POST request for the configured category.

    The telemetry values are fetched first and merged into the ``searchData``
    payload.  The payload is sent as a JSON body, except for the "images"
    category where it is sent as a form field containing the JSON-encoded
    string.
    """
    tokens = _obtain_telemetry_data(query)

    locale = params["searxng_locale"]
    market = "en-US" if locale == "all" else locale

    params["url"] = f"{base_url}/load_{luxxle_categ}.php"

    # Telemetry values go first; the query-specific keys are added on top.
    payload = dict(tokens)
    payload.update(
        {
            "query": query,
            "market": market,
            "safeSearch": safe_search_map[params["safesearch"]],
            "freshness": "",
            "language": "english",  # UI language
        }
    )

    if luxxle_categ != "images":
        params["json"] = {"searchData": payload}
    else:
        # for some reason this is sent as form data
        params["data"] = {"searchData": dumps(payload)}

    params["method"] = "POST"
def _extract_url_from_redirect(url: str):
# urls usually look like "/redirect?url=<url>"
query_start_idx = url.find("?url=")
if query_start_idx < 0:
return url
url_start_idx = query_start_idx + len("?url=")
return unquote_plus(url[url_start_idx:])
def _general_results(doc: ElementType, res: EngineResults):
    """Add one ``MainResult`` to ``res`` per general (web) result in ``doc``.

    Missing fields become empty strings; the link is unwrapped from the
    redirect URL.
    """
    containers_xpath = "//div[@id='mainResults']/div[contains(@class, 'resultsContainer')]"
    for node in eval_xpath_list(doc, containers_xpath):
        href = extract_text(eval_xpath(node, "./div[contains(@class, 'urlAddressLink')]/a/@href")) or ""
        target = _extract_url_from_redirect(href)
        heading = extract_text(eval_xpath(node, "./div[contains(@class, 'urlname')]")) or ""
        snippet = extract_text(eval_xpath(node, "./div[contains(@class, 'urlSnippet')]")) or ""

        item = res.types.MainResult(url=target, title=heading, content=snippet)
        res.add(item)
def _news_results(doc: ElementType, res: EngineResults):
    """Add one ``MainResult`` (with thumbnail) to ``res`` per news result in ``doc``.

    Missing fields become empty strings; the link is unwrapped from the
    redirect URL.
    """
    entries_xpath = "//div[contains(@class, 'newsResults')]/div[contains(@class, 'mediaResultNewsPage')]"
    for node in eval_xpath_list(doc, entries_xpath):
        href = extract_text(eval_xpath(node, ".//div[contains(@class, 'mediaResultNewsPageTitle')]/a/@href")) or ""
        target = _extract_url_from_redirect(href)
        heading = extract_text(eval_xpath(node, ".//div[contains(@class, 'mediaResultNewsPageTitle')]/a")) or ""
        summary = extract_text(eval_xpath(node, ".//div[contains(@class, 'mediaResultNewsPageDescription')]")) or ""
        thumb = extract_text(eval_xpath(node, ".//div[contains(@class, 'mediaResultThumbnail')]//img/@src")) or ""

        item = res.types.MainResult(
            url=target,
            title=heading,
            content=summary,
            thumbnail=thumb,
        )
        res.add(item)
def _video_results(doc: ElementType, res: EngineResults):
    """Add one video result (``videos.html`` template) to ``res`` per entry in ``doc``.

    The URL is read from the entry's ``data-url`` attribute.  Missing text
    fields become empty strings; the duration text (or an empty string) is
    passed to ``parse_duration_string``.
    """
    entries_xpath = "//div[@id='mainResults']/div[contains(@class, 'mediaResult')]"
    for node in eval_xpath_list(doc, entries_xpath):
        target = extract_text(eval_xpath(node, "./@data-url")) or ""
        heading = extract_text(eval_xpath(node, ".//div[contains(@class, 'mediaResultTitleVideo')]/a")) or ""
        summary = extract_text(eval_xpath(node, ".//div[contains(@class, 'mediaResultDescription')]")) or ""
        thumb = extract_text(eval_xpath(node, ".//img[contains(@class, 'videoThumbnail')]/@src")) or ""
        creator = extract_text(eval_xpath(node, ".//div[contains(@class, 'videoCreator')]")) or ""
        duration_text = extract_text(eval_xpath(node, ".//span[contains(@class, 'mediaResultDuration')]")) or ""

        item = res.types.MainResult(
            template="videos.html",
            url=target,
            title=heading,
            content=summary,
            thumbnail=thumb,
            author=creator,
            length=parse_duration_string(duration_text),
        )
        res.add(item)
def _image_results(doc: ElementType, res: EngineResults):
    """Add one ``Image`` result to ``res`` per entry of the image results wrapper.

    Thumbnail and full image URLs come from the entry's ``data-thumbnail-src``
    and ``data-image-src`` attributes.  Missing fields become empty strings;
    the source page link is unwrapped from the redirect URL.
    """
    entries_xpath = "//div[contains(@class, 'imageResultsWrapper')]/div"
    for node in eval_xpath_list(doc, entries_xpath):
        href = extract_text(eval_xpath(node, ".//a[contains(@class, 'imageResultSource')]/@href")) or ""
        page_url = _extract_url_from_redirect(href)
        heading = extract_text(eval_xpath(node, ".//a[contains(@class, 'imageResultTitle')]")) or ""
        origin = extract_text(eval_xpath(node, ".//div[contains(@class, 'imageResultSource')]")) or ""
        thumb = extract_text(eval_xpath(node, "./@data-thumbnail-src")) or ""
        full_image = extract_text(eval_xpath(node, "./@data-image-src")) or ""

        item = res.types.Image(
            url=page_url,
            title=heading,
            source=origin,
            thumbnail_src=thumb,
            img_src=full_image,
        )
        res.add(item)
def response(resp: "SXNG_Response") -> EngineResults:
    """Parse the HTML in ``resp`` with the parser matching ``luxxle_categ``.

    Returns:
        The ``EngineResults`` filled by the category-specific parser.

    Raises:
        ValueError: if ``luxxle_categ`` is not a supported category.
    """
    doc = html.fromstring(resp.text)
    res = EngineResults()

    if luxxle_categ == "search":
        _general_results(doc, res)
    elif luxxle_categ == "images":
        _image_results(doc, res)
    elif luxxle_categ == "videos":
        _video_results(doc, res)
    elif luxxle_categ == "news":
        _news_results(doc, res)
    else:
        raise ValueError("unsupported category: %s" % luxxle_categ)

    return res
+32
View File
@@ -1477,6 +1477,38 @@ engines:
shortcut: luc shortcut: luc
timeout: 3.0 timeout: 3.0
# Luxxle engines: four entries share the same `luxxle` engine module and differ
# only in `luxxle_categ` (and the matching category / shortcut).
# All four are shipped with `disabled: true` and `inactive: true`.

# General web results (luxxle_categ: search).
- name: luxxle
engine: luxxle
categories: general
luxxle_categ: search
shortcut: lux
disabled: true
inactive: true
# Image results (luxxle_categ: images).
- name: luxxle images
engine: luxxle
categories: images
luxxle_categ: images
shortcut: luxi
disabled: true
inactive: true
# Video results (luxxle_categ: videos).
- name: luxxle videos
engine: luxxle
categories: videos
luxxle_categ: videos
shortcut: luxv
disabled: true
inactive: true
# News results (luxxle_categ: news).
- name: luxxle news
engine: luxxle
categories: news
luxxle_categ: news
shortcut: luxn
disabled: true
inactive: true
- name: marginalia - name: marginalia
engine: marginalia engine: marginalia
shortcut: mar shortcut: mar