mirror of
https://github.com/searxng/searxng.git
synced 2026-06-13 21:37:51 +02:00
[feat] engines: add luxxle (general, news, images, videos)
Add support for https://luxxle.com. Localization is not yet supported because it doesn't seem to work on their website either: no matter which language I select, it only returns English web results.
This commit is contained in:
@@ -0,0 +1,210 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
"""Luxxle_ is an American search engine focusing on providing "unbiased"
|
||||||
|
results.
|
||||||
|
|
||||||
|
.. _Luxxle: https://luxxle.com
|
||||||
|
"""
|
||||||
|
|
||||||
|
from json import dumps
|
||||||
|
from urllib.parse import quote_plus, unquote_plus
|
||||||
|
|
||||||
|
import typing as t
|
||||||
|
from lxml import html
|
||||||
|
|
||||||
|
from searx.result_types import EngineResults
|
||||||
|
from searx.network import get
|
||||||
|
from searx.utils import (
|
||||||
|
extr,
|
||||||
|
gen_useragent,
|
||||||
|
eval_xpath_list,
|
||||||
|
extract_text,
|
||||||
|
eval_xpath,
|
||||||
|
parse_duration_string,
|
||||||
|
ElementType,
|
||||||
|
)
|
||||||
|
|
||||||
|
if t.TYPE_CHECKING:
|
||||||
|
from searx.search.processors import OnlineParams
|
||||||
|
from searx.extended_types import SXNG_Response
|
||||||
|
|
||||||
|
|
||||||
|
# Static metadata describing the upstream service and how it is queried.
about = {
    "website": "https://luxxle.com",
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",
}

# Empty by default; the category is assigned per engine instance in the settings.
categories = []
# Was misspelled ``safeseach``, so the safe-search flag was never actually set.
# ``request()`` forwards the level through ``safe_search_map``.
safesearch = True

base_url = "https://luxxle.com"

luxxle_categ = "search"
"""Supported categories: "search", "news", "images", "videos"."""

# otherwise all requests get blocked (http2-fingerprinted probably)
enable_http2 = False

# Translates the numeric ``params["safesearch"]`` level into the string that is
# sent upstream in the ``safeSearch`` field.
safe_search_map = {0: "Off", 1: "Moderate", 2: "Strict"}
|
||||||
|
|
||||||
|
|
||||||
|
def init(_):
    """Check that ``luxxle_categ`` names a supported category.

    The single positional argument is ignored.

    Raises:
        ValueError: if ``luxxle_categ`` is not one of "search", "images",
            "videos" or "news".
    """
    supported = ("search", "images", "videos", "news")
    if luxxle_categ in supported:
        return
    raise ValueError("invalid luxxle category: %s" % luxxle_categ)
|
||||||
|
|
||||||
|
|
||||||
|
def _obtain_telemetry_data(query: str) -> dict[str, str]:
    """Fetch the per-query values that must accompany a search request.

    The regular result page of the configured category is requested and the
    needed values are read from individual JavaScript variables in its source.

    The luxsearch page (general results) additionally bundles them in a JS dict
    named ``telemetryData``.  The other pages don't have that dict, so it is not
    used here, but it shows which values are needed:

    .. code-block:: javascript

       var telemetryData = {
         errorInformation: errorInformation,
         query: "youapps club",
         ip: "10.10.10.10",
         timeOf: "1781119224",
         authorization: "db889e0ae67d3c320858ad97f51cc4f0a4d8e1913c4f5ebe5d2eafef606521dd",
       };

    This data is only valid for very short times.

    Returns a dict with the keys ``ip``, ``timeOf``, ``authorization`` and
    ``preferencesCookie``.
    """
    page_url = f"{base_url}/lux{luxxle_categ}?q={quote_plus(query)}"
    page_headers = {"User-Agent": gen_useragent(), "Sec-GPC": "1"}
    page = get(page_url, headers=page_headers)

    def js_string_var(name: str) -> str:
        # The value may be double- or single-quoted; try double quotes first.
        return extr(page.text, f'var {name} = "', '";') or extr(page.text, f"var {name} = '", "';")

    wanted = ("ip", "timeOf", "authorization", "preferencesCookie")
    return {name: js_string_var(name) for name in wanted}
|
||||||
|
|
||||||
|
|
||||||
|
def request(query: str, params: "OnlineParams") -> None:
    """Fill ``params`` with the POST request for the configured category.

    Telemetry values are fetched first (one extra HTTP request) and merged into
    the ``searchData`` payload.  The locale ``"all"`` is sent as market
    ``"en-US"``; any other locale is passed through unchanged.
    """
    telemetry = _obtain_telemetry_data(query)

    locale = params["searxng_locale"]
    market = "en-US" if locale == "all" else locale

    params["url"] = f"{base_url}/load_{luxxle_categ}.php"

    payload = dict(telemetry)
    payload.update(
        {
            "query": query,
            "market": market,
            "safeSearch": safe_search_map[params["safesearch"]],
            "freshness": "",
            "language": "english",  # UI language
        }
    )

    if luxxle_categ != "images":
        params["json"] = {"searchData": payload}
    else:
        # for some reason the image endpoint takes the payload as form data
        params["data"] = {"searchData": dumps(payload)}
    params["method"] = "POST"
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_url_from_redirect(url: str):
|
||||||
|
# urls usually look like "/redirect?url=<url>"
|
||||||
|
query_start_idx = url.find("?url=")
|
||||||
|
if query_start_idx < 0:
|
||||||
|
return url
|
||||||
|
|
||||||
|
url_start_idx = query_start_idx + len("?url=")
|
||||||
|
return unquote_plus(url[url_start_idx:])
|
||||||
|
|
||||||
|
|
||||||
|
def _general_results(doc: ElementType, res: EngineResults):
    """Add one main result to ``res`` for every web result container in ``doc``.

    Fields that cannot be extracted are set to an empty string.
    """

    def text_at(node: ElementType, xpath: str) -> str:
        return extract_text(eval_xpath(node, xpath)) or ""

    containers = eval_xpath_list(doc, "//div[@id='mainResults']/div[contains(@class, 'resultsContainer')]")
    for node in containers:
        # the link points at a redirect page, unwrap the real target
        target = _extract_url_from_redirect(text_at(node, "./div[contains(@class, 'urlAddressLink')]/a/@href"))
        title = text_at(node, "./div[contains(@class, 'urlname')]")
        snippet = text_at(node, "./div[contains(@class, 'urlSnippet')]")
        res.add(res.types.MainResult(url=target, title=title, content=snippet))
|
||||||
|
|
||||||
|
|
||||||
|
def _news_results(doc: ElementType, res: EngineResults):
    """Add one main result (with thumbnail) to ``res`` for every news entry in ``doc``.

    Fields that cannot be extracted are set to an empty string.
    """

    def text_at(node: ElementType, xpath: str) -> str:
        return extract_text(eval_xpath(node, xpath)) or ""

    entries = eval_xpath_list(
        doc, "//div[contains(@class, 'newsResults')]/div[contains(@class, 'mediaResultNewsPage')]"
    )
    for node in entries:
        # the title link points at a redirect page, unwrap the real target
        target = _extract_url_from_redirect(
            text_at(node, ".//div[contains(@class, 'mediaResultNewsPageTitle')]/a/@href")
        )
        title = text_at(node, ".//div[contains(@class, 'mediaResultNewsPageTitle')]/a")
        summary = text_at(node, ".//div[contains(@class, 'mediaResultNewsPageDescription')]")
        thumb = text_at(node, ".//div[contains(@class, 'mediaResultThumbnail')]//img/@src")
        res.add(res.types.MainResult(url=target, title=title, content=summary, thumbnail=thumb))
|
||||||
|
|
||||||
|
|
||||||
|
def _video_results(doc: ElementType, res: EngineResults):
    """Add one ``videos.html`` result to ``res`` for every video entry in ``doc``.

    The URL is read from the entry's ``data-url`` attribute.  Text fields that
    cannot be extracted are set to an empty string; the duration text is handed
    to ``parse_duration_string``.
    """

    def text_at(node: ElementType, xpath: str) -> str:
        return extract_text(eval_xpath(node, xpath)) or ""

    entries = eval_xpath_list(doc, "//div[@id='mainResults']/div[contains(@class, 'mediaResult')]")
    for node in entries:
        target = text_at(node, "./@data-url")
        title = text_at(node, ".//div[contains(@class, 'mediaResultTitleVideo')]/a")
        description = text_at(node, ".//div[contains(@class, 'mediaResultDescription')]")
        thumb = text_at(node, ".//img[contains(@class, 'videoThumbnail')]/@src")
        creator = text_at(node, ".//div[contains(@class, 'videoCreator')]")
        duration = parse_duration_string(text_at(node, ".//span[contains(@class, 'mediaResultDuration')]"))
        res.add(
            res.types.MainResult(
                template="videos.html",
                url=target,
                title=title,
                content=description,
                thumbnail=thumb,
                author=creator,
                length=duration,
            )
        )
|
||||||
|
|
||||||
|
|
||||||
|
def _image_results(doc: ElementType, res: EngineResults):
    """Add one image result to ``res`` for every entry of the image wrapper in ``doc``.

    Thumbnail and full-size image URLs are read from the entry's
    ``data-thumbnail-src`` / ``data-image-src`` attributes.  Fields that cannot
    be extracted are set to an empty string.
    """

    def text_at(node: ElementType, xpath: str) -> str:
        return extract_text(eval_xpath(node, xpath)) or ""

    entries = eval_xpath_list(doc, "//div[contains(@class, 'imageResultsWrapper')]/div")
    for node in entries:
        # the source link points at a redirect page, unwrap the real target
        page_url = _extract_url_from_redirect(text_at(node, ".//a[contains(@class, 'imageResultSource')]/@href"))
        title = text_at(node, ".//a[contains(@class, 'imageResultTitle')]")
        origin = text_at(node, ".//div[contains(@class, 'imageResultSource')]")
        thumb = text_at(node, "./@data-thumbnail-src")
        image = text_at(node, "./@data-image-src")
        res.add(
            res.types.Image(
                url=page_url,
                title=title,
                source=origin,
                thumbnail_src=thumb,
                img_src=image,
            )
        )
|
||||||
|
|
||||||
|
|
||||||
|
def response(resp: "SXNG_Response") -> EngineResults:
    """Parse the HTML in ``resp`` with the parser matching ``luxxle_categ``.

    Raises:
        ValueError: if ``luxxle_categ`` is not a supported category.
    """
    parsers = {
        "search": _general_results,
        "images": _image_results,
        "videos": _video_results,
        "news": _news_results,
    }

    doc = html.fromstring(resp.text)
    res = EngineResults()

    parse = parsers.get(luxxle_categ)
    if parse is None:
        raise ValueError("unsupported category: %s" % luxxle_categ)

    parse(doc, res)
    return res
|
||||||
@@ -1477,6 +1477,38 @@ engines:
|
|||||||
shortcut: luc
|
shortcut: luc
|
||||||
timeout: 3.0
|
timeout: 3.0
|
||||||
|
|
||||||
|
- name: luxxle
|
||||||
|
engine: luxxle
|
||||||
|
categories: general
|
||||||
|
luxxle_categ: search
|
||||||
|
shortcut: lux
|
||||||
|
disabled: true
|
||||||
|
inactive: true
|
||||||
|
|
||||||
|
- name: luxxle images
|
||||||
|
engine: luxxle
|
||||||
|
categories: images
|
||||||
|
luxxle_categ: images
|
||||||
|
shortcut: luxi
|
||||||
|
disabled: true
|
||||||
|
inactive: true
|
||||||
|
|
||||||
|
- name: luxxle videos
|
||||||
|
engine: luxxle
|
||||||
|
categories: videos
|
||||||
|
luxxle_categ: videos
|
||||||
|
shortcut: luxv
|
||||||
|
disabled: true
|
||||||
|
inactive: true
|
||||||
|
|
||||||
|
- name: luxxle news
|
||||||
|
engine: luxxle
|
||||||
|
categories: news
|
||||||
|
luxxle_categ: news
|
||||||
|
shortcut: luxn
|
||||||
|
disabled: true
|
||||||
|
inactive: true
|
||||||
|
|
||||||
- name: marginalia
|
- name: marginalia
|
||||||
engine: marginalia
|
engine: marginalia
|
||||||
shortcut: mar
|
shortcut: mar
|
||||||
|
|||||||
Reference in New Issue
Block a user