mirror of
https://github.com/searxng/searxng.git
synced 2026-06-22 17:48:33 +02:00
[feat] engines: add findfiles.net file search engine
FindFiles.net is a specialized file search engine designed to help you search files online with precision. Unlike traditional search engines that mainly index web pages, FindFiles focuses on finding real files on the internet - including PDFs, documents, archives, videos, datasets, and more. [1] [1] https://findfiles.net
This commit is contained in:
@@ -0,0 +1,118 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
"""FindFiles.net_ is a Germany-based file search engine.
|
||||||
|
|
||||||
|
FindFiles.net_ is a specialized file search engine designed to help you search
|
||||||
|
files online with precision. Unlike traditional search engines that mainly index
|
||||||
|
web pages, FindFiles focuses on finding real files on the internet - including
|
||||||
|
PDFs, documents, archives, videos, datasets, and more.
|
||||||
|
|
||||||
|
.. _FindFiles.net: https://findfiles.net
|
||||||
|
"""
|
||||||
|
|
||||||
|
from os.path import basename
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
import typing as t
|
||||||
|
|
||||||
|
from lxml import html
|
||||||
|
|
||||||
|
from searx.result_types import EngineResults
|
||||||
|
from searx.utils import extract_text, eval_xpath, eval_xpath_list
|
||||||
|
|
||||||
|
if t.TYPE_CHECKING:
|
||||||
|
from extended_types import SXNG_Response
|
||||||
|
from search.processors import OnlineParams
|
||||||
|
|
||||||
|
about = {
|
||||||
|
"website": "https://findfiles.net",
|
||||||
|
"wikidata_id": None,
|
||||||
|
"official_api_documentation": None,
|
||||||
|
"use_official_api": False,
|
||||||
|
"require_api_key": False,
|
||||||
|
"results": "HTML",
|
||||||
|
}
|
||||||
|
|
||||||
|
base_url = "https://findfiles.net"
|
||||||
|
categories = ["files"]
|
||||||
|
paging = True
|
||||||
|
safeserach = True
|
||||||
|
|
||||||
|
safesearch_map = {
|
||||||
|
0: "contentguard.off",
|
||||||
|
1: "contentguard.moderate",
|
||||||
|
2: "contentguard.strict",
|
||||||
|
}
|
||||||
|
|
||||||
|
FindFilesCategory = t.Literal[
|
||||||
|
"all",
|
||||||
|
"document",
|
||||||
|
"text",
|
||||||
|
"image",
|
||||||
|
"audio",
|
||||||
|
"video",
|
||||||
|
]
|
||||||
|
FINDFILES_CATEGORIES = t.get_args(FindFilesCategory)
|
||||||
|
|
||||||
|
findfiles_categ: FindFilesCategory = "all"
|
||||||
|
"""Category to search in."""
|
||||||
|
|
||||||
|
|
||||||
|
def setup(_: dict[str, t.Any]) -> bool:
|
||||||
|
if findfiles_categ not in FINDFILES_CATEGORIES:
|
||||||
|
raise ValueError("invalid category: %s" % findfiles_categ)
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def request(query: str, params: "OnlineParams") -> None:
|
||||||
|
args = {
|
||||||
|
"query": query,
|
||||||
|
"contentguard": safesearch_map[params["safesearch"]],
|
||||||
|
"page": params["pageno"],
|
||||||
|
}
|
||||||
|
# the language in the path doesn't change anything about the results, it
|
||||||
|
# only changes the UI
|
||||||
|
params["url"] = f"{base_url}/en/serp/{findfiles_categ}/?{urlencode(args)}"
|
||||||
|
|
||||||
|
|
||||||
|
def response(resp: "SXNG_Response") -> EngineResults:
|
||||||
|
res = EngineResults()
|
||||||
|
|
||||||
|
dom = html.fromstring(resp.text)
|
||||||
|
if findfiles_categ == "image":
|
||||||
|
for result in eval_xpath_list(
|
||||||
|
dom, "//div[contains(@class, 'image-mosaic')]/div[contains(@class, 'image-item')]"
|
||||||
|
):
|
||||||
|
res.add(
|
||||||
|
res.types.Image(
|
||||||
|
url=extract_text(eval_xpath(result, ".//div[contains(@class, 'caption')]/a/@href")) or "",
|
||||||
|
title=extract_text(eval_xpath(result, ".//div[contains(@class, 'caption')]/a")) or "",
|
||||||
|
thumbnail_src=extract_text(eval_xpath(result, ".//img/@src")) or "",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
elif findfiles_categ == "video":
|
||||||
|
for result in eval_xpath_list(
|
||||||
|
dom, "//div[contains(@class, 'video-mosaic')]/div[contains(@class, 'video-item')]"
|
||||||
|
):
|
||||||
|
video_src = extract_text(eval_xpath(result, ".//video/@src")) or ""
|
||||||
|
res.add(
|
||||||
|
res.types.LegacyResult(
|
||||||
|
template="videos.html",
|
||||||
|
url=video_src,
|
||||||
|
title=extract_text(eval_xpath(result, ".//div[contains(@class, 'caption')]/span")) or "",
|
||||||
|
iframe_src=video_src or "",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
for result in eval_xpath_list(dom, "//ol/li[contains(@class, 'result-item')]/article"):
|
||||||
|
filename = basename(extract_text(eval_xpath(result, ".//h3")) or "")
|
||||||
|
res.add(
|
||||||
|
res.types.File(
|
||||||
|
url=extract_text(eval_xpath(result, ".//h3/a/@href")) or "",
|
||||||
|
title=filename,
|
||||||
|
content=" ".join(extract_text(el) or "" for el in eval_xpath_list(result, "./div/span")),
|
||||||
|
filename=filename,
|
||||||
|
size=extract_text(eval_xpath(result, "(.//span[@id])[1]")) or "",
|
||||||
|
embedded=extract_text(eval_xpath(result, ".//audio/@src")) or "",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
return res
|
||||||
@@ -977,6 +977,34 @@ engines:
|
|||||||
shortcut: fd
|
shortcut: fd
|
||||||
disabled: true
|
disabled: true
|
||||||
|
|
||||||
|
- name: findfiles
|
||||||
|
engine: findfiles
|
||||||
|
findfiles_categ: all
|
||||||
|
categories: files
|
||||||
|
shortcut: fif
|
||||||
|
disabled: true
|
||||||
|
|
||||||
|
- name: findfiles images
|
||||||
|
engine: findfiles
|
||||||
|
findfiles_categ: image
|
||||||
|
categories: images
|
||||||
|
shortcut: fifi
|
||||||
|
disabled: true
|
||||||
|
|
||||||
|
- name: findfiles videos
|
||||||
|
engine: findfiles
|
||||||
|
findfiles_categ: video
|
||||||
|
categories: videos
|
||||||
|
shortcut: fifv
|
||||||
|
disabled: true
|
||||||
|
|
||||||
|
- name: findfiles music
|
||||||
|
engine: findfiles
|
||||||
|
findfiles_categ: audio
|
||||||
|
categories: music
|
||||||
|
shortcut: fifm
|
||||||
|
disabled: true
|
||||||
|
|
||||||
- name: findthatmeme
|
- name: findthatmeme
|
||||||
engine: findthatmeme
|
engine: findthatmeme
|
||||||
shortcut: ftm
|
shortcut: ftm
|
||||||
|
|||||||
Reference in New Issue
Block a user