mirror of
https://github.com/searxng/searxng.git
synced 2026-06-13 21:37:51 +02:00
b48205b384
e.g. when searching for "!tiger pottering github", it crashes. not really sure why - the problem is that the HTML doesn't really uses descriptive classes or ids, only Tailwind, so it's very hard to select only the results HTML.
171 lines
5.4 KiB
Python
171 lines
5.4 KiB
Python
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
"""Tiger_ is a Swiss meta search engine.
|
|
|
|
.. _Tiger: https://tiger.ch
|
|
"""
|
|
|
|
from json import loads
|
|
import random
|
|
from urllib.parse import urlencode
|
|
|
|
import typing as t
|
|
|
|
from dateutil import parser
|
|
from lxml import html
|
|
|
|
from searx.exceptions import SearxEngineAPIException
|
|
from searx.extended_types import SXNG_Response
|
|
from searx.network import get, post
|
|
from searx.result_types import EngineResults
|
|
from searx.utils import extr, eval_xpath_list, eval_xpath, extract_text
|
|
from searx.enginelib import EngineCache
|
|
|
|
if t.TYPE_CHECKING:
|
|
from searx.search.processors import OnlineParams
|
|
|
|
about = {
|
|
"website": "https://tiger.ch",
|
|
"official_api_documentation": None,
|
|
"use_official_api": False,
|
|
"require_api_key": False,
|
|
"results": "HTML",
|
|
}
|
|
|
|
paging = True
|
|
|
|
base_url = "https://tiger.ch"
|
|
categories = []
|
|
tiger_category = "Websuche"
|
|
"""
|
|
Possible values: "Websuche", "News".
|
|
"""
|
|
|
|
|
|
CACHE: EngineCache
|
|
"""Cache to store session codes (result of solved CAPTCHA)."""
|
|
|
|
|
|
def init(_):
|
|
if tiger_category not in ("Websuche", "News"):
|
|
raise ValueError("invalid search category: %s" % tiger_category)
|
|
|
|
|
|
def setup(engine_settings: dict[str, t.Any]) -> bool:
|
|
global CACHE # pylint: disable=global-statement
|
|
CACHE = EngineCache(engine_settings["name"])
|
|
return True
|
|
|
|
|
|
def _obtain_session_code() -> str:
|
|
"""The challenge works like this:
|
|
|
|
- We first generate 3 random numbers.
|
|
- Then we send them to /Human.svc/Make to get the operands (+, -) for the
|
|
math challenge (i.e. a simple calculation)
|
|
- Based on the operands, we calculate a result (usually done by the user by
|
|
hand)
|
|
- We send the result of the math calculation to the server to obtain a
|
|
session "code" that has to be sent as cookie parameter for all searches
|
|
|
|
E.g., challenges look like ``19-3+5``.
|
|
"""
|
|
cached_session = CACHE.get("session")
|
|
if cached_session:
|
|
return cached_session
|
|
|
|
results_page = get(f"{base_url}/_internCode.aspx")
|
|
doc = html.fromstring(results_page.text)
|
|
|
|
extra_data: dict[str, str] = {}
|
|
for extra_param in ("__VIEWSTATE", "__VIEWSTATEGENERATOR", "__EVENTVALIDATION"):
|
|
extra_data[extra_param] = doc.xpath(f"//input[@name='{extra_param}']/@value")[0]
|
|
|
|
# var z1 = Math.floor((Math.random() * 8) + 11);
|
|
# var z2 = Math.floor((Math.random() * 8) + 1);
|
|
# var z3 = Math.floor((Math.random() * 8) + 1);
|
|
num1 = random.randint(11, 19)
|
|
num2 = random.randint(1, 9)
|
|
num3 = random.randint(1, 9)
|
|
|
|
challenge = get(f"{base_url}/Services/Human.svc/Make?M1={num1}&M2={num2}&M3={num3}", cookies=results_page.cookies)
|
|
signs = loads(challenge.json()["d"])[0]
|
|
sign1 = signs["Z1"]
|
|
sign2 = signs["Z2"]
|
|
|
|
result = num1
|
|
for num, sign in [(num2, sign1), (num3, sign2)]:
|
|
if sign == "+":
|
|
result += num
|
|
else:
|
|
result -= num
|
|
|
|
logger.debug(f"got challenge: {num1} {sign1} {num2} {sign2} {num3} = {result}")
|
|
data = {
|
|
**extra_data,
|
|
"txtM": str(result),
|
|
"btnHuman": "OK",
|
|
}
|
|
|
|
challenge_response = post(
|
|
f"{base_url}/_internCode.aspx",
|
|
cookies=results_page.cookies,
|
|
data=data,
|
|
)
|
|
|
|
cookie = challenge_response.cookies["Tiger.ch"]
|
|
code = extr(cookie, "Code=", "&")
|
|
if not code:
|
|
raise SearxEngineAPIException("failed to obtain session code")
|
|
|
|
CACHE.set("session", code, expire=60 * 24 * 60) # cookie is valid for two months
|
|
return code
|
|
|
|
|
|
def request(query: str, params: "OnlineParams"):
|
|
code = _obtain_session_code()
|
|
args = {"w": query, "page": params["pageno"]}
|
|
params["url"] = f"{base_url}/{tiger_category}?{urlencode(args)}"
|
|
params["cookies"]["Tiger.ch"] = f"Code={code}"
|
|
|
|
|
|
def response(resp: "SXNG_Response") -> EngineResults:
|
|
res = EngineResults()
|
|
doc = html.fromstring(resp.text)
|
|
|
|
if tiger_category == "Websuche":
|
|
for result in eval_xpath_list(doc, "//div[@id='mainContainer']//table/tr"):
|
|
url = extract_text(eval_xpath(result, ".//a[contains(@class, 'weblink')]/@href"))
|
|
if not url:
|
|
continue
|
|
res.add(
|
|
res.types.MainResult(
|
|
url=url,
|
|
title=extract_text(eval_xpath(result, ".//a[contains(@class, 'weblink')]")) or "",
|
|
content=extract_text(eval_xpath(result, ".//*[contains(@class, 'webbodynopic')]")) or "",
|
|
)
|
|
)
|
|
elif tiger_category == "News":
|
|
for result in eval_xpath_list(doc, "//div[@id='panNews']/div"):
|
|
publishedDate = None
|
|
try:
|
|
date_str = extract_text(eval_xpath(result, ".//span[contains(@class, 'help')]/span")) or ""
|
|
date_str = date_str.strip().removeprefix("-").strip()
|
|
publishedDate = parser.parse(date_str)
|
|
except parser.ParserError:
|
|
pass
|
|
|
|
thumbnail = extract_text(eval_xpath(result, "./img/@src"))
|
|
if thumbnail:
|
|
thumbnail = base_url + thumbnail
|
|
|
|
res.add(
|
|
res.types.MainResult(
|
|
url=extract_text(eval_xpath(result, ".//a[contains(@class, 'webLink')]/@href")),
|
|
title=extract_text(eval_xpath(result, ".//a[contains(@class, 'webLink')]")) or "",
|
|
thumbnail=thumbnail or "",
|
|
publishedDate=publishedDate,
|
|
)
|
|
)
|
|
|
|
return res
|