mirror of
https://github.com/searxng/searxng.git
synced 2026-06-08 19:07:50 +02:00
[feat] engines: add tiger.ch engine
Add support for https://tiger.ch (general, news). It is disabled and inactive by default because it is just a metasearch engine like SearXNG, so it is mostly useful for bypassing rate limits on other engines (it has its own German index, but it's not that great). In theory it supports different locales, but I was too lazy to implement that (I only need German and English results anyway, which are returned by default...).
This commit is contained in:
@@ -0,0 +1,167 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Tiger_ is a Swiss meta search engine.
|
||||
|
||||
.. _Tiger: https://tiger.ch
|
||||
"""
|
||||
|
||||
from json import loads
|
||||
import random
|
||||
from urllib.parse import urlencode
|
||||
|
||||
import typing as t
|
||||
|
||||
from dateutil import parser
|
||||
from lxml import html
|
||||
|
||||
from searx.exceptions import SearxEngineAPIException
|
||||
from searx.extended_types import SXNG_Response
|
||||
from searx.network import get, post
|
||||
from searx.result_types import EngineResults
|
||||
from searx.utils import extr, eval_xpath_list, eval_xpath, extract_text
|
||||
from searx.enginelib import EngineCache
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
from searx.search.processors import OnlineParams
|
||||
|
||||
# Engine metadata: results are scraped from the HTML pages of tiger.ch, no
# official API and no API key are used.
about: dict[str, t.Any] = {
    "website": "https://tiger.ch",
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",
}

# request() passes the page number on as the ``page`` URL argument
paging: bool = True

base_url: str = "https://tiger.ch"
categories: list[str] = []

# path segment of the search URL, also selects the parser used in response()
tiger_category: str = "Websuche"
"""
Possible values: "Websuche", "News".
"""

CACHE: EngineCache
"""Cache to store session codes (result of solved CAPTCHA)."""
|
||||
|
||||
|
||||
def init(_):
    """Check that ``tiger_category`` holds one of the supported values.

    The positional argument is ignored.

    :raises ValueError: if ``tiger_category`` is neither ``"Websuche"`` nor
        ``"News"``.
    """
    supported_categories = ("Websuche", "News")
    if tiger_category in supported_categories:
        return
    raise ValueError("invalid search category: %s" % tiger_category)
|
||||
|
||||
|
||||
def setup(engine_settings: dict[str, t.Any]) -> bool:
    """Create the module-wide ``CACHE`` for this engine instance.

    The cache is named after the ``name`` entry of *engine_settings*.

    :returns: always ``True``.
    """
    global CACHE  # pylint: disable=global-statement
    engine_name = engine_settings["name"]
    CACHE = EngineCache(engine_name)
    return True
|
||||
|
||||
|
||||
def _obtain_session_code() -> str:
    """Return the session code that has to be sent with every search.

    A code stored in ``CACHE`` under the key ``"session"`` is reused when
    present.  Otherwise the arithmetic challenge of the site is solved, which
    works like this:

    - We first generate 3 random numbers.
    - Then we send them to /Human.svc/Make to get the operators (+, -) for the
      math challenge (i.e. a simple calculation)
    - Based on the operators, we calculate a result (usually done by the user
      by hand)
    - We send the result of the math calculation to the server to obtain a
      session "code" that has to be sent as cookie parameter for all searches

    E.g., challenges look like ``18-3+5``.

    :returns: the session code extracted from the ``Tiger.ch`` cookie.
    :raises SearxEngineAPIException: if the challenge page lacks one of the
        expected form fields, or if no session code could be extracted from
        the response to the solved challenge.
    """
    cached_session = CACHE.get("session")
    if cached_session:
        return cached_session

    results_page = get(f"{base_url}/_internCode.aspx")
    doc = html.fromstring(results_page.text)

    # form fields of the challenge page that are posted back unchanged
    extra_data: dict[str, str] = {}
    for extra_param in ("__VIEWSTATE", "__VIEWSTATEGENERATOR", "__EVENTVALIDATION"):
        values = doc.xpath(f"//input[@name='{extra_param}']/@value")
        if not values:
            raise SearxEngineAPIException(f"challenge form field {extra_param} not found")
        extra_data[extra_param] = values[0]

    # The site's own JavaScript picks the numbers like this:
    #
    #   var z1 = Math.floor((Math.random() * 8) + 11);
    #   var z2 = Math.floor((Math.random() * 8) + 1);
    #   var z3 = Math.floor((Math.random() * 8) + 1);
    #
    # Math.random() is < 1, so the results are 11..18 and 1..8.  Both bounds of
    # randint() are inclusive, hence the upper bounds 18 and 8.
    num1 = random.randint(11, 18)
    num2 = random.randint(1, 8)
    num3 = random.randint(1, 8)

    challenge = get(
        f"{base_url}/Services/Human.svc/Make?M1={num1}&M2={num2}&M3={num3}",
        cookies=results_page.cookies,
    )
    # the "d" member is itself a JSON encoded list, its first item holds the signs
    signs = loads(challenge.json()["d"])[0]
    sign1 = signs["Z1"]
    sign2 = signs["Z2"]

    result = num1
    for num, sign in ((num2, sign1), (num3, sign2)):
        if sign == "+":
            result += num
        else:
            result -= num

    logger.debug("got challenge: %s %s %s %s %s = %s", num1, sign1, num2, sign2, num3, result)

    data = {
        **extra_data,
        "txtM": str(result),
        "btnHuman": "OK",
    }
    challenge_response = post(
        f"{base_url}/_internCode.aspx",
        cookies=results_page.cookies,
        data=data,
    )

    try:
        cookie = challenge_response.cookies["Tiger.ch"]
    except KeyError as exc:
        # no session cookie at all, e.g. because the answer was rejected
        raise SearxEngineAPIException("failed to obtain session code") from exc

    code = extr(cookie, "Code=", "&")
    if not code:
        raise SearxEngineAPIException("failed to obtain session code")

    # NOTE(review): the cookie is said to be valid for two months, but if
    # ``expire`` is given in seconds, 60 * 24 * 60 caches the code for one day
    # only -- confirm the intended lifetime before changing the value.
    CACHE.set("session", code, expire=60 * 24 * 60)
    return code
|
||||
|
||||
|
||||
def request(query: str, params: "OnlineParams"):
    """Prepare *params* for a search request.

    Sets ``params["url"]`` to the search URL of the configured
    ``tiger_category`` (query in ``w``, page number in ``page``) and stores the
    session code in the ``Tiger.ch`` cookie.
    """
    # obtained first: this may trigger the challenge round trip
    session_code = _obtain_session_code()

    query_string = urlencode({"w": query, "page": params["pageno"]})
    params["url"] = f"{base_url}/{tiger_category}?{query_string}"
    params["cookies"]["Tiger.ch"] = f"Code={session_code}"
|
||||
|
||||
|
||||
def _add_web_results(res: EngineResults, doc: html.HtmlElement) -> None:
    """Add one result to *res* for each row of the web search result table."""
    link_xpath = ".//a[contains(@class, 'weblink')]"

    for row in eval_xpath_list(doc, "//div[@id='mainContainer']//table/tr"):
        url = extract_text(eval_xpath(row, f"{link_xpath}/@href"))
        if not url:
            # a row without a result link can not be turned into a result
            continue

        res.add(
            res.types.MainResult(
                url=url,
                title=extract_text(eval_xpath(row, link_xpath)) or "",
                content=extract_text(eval_xpath(row, ".//*[contains(@class, 'webbodynopic')]")) or "",
            )
        )


def _add_news_results(res: EngineResults, doc: html.HtmlElement) -> None:
    """Add one result to *res* for each item of the news result panel."""
    link_xpath = ".//a[contains(@class, 'webLink')]"

    for item in eval_xpath_list(doc, "//div[@id='panNews']/div"):
        url = extract_text(eval_xpath(item, f"{link_xpath}/@href"))
        if not url:
            # an item without a result link can not be turned into a result
            continue

        # the date text may be prefixed by a dash, which is stripped before parsing
        date_str = extract_text(eval_xpath(item, ".//span[contains(@class, 'help')]/span")) or ""
        date_str = date_str.strip().removeprefix("-").strip()

        published_date = None
        if date_str:
            try:
                published_date = parser.parse(date_str)
            except (parser.ParserError, OverflowError):
                # best effort: keep the result, just without a date
                pass

        # the image source is appended to base_url
        thumbnail = extract_text(eval_xpath(item, "./img/@src"))
        if thumbnail:
            thumbnail = base_url + thumbnail

        res.add(
            res.types.MainResult(
                url=url,
                title=extract_text(eval_xpath(item, link_xpath)) or "",
                thumbnail=thumbnail or "",
                publishedDate=published_date,
            )
        )


def response(resp: "SXNG_Response") -> EngineResults:
    """Parse the HTML result page in *resp* into engine results.

    The parser is selected by ``tiger_category``: ``"Websuche"`` pages yield
    results with URL, title and content, ``"News"`` pages yield results with
    URL, title, thumbnail and -- when it can be parsed -- a publication date.
    Entries without a result link are skipped.

    :returns: the collected results, empty if nothing matched.
    """
    res = EngineResults()
    doc = html.fromstring(resp.text)

    if tiger_category == "Websuche":
        _add_web_results(res, doc)
    elif tiger_category == "News":
        _add_news_results(res, doc)

    return res
|
||||
@@ -2078,6 +2078,22 @@ engines:
|
||||
shortcut: ts
|
||||
disabled: true
|
||||
|
||||
- name: tiger
|
||||
engine: tiger
|
||||
categories: general
|
||||
tiger_category: Websuche
|
||||
shortcut: tig
|
||||
disabled: true
|
||||
inactive: true
|
||||
|
||||
- name: tiger news
|
||||
engine: tiger
|
||||
categories: news
|
||||
tiger_category: News
|
||||
shortcut: tign
|
||||
disabled: true
|
||||
inactive: true
|
||||
|
||||
- name: tmdb
|
||||
engine: xpath
|
||||
paging: true
|
||||
|
||||
Reference in New Issue
Block a user