mirror of
https://github.com/searxng/searxng.git
synced 2026-06-08 19:07:50 +02:00
[feat] engines: add tiger.ch engine
Add support for https://tiger.ch (general, news). It is disabled and inactive by default because it is just a metasearch engine, like SearXNG itself, so it is mostly useful for bypassing rate limits on other engines (it has its own German index, but that index is not great). In theory it supports different locales, but I have not implemented that (I only need German and English results anyway, and those are returned by default).
This commit is contained in:
@@ -0,0 +1,167 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
"""Tiger_ is a Swiss meta search engine.
|
||||||
|
|
||||||
|
.. _Tiger: https://tiger.ch
|
||||||
|
"""
|
||||||
|
|
||||||
|
from json import loads
|
||||||
|
import random
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
|
||||||
|
import typing as t
|
||||||
|
|
||||||
|
from dateutil import parser
|
||||||
|
from lxml import html
|
||||||
|
|
||||||
|
from searx.exceptions import SearxEngineAPIException
|
||||||
|
from searx.extended_types import SXNG_Response
|
||||||
|
from searx.network import get, post
|
||||||
|
from searx.result_types import EngineResults
|
||||||
|
from searx.utils import extr, eval_xpath_list, eval_xpath, extract_text
|
||||||
|
from searx.enginelib import EngineCache
|
||||||
|
|
||||||
|
if t.TYPE_CHECKING:
|
||||||
|
from searx.search.processors import OnlineParams
|
||||||
|
|
||||||
|
# Engine metadata: results are scraped from HTML pages, no official API and
# no API key are involved.
about = {
    "website": "https://tiger.ch",
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",
}

# Root URL used for the challenge pages, the search pages and thumbnails.
base_url = "https://tiger.ch"

# Result pages are requested by page number.
paging = True

# Left empty here; each engine instance sets its categories in the settings.
categories = []

tiger_category = "Websuche"
"""Section of tiger.ch to query, either ``"Websuche"`` (web search) or
``"News"``."""
|
||||||
|
|
||||||
|
|
||||||
|
CACHE: EngineCache
"""Engine cache holding the session code that is obtained by solving the
CAPTCHA.  The cache object itself is created in :py:obj:`setup`."""
|
||||||
|
|
||||||
|
|
||||||
|
def init(_):
    """Validate the engine configuration.

    Raises :py:obj:`ValueError` when ``tiger_category`` is neither
    ``"Websuche"`` nor ``"News"``.  The engine settings argument is unused.
    """
    supported_categories = ("Websuche", "News")
    if tiger_category in supported_categories:
        return
    raise ValueError("invalid search category: %s" % tiger_category)
|
||||||
|
|
||||||
|
|
||||||
|
def setup(engine_settings: dict[str, t.Any]) -> bool:
    """Create the module-wide :py:obj:`CACHE` for this engine instance.

    The cache is named after the ``name`` entry of ``engine_settings``.
    Always returns ``True``.
    """
    global CACHE  # pylint: disable=global-statement

    engine_name = engine_settings["name"]
    CACHE = EngineCache(engine_name)
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def _obtain_session_code() -> str:
    """Return a session code, solving the math CAPTCHA if none is cached.

    The challenge works like this:

    - We first generate 3 random numbers.
    - Then we send them to /Human.svc/Make to get the operators (+, -) for the
      math challenge (i.e. a simple calculation)
    - Based on the operators, we calculate a result (usually done by the user
      by hand)
    - We send the result of the math calculation to the server to obtain a
      session "code" that has to be sent as cookie parameter for all searches

    E.g., challenges look like ``18-3+5``.

    :return: the session code, either from the cache or freshly obtained.
    :raises SearxEngineAPIException: if the challenge page lacks one of the
        expected hidden form fields, or if no session code can be extracted
        from the response cookie.
    """
    cached_session = CACHE.get("session")
    if cached_session:
        return cached_session

    results_page = get(f"{base_url}/_internCode.aspx")
    doc = html.fromstring(results_page.text)

    # Hidden form fields of the challenge page; they are posted back together
    # with the answer.
    extra_data: dict[str, str] = {}
    for extra_param in ("__VIEWSTATE", "__VIEWSTATEGENERATOR", "__EVENTVALIDATION"):
        values = doc.xpath(f"//input[@name='{extra_param}']/@value")
        if not values:
            raise SearxEngineAPIException(f"challenge page is missing the {extra_param} field")
        extra_data[extra_param] = values[0]

    # The numbers mirror the JavaScript quoted below.  Math.random() is in
    # [0, 1), so Math.floor(Math.random() * 8 + k) yields k .. k + 7, i.e. the
    # inclusive upper bounds are 18 and 8.
    # var z1 = Math.floor((Math.random() * 8) + 11);
    # var z2 = Math.floor((Math.random() * 8) + 1);
    # var z3 = Math.floor((Math.random() * 8) + 1);
    num1 = random.randint(11, 18)
    num2 = random.randint(1, 8)
    num3 = random.randint(1, 8)

    challenge = get(f"{base_url}/Services/Human.svc/Make?M1={num1}&M2={num2}&M3={num3}", cookies=results_page.cookies)
    # The "d" member is itself a JSON document; its first item carries the two
    # operators.
    signs = loads(challenge.json()["d"])[0]
    sign1 = signs["Z1"]
    sign2 = signs["Z2"]

    # Evaluate "num1 <sign1> num2 <sign2> num3" from left to right; anything
    # that is not "+" is treated as "-".
    result = num1
    for num, sign in [(num2, sign1), (num3, sign2)]:
        if sign == "+":
            result += num
        else:
            result -= num

    logger.debug("got challenge: %s %s %s %s %s = %s", num1, sign1, num2, sign2, num3, result)

    data = {
        **extra_data,
        "txtM": str(result),
        "btnHuman": "OK",
    }
    challenge_response = post(
        f"{base_url}/_internCode.aspx",
        cookies=results_page.cookies,
        data=data,
    )

    # A rejected answer yields no "Tiger.ch" cookie; report that as an engine
    # error instead of failing with a bare KeyError.
    cookie = challenge_response.cookies.get("Tiger.ch") or ""
    code = extr(cookie, "Code=", "&")
    if not code:
        raise SearxEngineAPIException("failed to obtain session code")

    # The cookie is valid for two months; ``expire`` is given in seconds, so
    # this is 60 days * 24 h * 60 min * 60 s.
    CACHE.set("session", code, expire=60 * 24 * 60 * 60)
    return code
|
||||||
|
|
||||||
|
|
||||||
|
def request(query: str, params: "OnlineParams"):
    """Prepare the search request for the configured ``tiger_category``.

    Writes the search URL (query ``w`` and page number ``page``) to
    ``params["url"]`` and sends the session code as the ``Tiger.ch`` cookie.
    """
    session_code = _obtain_session_code()

    query_string = urlencode({"w": query, "page": params["pageno"]})
    params["url"] = f"{base_url}/{tiger_category}?{query_string}"
    params["cookies"]["Tiger.ch"] = f"Code={session_code}"
|
||||||
|
|
||||||
|
|
||||||
|
def response(resp: "SXNG_Response") -> EngineResults:
    """Parse a tiger.ch result page into :py:obj:`EngineResults`.

    The expected markup depends on ``tiger_category``: web results are read
    from the table rows inside ``#mainContainer``, news results from the
    ``div`` children of ``#panNews``.  Entries without a result link are
    skipped.  A news date that cannot be parsed does not drop the result, the
    result is just added without a date.
    """
    res = EngineResults()
    doc = html.fromstring(resp.text)

    if tiger_category == "Websuche":
        for result in eval_xpath_list(doc, "//div[@id='mainContainer']//table/tr"):
            url = extract_text(eval_xpath(result, ".//a[contains(@class, 'weblink')]/@href"))
            if not url:
                # row without a result link, nothing to add
                continue

            res.add(
                res.types.MainResult(
                    url=url,
                    title=extract_text(eval_xpath(result, ".//a[contains(@class, 'weblink')]")) or "",
                    content=extract_text(eval_xpath(result, ".//*[contains(@class, 'webbodynopic')]")) or "",
                )
            )

    elif tiger_category == "News":
        for result in eval_xpath_list(doc, "//div[@id='panNews']/div"):
            url = extract_text(eval_xpath(result, ".//a[contains(@class, 'webLink')]/@href"))
            if not url:
                # entry without a result link, nothing to add
                continue

            # The date text may be prefixed by a "-" separator.
            date_str = extract_text(eval_xpath(result, ".//span[contains(@class, 'help')]/span")) or ""
            date_str = date_str.strip().removeprefix("-").strip()
            try:
                published_date = parser.parse(date_str) if date_str else None
            except (parser.ParserError, OverflowError):
                published_date = None

            # The image source is treated as a path below ``base_url``.
            thumbnail = extract_text(eval_xpath(result, "./img/@src"))
            if thumbnail:
                thumbnail = base_url + thumbnail

            res.add(
                res.types.MainResult(
                    url=url,
                    title=extract_text(eval_xpath(result, ".//a[contains(@class, 'webLink')]")) or "",
                    thumbnail=thumbnail or "",
                    publishedDate=published_date,
                )
            )

    return res
|
||||||
@@ -2078,6 +2078,22 @@ engines:
|
|||||||
shortcut: ts
|
shortcut: ts
|
||||||
disabled: true
|
disabled: true
|
||||||
|
|
||||||
|
# tiger.ch is itself a metasearch engine, so this engine is disabled and
# inactive by default; it is mostly useful for bypassing rate limits on other
# engines.
- name: tiger
  engine: tiger
  categories: general
  tiger_category: Websuche
  shortcut: tig
  disabled: true
  inactive: true
|
||||||
|
|
||||||
|
# News section of tiger.ch.  tiger.ch is itself a metasearch engine, so this
# engine is disabled and inactive by default.
- name: tiger news
  engine: tiger
  categories: news
  tiger_category: News
  shortcut: tign
  disabled: true
  inactive: true
|
||||||
|
|
||||||
- name: tmdb
|
- name: tmdb
|
||||||
engine: xpath
|
engine: xpath
|
||||||
paging: true
|
paging: true
|
||||||
|
|||||||
Reference in New Issue
Block a user