mirror of
https://github.com/searxng/searxng.git
synced 2026-05-07 18:03:51 +02:00
[fix] unsplash: fix engine due to anubis bot blocking (#5907)
Unsplash started using [Anubis](https://anubis.techaro.lol/) for blocking crawlers. Therefore, requests using common user agents (e.g. Firefox, Chrome) must pass a JavaScript challenge. However, other user agents seem unaffected for now, hence setting the UA to something different still works.
This commit is contained in:
@@ -4,6 +4,8 @@
|
|||||||
from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
|
from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
|
||||||
from json import loads
|
from json import loads
|
||||||
|
|
||||||
|
from searx.utils import searxng_useragent
|
||||||
|
|
||||||
# about
|
# about
|
||||||
about = {
|
about = {
|
||||||
"website": 'https://unsplash.com',
|
"website": 'https://unsplash.com',
|
||||||
@@ -31,6 +33,13 @@ def clean_url(url):
|
|||||||
def request(query, params):
    """Build the Unsplash search request URL and headers.

    :param query: the search term entered by the user
    :param params: SearXNG request parameters dict, modified in place
        (reads ``params['pageno']``, writes ``params['url']`` and a
        ``User-Agent`` header)
    :return: the updated ``params`` dict
    """
    params['url'] = search_url + urlencode({'query': query, 'page': params['pageno'], 'per_page': page_size})
    logger.debug("query_url --> %s", params['url'])

    # common user agents (e.g. Firefox, Chrome) are blocked
    # by Anubis (https://anubis.techaro.lol/)
    # so we pass the searxng user agent instead, which is not
    # commonly used by crawlers and hence not blocked
    params["headers"]["User-Agent"] = searxng_useragent()

    return params
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user