mirror of
https://github.com/searxng/searxng.git
synced 2026-05-27 13:20:11 +02:00
[fix] google engine: Result image thumbnails
This commit is contained in:
+15
-21
@@ -327,23 +327,16 @@ def request(query: str, params: "OnlineParams") -> None:
|
|||||||
params["headers"].update(google_info["headers"])
|
params["headers"].update(google_info["headers"])
|
||||||
|
|
||||||
|
|
||||||
# =26;[3,"dimg_ZNMiZPCqE4apxc8P3a2tuAQ_137"]a87;data:image/jpeg;base64,/9j/4AAQSkZJRgABA
|
# regex match to get image map that is found inside the returned javascript:
|
||||||
# ...6T+9Nl4cnD+gr9OK8I56/tX3l86nWYw//2Q==26;
|
# (function(){google.ldi={ ... };google.pim={ ... };google.sib=false;google ...
|
||||||
RE_DATA_IMAGE = re.compile(r'"(dimg_[^"]*)"[^;]*;(data:image[^;]*;[^;]*);')
|
RE_DATA_IMAGE = re.compile(r'"((?:dimg|pimg|tsuid)_[^"]*)":"((?:https?:)?//[^"]*)')
|
||||||
RE_DATA_IMAGE_end = re.compile(r'"(dimg_[^"]*)"[^;]*;(data:image[^;]*;[^;]*)$')
|
|
||||||
|
|
||||||
|
|
||||||
def parse_data_images(text: str):
|
def parse_url_images(text: str):
|
||||||
data_image_map = {}
|
data_image_map = {}
|
||||||
|
|
||||||
for img_id, data_image in RE_DATA_IMAGE.findall(text):
|
for img_id, image_url in RE_DATA_IMAGE.findall(text):
|
||||||
end_pos = data_image.rfind("=")
|
data_image_map[img_id] = image_url.encode('utf-8').decode("unicode-escape")
|
||||||
if end_pos > 0:
|
|
||||||
data_image = data_image[: end_pos + 1]
|
|
||||||
data_image_map[img_id] = data_image
|
|
||||||
last = RE_DATA_IMAGE_end.search(text)
|
|
||||||
if last:
|
|
||||||
data_image_map[last.group(1)] = last.group(2)
|
|
||||||
logger.debug("data:image objects --> %s", list(data_image_map.keys()))
|
logger.debug("data:image objects --> %s", list(data_image_map.keys()))
|
||||||
return data_image_map
|
return data_image_map
|
||||||
|
|
||||||
@@ -352,7 +345,7 @@ def response(resp: "SXNG_Response"):
|
|||||||
"""Get response from google's search request"""
|
"""Get response from google's search request"""
|
||||||
# pylint: disable=too-many-branches, too-many-statements
|
# pylint: disable=too-many-branches, too-many-statements
|
||||||
detect_google_sorry(resp)
|
detect_google_sorry(resp)
|
||||||
data_image_map = parse_data_images(resp.text)
|
data_image_map = parse_url_images(resp.text)
|
||||||
|
|
||||||
results = EngineResults()
|
results = EngineResults()
|
||||||
|
|
||||||
@@ -392,15 +385,16 @@ def response(resp: "SXNG_Response"):
|
|||||||
|
|
||||||
content = extract_text(content_nodes)
|
content = extract_text(content_nodes)
|
||||||
|
|
||||||
thumbnail = result.xpath(".//img/@src")
|
# Images that are NOT the favicon
|
||||||
if thumbnail:
|
xpath_image = eval_xpath_getindex(result, './/img[not(@class="XNo5Ab")]', index=0, default=None)
|
||||||
thumbnail = thumbnail[0]
|
|
||||||
|
thumbnail = None
|
||||||
|
if xpath_image is not None:
|
||||||
|
thumbnail = xpath_image.get("src")
|
||||||
if thumbnail.startswith("data:image"):
|
if thumbnail.startswith("data:image"):
|
||||||
img_id = result.xpath(".//img/@id")
|
img_id = xpath_image.get("id")
|
||||||
if img_id:
|
if img_id:
|
||||||
thumbnail = data_image_map.get(img_id[0])
|
thumbnail = data_image_map.get(img_id)
|
||||||
else:
|
|
||||||
thumbnail = None
|
|
||||||
|
|
||||||
results.append({"url": url, "title": title, "content": content or '', "thumbnail": thumbnail})
|
results.append({"url": url, "title": title, "content": content or '', "thumbnail": thumbnail})
|
||||||
|
|
||||||
|
|||||||
@@ -11,7 +11,7 @@
|
|||||||
.. _data URLs:
|
.. _data URLs:
|
||||||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs
|
https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs
|
||||||
"""
|
"""
|
||||||
|
import re
|
||||||
from urllib.parse import urlencode, urlparse, parse_qs, unquote
|
from urllib.parse import urlencode, urlparse, parse_qs, unquote
|
||||||
from lxml import html
|
from lxml import html
|
||||||
|
|
||||||
@@ -29,7 +29,6 @@ from searx.engines.google import (
|
|||||||
suggestion_xpath,
|
suggestion_xpath,
|
||||||
detect_google_sorry,
|
detect_google_sorry,
|
||||||
ui_async,
|
ui_async,
|
||||||
parse_data_images,
|
|
||||||
)
|
)
|
||||||
from searx.utils import get_embeded_stream_url
|
from searx.utils import get_embeded_stream_url
|
||||||
|
|
||||||
@@ -52,6 +51,23 @@ time_range_support = True
|
|||||||
safesearch = True
|
safesearch = True
|
||||||
|
|
||||||
|
|
||||||
|
# =26;[3,"dimg_ZNMiZPCqE4apxc8P3a2tuAQ_137"]a87;data:image/jpeg;base64,/9j/4AAQSkZJRgABA
|
||||||
|
# ...6T+9Nl4cnD+gr9OK8I56/tX3l86nWYw//2Q==26;
|
||||||
|
RE_DATA_IMAGE = re.compile(r'"(dimg_[^"]*)"[^;]*;(data:image[^;]*;[^;]*);?')
|
||||||
|
|
||||||
|
|
||||||
|
def parse_data_images(text: str):
|
||||||
|
data_image_map = {}
|
||||||
|
|
||||||
|
for img_id, data_image in RE_DATA_IMAGE.findall(text):
|
||||||
|
end_pos = data_image.rfind("=")
|
||||||
|
if end_pos > 0:
|
||||||
|
data_image = data_image[: end_pos + 1]
|
||||||
|
data_image_map[img_id] = data_image
|
||||||
|
logger.debug("data:image objects --> %s", list(data_image_map.keys()))
|
||||||
|
return data_image_map
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
"""Google-Video search request"""
|
"""Google-Video search request"""
|
||||||
google_info = get_google_info(params, traits)
|
google_info = get_google_info(params, traits)
|
||||||
|
|||||||
Reference in New Issue
Block a user