[fix] google engine: Result image thumbnails

This commit is contained in:
Aadniz
2026-03-15 19:16:03 +01:00
committed by Bnyro
parent 3c1f68c59e
commit 4c4ed4b198
2 changed files with 33 additions and 23 deletions
+15 -21
View File
@@ -327,23 +327,16 @@ def request(query: str, params: "OnlineParams") -> None:
params["headers"].update(google_info["headers"]) params["headers"].update(google_info["headers"])
# =26;[3,"dimg_ZNMiZPCqE4apxc8P3a2tuAQ_137"]a87;data:image/jpeg;base64,/9j/4AAQSkZJRgABA # regex match to get image map that is found inside the returned javascript:
# ...6T+9Nl4cnD+gr9OK8I56/tX3l86nWYw//2Q==26; # (function(){google.ldi={ ... };google.pim={ ... };google.sib=false;google ...
RE_DATA_IMAGE = re.compile(r'"(dimg_[^"]*)"[^;]*;(data:image[^;]*;[^;]*);') RE_DATA_IMAGE = re.compile(r'"((?:dimg|pimg|tsuid)_[^"]*)":"((?:https?:)?//[^"]*)')
RE_DATA_IMAGE_end = re.compile(r'"(dimg_[^"]*)"[^;]*;(data:image[^;]*;[^;]*)$')
def parse_url_images(text: str):
    """Collect the image-id -> thumbnail-URL map embedded in Google's response.

    Every match of the module-level ``RE_DATA_IMAGE`` pattern in *text*
    contributes one entry: the first capture group is the image id, the
    second is the URL as it is written in the page's JavaScript, i.e. with
    backslash escapes still in place.  The escapes are decoded before the
    URL is stored.

    :param text: raw body of the search response.
    :return: ``dict`` mapping image id to the decoded image URL.  Entries
        whose URL contains a malformed escape sequence are left out.
    """
    data_image_map = {}
    for img_id, image_url in RE_DATA_IMAGE.findall(text):
        try:
            # The ``unicode-escape`` codec reads its input bytes as Latin-1, so
            # encoding with UTF-8 first would turn every non-ASCII character
            # into mojibake.  Latin-1 plus ``backslashreplace`` round-trips all
            # code points: Latin-1 characters stay single bytes, everything
            # else becomes a ``\uXXXX`` / ``\UXXXXXXXX`` escape that the
            # decoder restores.
            decoded_url = image_url.encode("latin-1", "backslashreplace").decode("unicode-escape")
        except UnicodeDecodeError:
            # A truncated escape (e.g. a trailing backslash) must not abort the
            # parsing of the whole response; drop only the broken entry.
            logger.debug("ignoring image %s: malformed escape sequence in URL", img_id)
            continue
        data_image_map[img_id] = decoded_url
    logger.debug("data:image objects --> %s", list(data_image_map.keys()))
    return data_image_map
@@ -352,7 +345,7 @@ def response(resp: "SXNG_Response"):
"""Get response from google's search request""" """Get response from google's search request"""
# pylint: disable=too-many-branches, too-many-statements # pylint: disable=too-many-branches, too-many-statements
detect_google_sorry(resp) detect_google_sorry(resp)
data_image_map = parse_data_images(resp.text) data_image_map = parse_url_images(resp.text)
results = EngineResults() results = EngineResults()
@@ -392,15 +385,16 @@ def response(resp: "SXNG_Response"):
content = extract_text(content_nodes) content = extract_text(content_nodes)
thumbnail = result.xpath(".//img/@src") # Images that are NOT the favicon
if thumbnail: xpath_image = eval_xpath_getindex(result, './/img[not(@class="XNo5Ab")]', index=0, default=None)
thumbnail = thumbnail[0]
thumbnail = None
if xpath_image is not None:
thumbnail = xpath_image.get("src")
if thumbnail.startswith("data:image"): if thumbnail.startswith("data:image"):
img_id = result.xpath(".//img/@id") img_id = xpath_image.get("id")
if img_id: if img_id:
thumbnail = data_image_map.get(img_id[0]) thumbnail = data_image_map.get(img_id)
else:
thumbnail = None
results.append({"url": url, "title": title, "content": content or '', "thumbnail": thumbnail}) results.append({"url": url, "title": title, "content": content or '', "thumbnail": thumbnail})
+18 -2
View File
@@ -11,7 +11,7 @@
.. _data URLs: .. _data URLs:
https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs
""" """
import re
from urllib.parse import urlencode, urlparse, parse_qs, unquote from urllib.parse import urlencode, urlparse, parse_qs, unquote
from lxml import html from lxml import html
@@ -29,7 +29,6 @@ from searx.engines.google import (
suggestion_xpath, suggestion_xpath,
detect_google_sorry, detect_google_sorry,
ui_async, ui_async,
parse_data_images,
) )
from searx.utils import get_embeded_stream_url from searx.utils import get_embeded_stream_url
@@ -52,6 +51,23 @@ time_range_support = True
safesearch = True safesearch = True
# =26;[3,"dimg_ZNMiZPCqE4apxc8P3a2tuAQ_137"]a87;data:image/jpeg;base64,/9j/4AAQSkZJRgABA
# ...6T+9Nl4cnD+gr9OK8I56/tX3l86nWYw//2Q==26;
RE_DATA_IMAGE = re.compile(r'"(dimg_[^"]*)"[^;]*;(data:image[^;]*;[^;]*);?')
def parse_data_images(text: str):
    """Build a map from image id to inline ``data:image`` URL found in *text*.

    Each match of the module-level ``RE_DATA_IMAGE`` pattern yields an image
    id and a data URL.  When the data URL contains an ``=`` past its first
    character, everything after the last ``=`` is cut off; otherwise the
    captured value is stored unchanged.

    :param text: raw body of the search response.
    :return: ``dict`` mapping image id to the (possibly trimmed) data URL.
    """
    images = {}
    for match in RE_DATA_IMAGE.finditer(text):
        key, payload = match.groups()
        last_eq = payload.rfind("=")
        # Keep the payload up to and including the final "=", dropping
        # whatever follows it.
        images[key] = payload[: last_eq + 1] if last_eq > 0 else payload
    logger.debug("data:image objects --> %s", list(images.keys()))
    return images
def request(query, params): def request(query, params):
"""Google-Video search request""" """Google-Video search request"""
google_info = get_google_info(params, traits) google_info = get_google_info(params, traits)