[fix] google engine: Result image thumbnails

This commit is contained in:
Aadniz
2026-03-15 19:16:03 +01:00
committed by Bnyro
parent 3c1f68c59e
commit 4c4ed4b198
2 changed files with 33 additions and 23 deletions
+15 -21
View File
@@ -327,23 +327,16 @@ def request(query: str, params: "OnlineParams") -> None:
params["headers"].update(google_info["headers"])
# =26;[3,"dimg_ZNMiZPCqE4apxc8P3a2tuAQ_137"]a87;data:image/jpeg;base64,/9j/4AAQSkZJRgABA
# ...6T+9Nl4cnD+gr9OK8I56/tX3l86nWYw//2Q==26;
RE_DATA_IMAGE = re.compile(r'"(dimg_[^"]*)"[^;]*;(data:image[^;]*;[^;]*);')
RE_DATA_IMAGE_end = re.compile(r'"(dimg_[^"]*)"[^;]*;(data:image[^;]*;[^;]*)$')
# regex match to get image map that is found inside the returned javascript:
# (function(){google.ldi={ ... };google.pim={ ... };google.sib=false;google ...
RE_DATA_IMAGE = re.compile(r'"((?:dimg|pimg|tsuid)_[^"]*)":"((?:https?:)?//[^"]*)')
def parse_data_images(text: str):
def parse_url_images(text: str):
data_image_map = {}
for img_id, data_image in RE_DATA_IMAGE.findall(text):
end_pos = data_image.rfind("=")
if end_pos > 0:
data_image = data_image[: end_pos + 1]
data_image_map[img_id] = data_image
last = RE_DATA_IMAGE_end.search(text)
if last:
data_image_map[last.group(1)] = last.group(2)
for img_id, image_url in RE_DATA_IMAGE.findall(text):
data_image_map[img_id] = image_url.encode('utf-8').decode("unicode-escape")
logger.debug("data:image objects --> %s", list(data_image_map.keys()))
return data_image_map
@@ -352,7 +345,7 @@ def response(resp: "SXNG_Response"):
"""Get response from google's search request"""
# pylint: disable=too-many-branches, too-many-statements
detect_google_sorry(resp)
data_image_map = parse_data_images(resp.text)
data_image_map = parse_url_images(resp.text)
results = EngineResults()
@@ -392,15 +385,16 @@ def response(resp: "SXNG_Response"):
content = extract_text(content_nodes)
thumbnail = result.xpath(".//img/@src")
if thumbnail:
thumbnail = thumbnail[0]
# Images that are NOT the favicon
xpath_image = eval_xpath_getindex(result, './/img[not(@class="XNo5Ab")]', index=0, default=None)
thumbnail = None
if xpath_image is not None:
thumbnail = xpath_image.get("src")
if thumbnail.startswith("data:image"):
img_id = result.xpath(".//img/@id")
img_id = xpath_image.get("id")
if img_id:
thumbnail = data_image_map.get(img_id[0])
else:
thumbnail = None
thumbnail = data_image_map.get(img_id)
results.append({"url": url, "title": title, "content": content or '', "thumbnail": thumbnail})
+18 -2
View File
@@ -11,7 +11,7 @@
.. _data URLs:
https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs
"""
import re
from urllib.parse import urlencode, urlparse, parse_qs, unquote
from lxml import html
@@ -29,7 +29,6 @@ from searx.engines.google import (
suggestion_xpath,
detect_google_sorry,
ui_async,
parse_data_images,
)
from searx.utils import get_embeded_stream_url
@@ -52,6 +51,23 @@ time_range_support = True
safesearch = True
# =26;[3,"dimg_ZNMiZPCqE4apxc8P3a2tuAQ_137"]a87;data:image/jpeg;base64,/9j/4AAQSkZJRgABA
# ...6T+9Nl4cnD+gr9OK8I56/tX3l86nWYw//2Q==26;
RE_DATA_IMAGE = re.compile(r'"(dimg_[^"]*)"[^;]*;(data:image[^;]*;[^;]*);?')
def parse_data_images(text: str):
data_image_map = {}
for img_id, data_image in RE_DATA_IMAGE.findall(text):
end_pos = data_image.rfind("=")
if end_pos > 0:
data_image = data_image[: end_pos + 1]
data_image_map[img_id] = data_image
logger.debug("data:image objects --> %s", list(data_image_map.keys()))
return data_image_map
def request(query, params):
"""Google-Video search request"""
google_info = get_google_info(params, traits)