# SPDX-License-Identifier: AGPL-3.0-or-later
"""This module implements the Wikidata engine. Some implementations are shared
from :ref:`wikipedia engine`.
"""
# pylint: disable=missing-class-docstring
import typing as t
from hashlib import md5
from urllib.parse import urlencode, unquote
from json import loads
from dateutil.parser import isoparse
from babel.dates import format_datetime, format_date, format_time, get_datetime_format
from searx.data import WIKIDATA_UNITS
from searx.network import post, get
from searx.utils import searxng_useragent, get_string_replaces_function
from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom
from searx.engines.wikipedia import (
fetch_wikimedia_traits,
get_wiki_params,
)
from searx.enginelib.traits import EngineTraits
if t.TYPE_CHECKING:
from searx.extended_types import SXNG_Response
from searx.search.processors import OnlineParams
# about
about = dict(
    website="https://wikidata.org/",
    wikidata_id="Q2013",
    official_api_documentation="https://query.wikidata.org/",
    use_official_api=True,
    require_api_key=False,
    results="JSON",
)

display_type = ["infobox"]
"""Display types to emit, any combination of ``infobox`` and ``list``.
``infobox`` shows a hit in the info box, ``list`` adds a hit to the result
list. Either one or both values can be set."""
# SPARQL
SPARQL_ENDPOINT_URL = "https://query.wikidata.org/sparql"
SPARQL_EXPLAIN_URL = "https://query.wikidata.org/bigdata/namespace/wdq/sparql?explain"
WIKIDATA_PROPERTIES: dict[str | tuple[str, str], str] = {
"P434": "MusicBrainz",
"P435": "MusicBrainz",
"P436": "MusicBrainz",
"P966": "MusicBrainz",
"P345": "IMDb",
"P2397": "YouTube",
"P1651": "YouTube",
"P2002": "Twitter",
"P2013": "Facebook",
"P2003": "Instagram",
"P4033": "Mastodon",
"P11947": "Lemmy",
"P12622": "PeerTube",
}
# SERVICE wikibase:mwapi : https://www.mediawiki.org/wiki/Wikidata_Query_Service/User_Manual/MWAPI
# SERVICE wikibase:label: https://en.wikibooks.org/wiki/SPARQL/SERVICE_-_Label#Manual_Label_SERVICE
# https://en.wikibooks.org/wiki/SPARQL/WIKIDATA_Precision,_Units_and_Coordinates
# https://www.mediawiki.org/wiki/Wikibase/Indexing/RDF_Dump_Format#Data_model
# optimization:
# * https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/query_optimization
# * https://github.com/blazegraph/database/wiki/QueryHints
# Main search query.  ``get_query()`` fills in the placeholders:
#
# - %QUERY%: the (escaped) user search terms handed to the EntitySearch API
# - %LANGUAGE%: the language tag used for the search and for the labels
# - %SELECT%, %WHERE%, %WIKIBASE_LABELS%, %GROUP_BY%: fragments contributed
#   by the attribute objects returned from ``get_attributes()``
QUERY_TEMPLATE = """
SELECT ?item ?itemLabel ?itemDescription ?lat ?long %SELECT%
WHERE
{
SERVICE wikibase:mwapi {
bd:serviceParam wikibase:endpoint "www.wikidata.org";
wikibase:api "EntitySearch";
wikibase:limit 1;
mwapi:search "%QUERY%";
mwapi:language "%LANGUAGE%".
?item wikibase:apiOutputItem mwapi:item.
}
hint:Prior hint:runFirst "true".
%WHERE%
SERVICE wikibase:label {
bd:serviceParam wikibase:language "%LANGUAGE%,en".
?item rdfs:label ?itemLabel .
?item schema:description ?itemDescription .
%WIKIBASE_LABELS%
}
}
GROUP BY ?item ?itemLabel ?itemDescription ?lat ?long %GROUP_BY%
"""
# Get the calendar names and the property names.  ``init()`` replaces
# %ATTRIBUTES% with the space separated ``wd:<ID>`` list of the properties
# whose labels are still unknown.
QUERY_PROPERTY_NAMES = """
SELECT ?item ?name
WHERE {
{
SELECT ?item
WHERE { ?item wdt:P279* wd:Q12132 }
} UNION {
VALUES ?item { %ATTRIBUTES% }
}
OPTIONAL { ?item rdfs:label ?name. }
}
"""
# See the property "dummy value" of https://www.wikidata.org/wiki/Q2013 (Wikidata).
# Hard-coded here to avoid an additional SPARQL request when the server starts.
DUMMY_ENTITY_URLS = {
    f"http://www.wikidata.org/entity/{wid}" for wid in ("Q4115189", "Q13406268", "Q15397819", "Q17339402")
}
# https://www.w3.org/TR/sparql11-query/#rSTRING_LITERAL1
# https://lists.w3.org/Archives/Public/public-rdf-dawg/2011OctDec/0175.html
# Escape a Python string so it can be embedded in a quoted SPARQL string
# literal.  The SPARQL grammar only allows the escape sequences
# ``\t \b \n \r \f \\ \" \'`` (production ECHAR): a backslash followed by the
# *letter*, not by the raw control character.  Raw line feeds and carriage
# returns are not allowed inside a short string literal at all, so control
# characters are mapped to their two-character escape sequence.
sparql_string_escape = get_string_replaces_function(
    # fmt: off
    {
        "\t": "\\t",
        "\n": "\\n",
        "\r": "\\r",
        "\b": "\\b",
        "\f": "\\f",
        "\"": "\\\"",
        "\'": "\\\'",
        "\\": "\\\\"
    }
    # fmt: on
)
# Built from the single replacement ``"http:"`` -> ``"https:"``; get_results()
# applies it to the infobox ID.
replace_http_by_https = get_string_replaces_function({"http:": "https:"})
class WDAttribute:
    """A Wikidata property whose values are returned as one concatenated string.

    Each attribute contributes fragments to the SPARQL query (``get_select``,
    ``get_where``, ``get_wikibase_label``, ``get_group_by``) and knows how to
    read its own value back out of a result row (``get_str``).
    """

    __slots__ = ("name",)

    def __init__(self, name: str):
        self.name: str = name

    def get_select(self):
        """SELECT fragment: the distinct values, joined by ``', '``, as ``?<name>s``."""
        return f"(group_concat(distinct ?{self.name};separator=', ') as ?{self.name}s)"

    def get_label(self, language: str):
        """Display label of the property in ``language``."""
        return get_label_for_entity(self.name, language)

    def get_where(self):
        """WHERE fragment: optional direct (``wdt:``) statement of the property."""
        return f"OPTIONAL {{ ?item wdt:{self.name} ?{self.name} . }}"

    def get_wikibase_label(self):
        """Label-service fragment; empty for this attribute type."""
        return ""

    def get_group_by(self):
        """GROUP BY fragment; empty because the values are aggregated."""
        return ""

    def get_str(self, result: dict[str, t.Any], language: str):  # pylint: disable=unused-argument
        """Return the aggregated ``<name>s`` column of ``result`` (``None`` if absent)."""
        return result.get(f"{self.name}s")

    def __repr__(self):
        return f"<{type(self).__name__}:{self.name}>"
class WDAmountAttribute(WDAttribute):
    """Property selected together with the entity of its quantity unit."""

    def get_select(self):
        """SELECT fragment: the value and its ``?<name>Unit`` companion."""
        return f"?{self.name} ?{self.name}Unit"

    def get_where(self):
        """WHERE fragment: best-rank statement value plus its optional unit."""
        pattern = (
            " OPTIONAL { ?item p:{name} ?{name}Node .\n"
            "?{name}Node rdf:type wikibase:BestRank ; ps:{name} ?{name} .\n"
            "OPTIONAL { ?{name}Node psv:{name}/wikibase:quantityUnit ?{name}Unit. } }"
        )
        return pattern.replace("{name}", self.name)

    def get_group_by(self):
        """GROUP BY fragment: the same variables as in the SELECT fragment."""
        return self.get_select()

    def get_str(self, result: dict[str, t.Any], language: str):
        """Return the value, followed by the unit label when a unit is present."""
        amount = result.get(self.name)
        unit_url = result.get(self.name + "Unit")
        if unit_url is None:
            return amount
        # the unit arrives as an entity URL, the label lookup wants the bare ID
        unit_id = unit_url.replace("http://www.wikidata.org/entity/", "")
        return amount + " " + get_label_for_entity(unit_id, language)
class WDArticle(WDAttribute):
    """Link to the Wikipedia article of the item in one language.

    :param language: language code of the Wikipedia edition.
    :param kwargs: extra keys merged into the infobox URL entry built from
        this attribute (defaults to an empty dict).
    """

    def __init__(self, language: str, kwargs=None):
        super().__init__("wikipedia")
        self.language: str = language
        self.kwargs: dict[str, t.Any] = kwargs or {}

    def get_label(self, language: str):
        """Return ``Wikipedia (<language>)``; the ``language`` argument is ignored."""
        return f"Wikipedia ({self.language})"

    def get_select(self):
        """SELECT fragment: article URL and article name for this language."""
        return f"?article{self.language} ?articleName{self.language}"

    def get_where(self):
        """WHERE fragment matching the sitelink of the item in this language.

        The ``schema:isPartOf`` triple needs an object: the IRI of the
        Wikipedia edition the article belongs to.  Without it the generated
        SPARQL is not valid.
        """
        pattern = (
            "OPTIONAL { ?article{language} schema:about ?item ;\n"
            '    schema:inLanguage "{language}" ;\n'
            "    schema:isPartOf <https://{language}.wikipedia.org/> ;\n"
            "    schema:name ?articleName{language} . }"
        )
        return pattern.replace("{language}", self.language)

    def get_group_by(self):
        """GROUP BY fragment: the same variables as in the SELECT fragment."""
        return self.get_select()

    def get_str(self, result, language: str):
        """Return the article URL from ``result`` (``None`` if absent)."""
        return result.get(f"article{self.language}")
class WDLabelAttribute(WDAttribute):
    """Property whose values are entities; their labels are returned."""

    def get_select(self):
        """SELECT fragment: the distinct labels, joined by ``', '``, as ``?<name>Labels``."""
        return f"(group_concat(distinct ?{self.name}Label;separator=', ') as ?{self.name}Labels)"

    def get_where(self):
        """WHERE fragment: optional direct (``wdt:``) statement of the property."""
        return f"OPTIONAL {{ ?item wdt:{self.name} ?{self.name} . }}"

    def get_wikibase_label(self):
        """Label-service fragment binding ``?<name>Label`` to the value's label."""
        return f"?{self.name} rdfs:label ?{self.name}Label ."

    def get_str(self, result, language):
        """Return the aggregated ``<name>Labels`` column (``None`` if absent)."""
        return result.get(f"{self.name}Labels")
class WDURLAttribute(WDAttribute):
    """Property whose value is a URL, or an ID that can be turned into one."""

    HTTP_WIKIMEDIA_IMAGE = "http://commons.wikimedia.org/wiki/Special:FilePath/"

    def __init__(
        self,
        name: str,
        url_id: str | None = None,
        url_path_prefix: str | None = None,
        kwargs: dict[str, t.Any] | None = None,
    ):
        """
        :param url_id: ID matching one key in ``external_urls.json`` for
            converting IDs to full URLs.

        :param url_path_prefix: Path prefix if the values are of format
            ``account@domain``.  If provided, values are rewritten to
            ``https://<domain><url_path_prefix><account>``.  For example::

                WDURLAttribute('P4033', url_path_prefix='/@')

            Adds Property `P4033 <https://www.wikidata.org/wiki/Property:P4033>`_
            to the wikidata query.  This field might return for example
            ``libreoffice@fosstodon.org`` and the URL built from this is then:

            - account: ``libreoffice``
            - domain: ``fosstodon.org``
            - result url: https://fosstodon.org/@libreoffice

        :param kwargs: extra keys merged into the infobox URL entry built from
            this attribute (defaults to an empty dict).
        """
        super().__init__(name)
        self.url_id = url_id
        self.url_path_prefix = url_path_prefix
        # always a dict: get_results() unpacks it with ``**`` and calls ``.get()``
        self.kwargs: dict[str, t.Any] = kwargs or {}

    def get_str(self, result, language: str):
        """Return the URL for the first value of this property.

        Returns ``None`` when the property has no value, or when
        ``url_path_prefix`` is set and the value is not of the form
        ``account@domain``.
        """
        value = result.get(self.name + "s")
        if not value:
            return None

        # the SELECT fragment concatenates all values; keep the first one
        value = value.split(",")[0]

        if self.url_id:
            url_id = self.url_id
            if value.startswith(WDURLAttribute.HTTP_WIKIMEDIA_IMAGE):
                # Commons file URLs are reduced to the file name and resolved
                # through the ``wikimedia_image`` URL template
                value = value[len(WDURLAttribute.HTTP_WIKIMEDIA_IMAGE) :]
                url_id = "wikimedia_image"
            return get_external_url(url_id, value)

        if self.url_path_prefix:
            parts = [x.strip("@ ") for x in value.rsplit("@", 1)]
            if len(parts) != 2 or not all(parts):
                # not an ``account@domain`` handle: no URL can be built
                return None
            account, domain = parts
            return f"https://{domain}{self.url_path_prefix}{account}"

        return value
class WDGeoAttribute(WDAttribute):
    """Coordinate property, exposed as a latitude / longitude pair."""

    def get_label(self, language: str):
        """Fixed label; the ``language`` argument is not used."""
        return "OpenStreetMap"

    def get_select(self):
        """SELECT fragment: the ``?<name>Lat`` and ``?<name>Long`` variables."""
        return f"?{self.name}Lat ?{self.name}Long"

    def get_where(self):
        """WHERE fragment reading latitude and longitude from the value node."""
        pattern = (
            "OPTIONAL { ?item p:{name}/psv:{name} [\n"
            "wikibase:geoLatitude ?{name}Lat ;\n"
            "wikibase:geoLongitude ?{name}Long ] }"
        )
        return pattern.replace("{name}", self.name)

    def get_group_by(self):
        """GROUP BY fragment: the same variables as in the SELECT fragment."""
        return self.get_select()

    def _lat_long(self, result):
        """Return the ``(latitude, longitude)`` columns of ``result``."""
        return result.get(self.name + "Lat"), result.get(self.name + "Long")

    def get_str(self, result, language: str):
        """Return ``"<latitude> <longitude>"`` or ``None`` if either is missing."""
        latitude, longitude = self._lat_long(result)
        if not (latitude and longitude):
            return None
        return latitude + " " + longitude

    def get_geo_url(self, result, osm_zoom=19):
        """Return the map URL for the coordinates or ``None`` if either is missing."""
        latitude, longitude = self._lat_long(result)
        if not (latitude and longitude):
            return None
        return get_earth_coordinates_url(latitude, longitude, osm_zoom)
class WDImageAttribute(WDURLAttribute):
    """URL attribute carrying an image, ranked by ``priority``.

    get_results() keeps the image of the attribute with the highest priority.
    """

    def __init__(self, name, url_id=None, priority=100):
        super().__init__(name, url_id=url_id)
        self.priority = priority
class WDDateAttribute(WDAttribute):
    """Time-valued property, formatted according to its Wikidata precision."""

    def get_select(self):
        """SELECT fragment: time value, precision, time zone and calendar model."""
        return "?{name} ?{name}timePrecision ?{name}timeZone ?{name}timeCalendar".replace("{name}", self.name)

    def get_where(self):
        """WHERE fragment reading the full time value node of the property."""
        # To remove duplicate, add
        # FILTER NOT EXISTS { ?item p:{name}/psv:{name}/wikibase:timeValue ?{name}bis FILTER (?{name}bis < ?{name}) }
        # this filter is too slow, so the response function ignore duplicate results
        # (see the seen_entities variable)
        pattern = (
            "OPTIONAL { ?item p:{name}/psv:{name} [\n"
            "wikibase:timeValue ?{name} ;\n"
            "wikibase:timePrecision ?{name}timePrecision ;\n"
            "wikibase:timeTimezone ?{name}timeZone ;\n"
            "wikibase:timeCalendarModel ?{name}timeCalendar ] . }\n"
            "hint:Prior hint:rangeSafe true;"
        )
        return pattern.replace("{name}", self.name)

    def get_group_by(self):
        """GROUP BY fragment: the same variables as in the SELECT fragment."""
        return self.get_select()

    def format_8(self, value, locale: str):  # pylint: disable=unused-argument
        """Precision coarser than a year: return the (year) value unchanged."""
        return value

    def format_9(self, value, locale: str):
        """Precision year.  ``value`` is the year as a string."""
        year = int(value)
        if year < 1584:
            # early and negative years are rendered numerically instead of
            # being parsed as a date; negative years are shifted by one
            if year < 0:
                return str(year - 1)
            return str(year)
        timestamp = isoparse(value)
        return format_date(timestamp, format="yyyy", locale=locale)

    def format_10(self, value, locale: str):
        """Precision month."""
        timestamp = isoparse(value)
        return format_date(timestamp, format="MMMM y", locale=locale)

    def format_11(self, value, locale: str):
        """Precision day."""
        timestamp = isoparse(value)
        return format_date(timestamp, format="full", locale=locale)

    def format_13(self, value, locale: str):
        """Precision minute (also used for hour precision, see DATE_FORMAT)."""
        timestamp = isoparse(value)
        # get_datetime_format() expects the *name* of a format.  The builtin
        # ``format`` function used to be passed here by accident, which is not
        # a valid format name; babel's default "medium" is requested instead.
        return (
            get_datetime_format("medium", locale=locale)
            .replace("'", "")
            .replace("{0}", format_time(timestamp, "full", tzinfo=None, locale=locale))
            .replace("{1}", format_date(timestamp, "short", locale=locale))
        )

    def format_14(self, value, locale):
        """Precision second."""
        return format_datetime(isoparse(value), format="full", locale=locale)

    # precision (as returned in the ``timePrecision`` column) -->
    # (name of the format method, flag).  When the flag is >= 1, get_str()
    # reduces the value to its year before calling the format method.
    # NOTE(review): "10" (month) has the flag set as well, so format_10()
    # only ever sees the year and the month is lost -- confirm this is intended.
    DATE_FORMAT = {
        "0": ("format_8", 1000000000),
        "1": ("format_8", 100000000),
        "2": ("format_8", 10000000),
        "3": ("format_8", 1000000),
        "4": ("format_8", 100000),
        "5": ("format_8", 10000),
        "6": ("format_8", 1000),
        "7": ("format_8", 100),
        "8": ("format_8", 10),
        "9": ("format_9", 1),  # year
        "10": ("format_10", 1),  # month
        "11": ("format_11", 0),  # day
        "12": ("format_13", 0),  # hour (not supported by babel, display minute)
        "13": ("format_13", 0),  # minute
        "14": ("format_14", 0),  # second
    }

    def get_str(self, result, language):
        """Return the formatted date, ``None`` when the property has no value.

        Falls back to the unformatted value when the precision is unknown or
        when formatting fails.
        """
        value = result.get(self.name)
        if value == "" or value is None:
            return None

        precision = result.get(self.name + "timePrecision")
        date_format = WDDateAttribute.DATE_FORMAT.get(precision)
        if date_format is None:
            return value

        format_method = getattr(self, date_format[0])
        precision = date_format[1]
        try:
            if precision >= 1:
                # keep only the year, including the leading "-" of negative years
                _t = value.split("-")
                if value.startswith("-"):
                    value = "-" + _t[1]
                else:
                    value = _t[0]
            return format_method(value, language)
        except Exception:  # pylint: disable=broad-except
            # best effort: an unparsable date is shown as it is
            return value
def get_headers() -> dict[str, str]:
    """Return the HTTP headers sent with every SPARQL request."""
    # user agent: https://www.mediawiki.org/wiki/Wikidata_Query_Service/User_Manual#Query_limits
    headers = {"Accept": "application/sparql-results+json"}
    headers["User-Agent"] = searxng_useragent()
    return headers
def get_label_for_entity(entity_id: str, language: str) -> str:
    """Look up the display name of ``entity_id`` in ``WIKIDATA_PROPERTIES``.

    The keys are tried in this order: the bare ID, ``(ID, language)``,
    ``(ID, <language without region>)`` and ``(ID, "en")``.  When none of them
    is known, ``entity_id`` itself is returned.
    """

    def candidate_keys():
        # generated lazily: a later key is only built when the earlier ones missed
        yield entity_id
        yield (entity_id, language)
        yield (entity_id, language.split("-")[0])
        yield (entity_id, "en")

    for key in candidate_keys():
        label = WIKIDATA_PROPERTIES.get(key)
        if label is not None:
            return label
    return entity_id
def send_wikidata_query(query: str, method="GET", **kwargs) -> dict[str, t.Any]:
    """Send ``query`` to the SPARQL endpoint and return the decoded JSON reply.

    :param method: ``"GET"`` sends the query in the URL, any other value sends
        it as POST data.
    :param kwargs: passed through to the HTTP call.

    ``raise_for_status()`` is called on the HTTP response before it is decoded.
    """
    headers = get_headers()
    if method == "GET":
        # query will be cached by wikidata
        url = SPARQL_ENDPOINT_URL + "?" + urlencode({"query": query})
        http_response = get(url, headers=headers, **kwargs)
    else:
        # query won't be cached by wikidata
        http_response = post(SPARQL_ENDPOINT_URL, data={"query": query}, headers=headers, **kwargs)

    if http_response.status_code != 200:
        logger.debug("SPARQL endpoint error %s", http_response.content.decode())
    logger.debug("request time %s", str(http_response.elapsed))
    http_response.raise_for_status()

    body = http_response.content.decode()
    return loads(body)
def request(query: str, params: "OnlineParams") -> None:
    """Fill ``params`` with the POST request for the SPARQL search query.

    Besides the HTTP settings, the language tag and the attribute list are
    stored in ``params`` so that ``response()`` can read them back from
    ``resp.search_params``.
    """
    # get_query() returns (sparql, attributes); only the list is kept under this name
    attributes: list[WDAttribute | WDAmountAttribute | WDLabelAttribute | WDImageAttribute]

    eng_tag, _wiki_netloc = get_wiki_params(params["searxng_locale"], traits)
    query, attributes = get_query(query, eng_tag)
    logger.debug("request --> language %s // len(attributes): %s", eng_tag, len(attributes))

    params["method"] = "POST"
    params["url"] = SPARQL_ENDPOINT_URL
    params["data"] = {"query": query}
    params["headers"] = get_headers()
    params["language"] = eng_tag  # type: ignore
    params["attributes"] = attributes  # type: ignore
def response(resp: "SXNG_Response") -> list[dict[str, t.Any]]:
    """Turn the SPARQL JSON reply into results.

    Every binding is flattened to ``{variable: value}``.  Each entity is
    processed once; repeated rows for the same entity and the dummy entities
    listed in ``DUMMY_ENTITY_URLS`` are skipped.
    """
    results: list[dict[str, t.Any]] = []
    jsonresponse = loads(resp.content.decode())

    language: str = resp.search_params["language"]  # type: ignore
    attributes = resp.search_params["attributes"]  # type: ignore
    logger.debug("response --> language %s // len(attributes): %s", language, len(attributes))

    seen_entities: set[str] = set()
    for binding in jsonresponse.get("results", {}).get("bindings", []):
        attribute_result = {key: value["value"] for key, value in binding.items()}
        entity_url = attribute_result["item"]

        if entity_url in DUMMY_ENTITY_URLS:
            logger.debug("The SPARQL request returns a dummy entity: %s", str(attribute_result))
            continue
        if entity_url in seen_entities:
            logger.debug("The SPARQL request returns duplicate entities: %s", str(attribute_result))
            continue

        seen_entities.add(entity_url)
        results += get_results(attribute_result, attributes, language)

    return results
_IMG_SRC_DEFAULT_URL_PREFIX = "https://commons.wikimedia.org/wiki/Special:FilePath/"
_IMG_SRC_NEW_URL_PREFIX = "https://upload.wikimedia.org/wikipedia/commons/thumb/"


def get_thumbnail(img_src: str) -> str:
    """Get Thumbnail image from wikimedia commons

    Images from commons.wikimedia.org are (HTTP) redirected to
    upload.wikimedia.org.  The redirected URL can be calculated by this
    function.

    - https://stackoverflow.com/a/33691240

    The size is read from the first parameter of the query string
    (``?<key>=<size>&...``).  ``img_src`` is returned unchanged when it is
    empty / ``None``, when it is not a ``Special:FilePath`` URL, or when no
    file name or size can be extracted from it.
    """
    logger.debug("get_thumbnail(): %s", img_src)

    tokens = img_src.split() if img_src else []
    if tokens and _IMG_SRC_DEFAULT_URL_PREFIX in tokens[0]:
        name_part, _, query = img_src.replace(_IMG_SRC_DEFAULT_URL_PREFIX, "").partition("?")

        # Commons file names use "_" for spaces, whether they arrive encoded
        # (%20) or literal; the md5 below must be taken from that form
        file_name = unquote(name_part.replace("%20", "_")).replace(" ", "_")

        # value of the first query parameter, e.g. "500" in "width=500&height=400"
        size = query.partition("=")[2].partition("&")[0]

        if file_name and size:
            # thumbnails of SVG files are served as PNG
            thumb_name = file_name + ".png" if ".svg" in file_name else file_name
            digest = md5(file_name.encode("utf-8")).hexdigest()
            img_src = "".join(
                (
                    _IMG_SRC_NEW_URL_PREFIX,
                    digest[0],
                    "/",
                    digest[0:2],
                    "/",
                    file_name,
                    "/",
                    size,
                    "px-",
                    thumb_name,
                )
            )

    logger.debug("get_thumbnail() redirected: %s", img_src)
    return img_src
def get_results(attribute_result: dict[str, t.Any], attributes, language):
    """Build the results (infobox and / or list entries) for one entity.

    :param attribute_result: one flattened SPARQL binding (``{variable: value}``).
    :param attributes: the attribute objects the query was built from.
    :param language: language used for labels and date formatting.
    :return: a list of result dicts; which kinds are emitted depends on
        ``display_type``.
    """
    # pylint: disable=too-many-branches
    results = []
    infobox_title = attribute_result.get("itemLabel")
    infobox_id = attribute_result["item"]
    infobox_id_lang = None
    infobox_urls = []
    infobox_attributes = []
    infobox_content = attribute_result.get("itemDescription", [])
    img_src = None
    img_src_priority = 0

    for attribute in attributes:
        value = attribute.get_str(attribute_result, language)
        if value is None or value == "":
            continue

        # exact type on purpose: WDImageAttribute is a subclass of
        # WDURLAttribute but must be handled by its own branch
        attribute_type = type(attribute)

        if attribute_type in (WDURLAttribute, WDArticle):
            # tolerate attributes created without kwargs
            extra = attribute.kwargs or {}
            # get_select() method : there is group_concat(distinct ...;separator=", ")
            # split the value here
            for url in value.split(", "):
                infobox_urls.append({"title": attribute.get_label(language), "url": url, **extra})
                # "normal" results (not infobox) include official website and Wikipedia links.
                if "list" in display_type and (extra.get("official") or attribute_type is WDArticle):
                    results.append({"title": infobox_title, "url": url, "content": infobox_content})

                # update the infobox_id with the wikipedia URL
                # first the local wikipedia URL, and as fallback the english wikipedia URL
                if attribute_type is WDArticle and (
                    (attribute.language == "en" and infobox_id_lang is None) or attribute.language != "en"
                ):
                    infobox_id_lang = attribute.language
                    infobox_id = url

        elif attribute_type is WDImageAttribute:
            # this attribute is an image.
            # replace the current image only if the priority is higher
            # (the infobox contains only one image).
            if attribute.priority > img_src_priority:
                img_src = get_thumbnail(value)
                img_src_priority = attribute.priority

        elif attribute_type is WDGeoAttribute:
            # geocoordinate link
            # use the area to get the OSM zoom
            # Note: ignore the unit (must be km² otherwise the calculation is wrong)
            # Should use normalized value p:P2046/psn:P2046/wikibase:quantityAmount
            area = attribute_result.get("P2046")
            osm_zoom = area_to_osm_zoom(area) if area else 19
            url = attribute.get_geo_url(attribute_result, osm_zoom=osm_zoom)
            if url:
                infobox_urls.append({"title": attribute.get_label(language), "url": url, "entity": attribute.name})

        else:
            infobox_attributes.append(
                {"label": attribute.get_label(language), "value": value, "entity": attribute.name}
            )

    if infobox_id:
        infobox_id = replace_http_by_https(infobox_id)

    # add the wikidata URL at the end
    infobox_urls.append({"title": "Wikidata", "url": attribute_result["item"]})

    # an entity with nothing but its Wikidata URL becomes a plain list result
    if (
        "list" in display_type
        and img_src is None
        and len(infobox_attributes) == 0
        and len(infobox_urls) == 1
        and len(infobox_content) == 0
    ):
        results.append({"url": infobox_urls[0]["url"], "title": infobox_title, "content": infobox_content})
    elif "infobox" in display_type:
        results.append(
            {
                "infobox": infobox_title,
                "id": infobox_id,
                "content": infobox_content,
                "img_src": img_src,
                "urls": infobox_urls,
                "attributes": infobox_attributes,
            }
        )
    return results
def get_query(
    query: str, language: str
) -> tuple[str, list[WDAttribute | WDAmountAttribute | WDLabelAttribute | WDImageAttribute]]:
    """Build the SPARQL query for the search terms ``query``.

    :return: the SPARQL query string and the attribute objects whose
        fragments it was assembled from.
    """
    attributes = get_attributes(language)

    select = [a.get_select() for a in attributes]
    # attributes may contribute empty fragments; drop them
    where = [fragment for fragment in (a.get_where() for a in attributes) if fragment]
    wikibase_label = [fragment for fragment in (a.get_wikibase_label() for a in attributes) if fragment]
    group_by = [fragment for fragment in (a.get_group_by() for a in attributes) if fragment]

    # The user supplied text is substituted LAST.  Otherwise search terms
    # containing a placeholder such as "%WHERE%" would be expanded by the
    # later replace() calls, inserting unescaped query fragments into the
    # string literal.
    sparql = (
        QUERY_TEMPLATE.replace("%SELECT%", " ".join(select))
        .replace("%WHERE%", "\n ".join(where))
        .replace("%WIKIBASE_LABELS%", "\n ".join(wikibase_label))
        .replace("%GROUP_BY%", " ".join(group_by))
        .replace("%LANGUAGE%", language)
        .replace("%QUERY%", sparql_string_escape(query))
    )
    return sparql, attributes
def get_attributes(language: str):
    """Return the ordered list of attributes requested for every entity.

    ``language`` selects the Wikipedia edition of the article link; a link to
    the English Wikipedia is added as well unless ``language`` already starts
    with ``en``.
    """
    # pylint: disable=too-many-statements
    attributes: list[WDAttribute | WDAmountAttribute | WDLabelAttribute | WDImageAttribute] = []

    def add_each(factory, *names: str):
        # one attribute per property ID, in the given order
        for name in names:
            attributes.append(factory(name))

    def add_url(name: str, url_id: str | None = None, url_path_prefix: str | None = None, **kwargs):
        attributes.append(WDURLAttribute(name, url_id, url_path_prefix, kwargs))

    def add_image(name: str, url_id: str | None = None, priority: int = 1):
        attributes.append(WDImageAttribute(name, url_id, priority))

    # Dates
    add_each(
        WDDateAttribute,
        "P571",  # inception date
        "P576",  # dissolution date
        "P580",  # start date
        "P582",  # end date
        "P569",  # date of birth
        "P570",  # date of death
        "P619",  # date of spacecraft launch
        "P620",  # date of spacecraft landing
    )

    add_each(
        WDLabelAttribute,
        "P27",  # country of citizenship
        "P495",  # country of origin
        "P17",  # country
        "P159",  # headquarters location
    )

    # Places
    add_each(
        WDLabelAttribute,
        "P36",  # capital
        "P35",  # head of state
        "P6",  # head of government
        "P122",  # basic form of government
        "P37",  # official language
    )
    add_each(WDAttribute, "P1082")  # population
    add_each(WDAmountAttribute, "P2046")  # area
    add_each(WDAmountAttribute, "P281")  # postal code
    add_each(WDLabelAttribute, "P38")  # currency
    add_each(WDAmountAttribute, "P2048")  # height (building)

    # Media
    add_each(
        WDLabelAttribute,
        "P400",  # platform (videogames, computing)
        "P50",  # author
        "P170",  # creator
        "P57",  # director
        "P175",  # performer
        "P178",  # developer
        "P162",  # producer
        "P176",  # manufacturer
        "P58",  # screenwriter
        "P272",  # production company
        "P264",  # record label
        "P123",  # publisher
        "P449",  # original network
        "P750",  # distributed by
        "P86",  # composer
    )
    add_each(WDDateAttribute, "P577")  # publication date
    add_each(WDLabelAttribute, "P136")  # genre (music, film, artistic...)
    add_each(WDLabelAttribute, "P364")  # original language
    add_each(WDAttribute, "P212")  # ISBN-13
    add_each(WDAttribute, "P957")  # ISBN-10
    add_each(WDLabelAttribute, "P275")  # copyright license
    add_each(WDLabelAttribute, "P277")  # programming language
    add_each(WDAttribute, "P348")  # version
    add_each(WDLabelAttribute, "P840")  # narrative location

    # Languages
    add_each(WDAttribute, "P1098")  # number of speakers
    add_each(WDLabelAttribute, "P282")  # writing system
    add_each(WDLabelAttribute, "P1018")  # language regulatory body
    add_each(WDAttribute, "P218")  # language code (ISO 639-1)

    # Other
    add_each(WDLabelAttribute, "P169")  # ceo
    add_each(WDLabelAttribute, "P112")  # founded by
    add_each(WDLabelAttribute, "P1454")  # legal form (company, organization)
    add_each(WDLabelAttribute, "P137")  # operator (service, facility, ...)
    add_each(WDLabelAttribute, "P1029")  # crew members (tripulation)
    add_each(WDLabelAttribute, "P225")  # taxon name
    add_each(WDAttribute, "P274")  # chemical formula
    add_each(WDLabelAttribute, "P1346")  # winner (sports, contests, ...)
    add_each(WDAttribute, "P1120")  # number of deaths
    add_each(WDAttribute, "P498")  # currency code (ISO 4217)

    # URL
    add_url("P856", official=True)  # official website
    attributes.append(WDArticle(language))  # wikipedia (user language)
    if not language.startswith("en"):
        attributes.append(WDArticle("en"))  # wikipedia (english)

    add_url("P1324")  # source code repository
    add_url("P1581")  # blog
    for name, url_id in (
        ("P434", "musicbrainz_artist"),
        ("P435", "musicbrainz_work"),
        ("P436", "musicbrainz_release_group"),
        ("P966", "musicbrainz_label"),
        ("P345", "imdb_id"),
        ("P2397", "youtube_channel"),
        ("P1651", "youtube_video"),
        ("P2002", "twitter_profile"),
        ("P2013", "facebook_profile"),
        ("P2003", "instagram_profile"),
    ):
        add_url(name, url_id=url_id)

    # Fediverse
    add_url("P4033", url_path_prefix="/@")  # Mastodon user
    add_url("P11947", url_path_prefix="/c/")  # Lemmy community
    add_url("P12622", url_path_prefix="/c/")  # PeerTube channel

    # Map
    attributes.append(WDGeoAttribute("P625"))

    # Image: listed by ascending priority
    for priority, name in enumerate(
        (
            "P15",  # route map
            "P242",  # locator map
            "P154",  # logo
            "P18",  # image
            "P41",  # flag
            "P2716",  # collage
            "P2910",  # icon
        ),
        start=1,
    ):
        add_image(name, priority=priority, url_id="wikimedia_image")

    return attributes
def debug_explain_wikidata_query(query: str, method: str = "GET"):
    """Send ``query`` to the SPARQL "explain" URL and return the raw response body.

    ``"GET"`` sends the query in the URL, any other ``method`` sends it as
    POST data.  ``raise_for_status()`` is called on the HTTP response.
    """
    headers = get_headers()
    if method == "GET":
        # SPARQL_EXPLAIN_URL already ends with a query string ("?explain")
        url = SPARQL_EXPLAIN_URL + "&" + urlencode({"query": query})
        http_response = get(url, headers=headers)
    else:
        http_response = post(SPARQL_EXPLAIN_URL, data={"query": query}, headers=headers)
    http_response.raise_for_status()
    return http_response.content
def init(engine_settings=None):  # pylint: disable=unused-argument
    """Complete ``WIKIDATA_PROPERTIES`` with unit symbols and property labels.

    The labels are fetched with one SPARQL request (``QUERY_PROPERTY_NAMES``)
    and stored under ``(entity ID, language)`` keys.
    """
    # WIKIDATA_PROPERTIES : add unit symbols
    WIKIDATA_PROPERTIES.update((unit_id, unit["symbol"]) for unit_id, unit in WIKIDATA_UNITS.items())

    # WIKIDATA_PROPERTIES : add property labels
    # exact types only: subclasses such as the image attribute are not queried
    labelled_types = (WDAttribute, WDAmountAttribute, WDURLAttribute, WDDateAttribute, WDLabelAttribute)
    wikidata_property_names: list[str] = [
        "wd:" + attribute.name
        for attribute in get_attributes("en")
        if type(attribute) in labelled_types and attribute.name not in WIKIDATA_PROPERTIES
    ]

    query = QUERY_PROPERTY_NAMES.replace("%ATTRIBUTES%", " ".join(wikidata_property_names))
    jsonresponse = send_wikidata_query(query, timeout=20)

    for binding in jsonresponse.get("results", {}).get("bindings", {}):
        name_field = binding.get("name")
        if name_field:
            name = name_field["value"]
            lang = name_field["xml:lang"]
            entity_id = binding["item"]["value"].replace("http://www.wikidata.org/entity/", "")
            WIKIDATA_PROPERTIES[(entity_id, lang)] = name.capitalize()
def fetch_traits(engine_traits: EngineTraits):
    """Uses languages evaluated from :py:obj:`wikipedia.fetch_wikimedia_traits
    <searx.engines.wikipedia.fetch_wikimedia_traits>` and resets

    - ``traits.custom['wiki_netloc']``: wikidata does not have net-locations for
      the languages and the list of all

    - ``traits.custom['WIKIPEDIA_LANGUAGES']``: not used in the wikipedia engine

    """
    fetch_wikimedia_traits(engine_traits)
    for key, empty in (("wiki_netloc", {}), ("WIKIPEDIA_LANGUAGES", [])):
        engine_traits.custom[key] = empty