""":mod:`wikidata.entity` --- Wikidata entities
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
"""
import collections.abc
import enum
import logging
import pprint
from typing import (TYPE_CHECKING, Iterator, Mapping, NewType,
Optional, Sequence, Tuple, Union,
cast)
from .multilingual import MultilingualText
if TYPE_CHECKING:
from .client import Client # noqa: F401
__all__ = 'Entity', 'EntityId', 'EntityState', 'EntityType'
#: The identifier of each :class:`Entity`. Alias of :class:`str`.
EntityId = NewType('EntityId', str)
class multilingual_attribute:
    """Descriptor exposing one multilingual attribute of an entity.

    The descriptor reads the mapping stored under ``attribute`` in the
    owning object's ``attributes``, turns it into
    a :class:`MultilingualText`, and memoizes the result in the instance
    ``__dict__`` under the attribute name prefixed with ``'$'``.

    :param attribute: The key to look up in the owner's ``attributes``.
    :type attribute: :class:`str`

    """

    def __init__(self, attribute: str) -> None:
        self.attribute = attribute

    def __get__(self,
                obj: 'Entity',
                cls=None) -> Union[MultilingualText, type]:
        # Accessed on the class itself: hand back the descriptor.
        if obj is None:
            return self
        cache_id = '$' + self.attribute
        instance_dict = obj.__dict__
        if cache_id in instance_dict:
            return instance_dict[cache_id]
        raw = obj.attributes.get(self.attribute) or {}
        assert isinstance(raw, collections.abc.Mapping)
        texts = {}
        for item in raw.values():
            # Both fields are read for every item, even for those that
            # end up being skipped because of a falsy language.
            language, text = item['language'], item['value']
            if language:
                texts[language] = text
        value = MultilingualText(texts)
        instance_dict[cache_id] = value
        return value
class EntityState(enum.Enum):
    """Loading state of an :class:`Entity`.

    .. versionadded:: 0.7.0

    """

    #: (:class:`EntityState`) The entity has not been loaded yet, so it is
    #: unknown whether it exists or not.
    not_loaded = 'not_loaded'

    #: (:class:`EntityState`) The entity exists and has already been loaded.
    loaded = 'loaded'

    #: (:class:`EntityState`) The entity does not exist.
    non_existent = 'non_existent'
class EntityType(enum.Enum):
    """The kind of an :class:`Entity`.  There are two possible values:

    - :attr:`~EntityType.item`
    - :attr:`~EntityType.property`

    .. versionadded:: 0.2.0

    """

    #: (:class:`EntityType`) Items are :class:`Entity` objects which are
    #: typically represented by a Wikipage (at least in some Wikipedia
    #: languages).  An item can be seen as "the thing that a Wikipage is
    #: about," which may be an individual thing (the person
    #: `Albert Einstein`_), a general class of things (the class of all
    #: Physicists_), or any other concept that is the subject of some
    #: Wikipedia page (including things like `History of Berlin`_).
    #:
    #: .. seealso::
    #:
    #:    Items_ --- Wikibase Data Model
    #:       The data model of Wikibase describes the structure of
    #:       the data that is handled in Wikibase.
    #:
    #: .. _Albert Einstein: https://en.wikipedia.org/wiki/Albert_Einstein
    #: .. _Physicists: https://en.wikipedia.org/wiki/Physicist
    #: .. _History of Berlin: https://en.wikipedia.org/wiki/History_of_Berlin
    #: .. _Items: https://www.mediawiki.org/wiki/Wikibase/DataModel#Items
    item = 'item'

    #: (:class:`EntityType`) Properties are :class:`Entity` objects which
    #: describe a relationship between items (or other :class:`Entity`
    #: objects) and values of the property.  Typical properties are
    #: *population* (using numbers as values) and *binomial name* (using
    #: strings as values), but also *has father* and *author of* (both
    #: using items as values).
    #:
    #: .. seealso::
    #:
    #:    Properties_ --- Wikibase Data Model
    #:       The data model of Wikibase describes the structure of
    #:       the data that is handled in Wikibase.
    #:
    #: .. _Properties: https://mediawiki.org/wiki/Wikibase/DataModel#Properties
    property = 'property'
class Entity(collections.abc.Mapping, collections.abc.Hashable):
    r"""Wikidata entity.  Can be an item or a property.  Its attributes
    can be lazily loaded.

    To get an entity use :meth:`Client.get() <wikidata.client.Client.get>`
    method instead of the constructor of :class:`Entity`.

    .. note::

       Although it implements :class:`~typing.Mapping`\ [:class:`EntityId`,
       :class:`object`], it actually is multidict.  See also :meth:`getlist()`
       method.

    .. versionchanged:: 0.2.0
       Implemented :class:`~typing.Mapping`\ [:class:`EntityId`,
       :class:`object`] protocol for easy access of statement values.

    .. versionchanged:: 0.2.0
       Implemented :class:`~typing.Hashable` protocol and
       :token:`==`/:token:`!=` operators for equality test.

    .. attribute:: state

       (:class:`EntityState`) The loading state.

       .. versionadded:: 0.7.0

    """

    label = multilingual_attribute('labels')
    description = multilingual_attribute('descriptions')

    def __init__(self, id: EntityId, client: 'Client') -> None:
        self.id = id
        self.client = client
        # Raw entity data; stays ``None`` until ``load()`` stores it.
        self.data = None  # type: Optional[Mapping[str, object]]
        self.state = EntityState.not_loaded  # type: EntityState

    def __eq__(self, other) -> bool:
        # Comparing with an object that is not an instance of this class
        # raises TypeError instead of evaluating to False.
        if not isinstance(other, type(self)):
            raise TypeError(
                'expected an instance of {0.__module__}.{0.__qualname__}, '
                'not {1!r}'.format(type(self), other)
            )
        # Entities are equal only if they share the same client *object*.
        return other.id == self.id and self.client is other.client

    def __hash__(self) -> int:
        # Mirrors __eq__(): the identifier plus the identity of the client.
        return hash((self.id, id(self.client)))

    def __len__(self) -> int:
        # The number of distinct property ids in the ``claims`` mapping.
        claims_map = self.attributes.get('claims') or {}
        assert isinstance(claims_map, collections.abc.Mapping)
        return len(claims_map)

    def __iter__(self) -> Iterator['Entity']:
        # Yields one entity (obtained through the client) per property id
        # found in the ``claims`` mapping.
        client = self.client
        claims_map = self.attributes.get('claims') or {}
        assert isinstance(claims_map, collections.abc.Mapping)
        for prop_id in claims_map:
            yield client.get(prop_id)

    def __getitem__(self, key: 'Entity') -> object:
        # Only the first value of getlist() is returned; no value at all
        # is reported as a missing key.
        result = self.getlist(key)
        if result:
            return result[0]
        raise KeyError(key)

    def getlist(self, key: 'Entity') -> Sequence[object]:
        r"""Return all values associated to the given ``key`` property
        in sequence.

        Only claims whose main snak has the ``'value'`` snak type are
        decoded; the others are skipped.  The cached entity data is not
        modified.

        :param key: The property entity.
        :type key: :class:`Entity`
        :return: A sequence of all values associated to the given ``key``
                 property.  It can be empty if nothing is associated to
                 the property, or if ``key`` is not a property entity.
        :rtype: :class:`~typing.Sequence`\ [:class:`object`]

        """
        if not (isinstance(key, type(self)) and
                key.type is EntityType.property):
            return []
        claims_map = self.attributes.get('claims') or {}
        assert isinstance(claims_map, collections.abc.Mapping)
        # sorted() builds a new list, so the list held by ``self.data``
        # (which load() may share with another entity through a shallow
        # copy) keeps its original order.
        # FIXME: ranks are compared as plain strings.  In reverse order
        # 'preferred' > 'normal' > 'deprecated' happens to hold
        # alphabetically.
        claims = sorted(claims_map.get(key.id, []),
                        key=lambda claim: claim['rank'],
                        reverse=True)
        logger = logging.getLogger(__name__ + '.Entity.getitem')
        # pprint.pformat() is only worth paying for when DEBUG is enabled.
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug('claim data: %s', pprint.pformat(claims))
        decode = self.client.decode_datavalue
        return [decode(snak['datatype'], snak['datavalue'])
                for snak in (claim['mainsnak'] for claim in claims)
                if snak['snaktype'] == 'value']

    def iterlists(self) -> Iterator[Tuple['Entity', Sequence[object]]]:
        """Lazily yield the pairs of (property, all of its values)."""
        for prop in self:
            yield prop, self.getlist(prop)

    def lists(self) -> Sequence[Tuple['Entity', Sequence[object]]]:
        """Similar to :meth:`items()` except the returning pairs have
        each list of values instead of each single value.

        :return: The pairs of (key, values) where values is a sequence.
        :rtype: :class:`~typing.Sequence`\\ [:class:`~typing.Tuple`\\ \
                [:class:`Entity`, :class:`~typing.Sequence`\\ [:class:`object`]]]

        """
        return list(self.iterlists())

    def iterlistvalues(self) -> Iterator[Sequence[object]]:
        """Lazily yield the sequence of values of each property."""
        for _, values in self.iterlists():
            yield values

    def listvalues(self) -> Sequence[Sequence[object]]:
        """Return the list of what :meth:`iterlistvalues()` yields."""
        return list(self.iterlistvalues())

    @property
    def type(self) -> EntityType:
        """(:class:`EntityType`) The type of entity, :attr:`~EntityType.item`
        or :attr:`~EntityType.property`.

        .. versionadded:: 0.2.0

        """
        # While nothing is loaded yet, ask the client to guess the type
        # from the identifier first.
        if self.data is None:
            guessed_type = self.client.guess_entity_type(self.id)
            if guessed_type is not None:
                return guessed_type
        # If guessing failed, follow the straightforward way.
        return EntityType(self.attributes['type'])

    @property
    def attributes(self) -> Mapping[str, object]:
        """(:class:`~typing.Mapping`\\ [:class:`str`, :class:`object`])
        The raw entity data.  Triggers :meth:`load()` if there is no data
        yet.

        """
        if self.data is None:
            self.load()
        # load() stores no data when the client returns nothing for this
        # entity; in that case this assertion fails.
        assert self.data is not None
        return self.data

    def load(self) -> None:
        """Request the entity data through the client and store it in
        :attr:`data`.

        Nothing is requested if :attr:`state` already is
        :attr:`~EntityState.non_existent`.  If the client returns ``None``,
        :attr:`state` becomes :attr:`~EntityState.non_existent` and
        :attr:`data` is left as it was.  Otherwise :attr:`data` is set and
        :attr:`state` becomes :attr:`~EntityState.loaded`.

        If the response holds its single entity under another identifier
        than :attr:`id`, that identifier replaces :attr:`id`, and
        the entity the client returns for it receives a copy of the data
        unless it already has some.

        """
        if self.state is EntityState.non_existent:
            return
        url = './wiki/Special:EntityData/{}.json'.format(self.id)
        result = self.client.request(url)
        if result is None:
            self.state = EntityState.non_existent
            return
        assert isinstance(result, collections.abc.Mapping)
        entities = result['entities']
        assert isinstance(entities, collections.abc.Mapping)
        assert len(entities) == 1
        redirected = False
        entity_id = self.id
        try:
            data = entities[entity_id]
        except KeyError:
            # The only entity in the response is stored under a different
            # identifier; adopt that one.
            entity_id = cast(EntityId, next(iter(entities)))
            data = entities[entity_id]
            redirected = True
        assert isinstance(data, collections.abc.Mapping)
        self.data = data
        self.id = entity_id
        # The data is available on both paths, so the state has to say so
        # on both paths.
        self.state = EntityState.loaded
        if redirected:
            canon = self.client.get(entity_id, load=False)
            if canon.data is None:
                canon.data = dict(data)
                # Keep the state of the other entity consistent with the
                # data it has just been given.
                canon.state = EntityState.loaded

    def __repr__(self) -> str:
        # Without (non-empty) data only the identifier is shown.
        # An entity with data but no label shows ``Ellipsis`` in place
        # of the label.
        if self.data:
            label = str(self.label) if self.label else ...
        else:
            label = None
        return '<{0.__module__}.{0.__qualname__} {1}{2}>'.format(
            type(self), self.id,
            ' {!r}'.format(label) if label else ''
        )