""":mod:`wikidata.client` --- Client session
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
"""
import io
import json
import logging
from typing import (TYPE_CHECKING,
Callable, Mapping, MutableMapping, Optional, Sequence,
Union, cast)
import urllib.error
import urllib.parse
import urllib.request
import weakref
from .cache import CacheKey, CachePolicy, NullCachePolicy
from .entity import Entity, EntityId, EntityType
# Imported for static type checking only: ``Decoder`` is referenced as a
# string annotation in this module, and ``Client.__init__`` imports it lazily
# at call time when it actually needs an instance.
if TYPE_CHECKING:
    from .datavalue import Decoder  # noqa: F401
__all__ = 'WIKIDATA_BASE_URL', 'Client'
#: (:class:`str`) The default ``base_url`` of the :class:`Client` constructor.
#:
#: .. versionchanged:: 0.3.0
#:    Following the change in meaning of the :class:`Client` constructor's
#:    ``base_url`` parameter, the value changed from
#:    ``https://www.wikidata.org/wiki/`` (which contained the trailing path
#:    ``wiki/``) to ``https://www.wikidata.org/``.
WIKIDATA_BASE_URL = 'https://www.wikidata.org/'
class Client:
    """Wikidata client session.

    :param base_url: The base url of the Wikidata.
                     :const:`WIKIDATA_BASE_URL` is used by default.
    :type base_url: :class:`str`
    :param opener: The opener for :mod:`urllib.request`.
                   If omitted or :const:`None` the default opener is used.
    :type opener: :class:`urllib.request.OpenerDirector`
    :param datavalue_decoder: The callable used by :meth:`decode_datavalue`.
                              If omitted or :const:`None` a new
                              :class:`~.datavalue.Decoder` instance is used.
    :param entity_type_guess: Whether to guess :attr:`~.entity.Entity.type`
                              of :class:`~.entity.Entity` from its
                              :attr:`~.entity.Entity.id` for less HTTP
                              requests.  :const:`True` by default.
    :type entity_type_guess: :class:`bool`
    :param cache_policy: A caching policy for API calls.  No cache
                         (:class:`~wikidata.cache.NullCachePolicy`) by default.
    :type cache_policy: :class:`~wikidata.cache.CachePolicy`
    :param repr_string: If given (not :const:`None`), :func:`repr` of
                        the client returns this string as is.
    :type repr_string: :class:`str`

    .. versionadded:: 0.5.0
       The ``cache_policy`` option.

    .. versionchanged:: 0.3.0
       The meaning of ``base_url`` parameter changed.  It originally meant
       ``https://www.wikidata.org/wiki/`` which contained the trailing path
       ``wiki/``, but now it means only ``https://www.wikidata.org/``.

    .. versionadded:: 0.2.0
       The ``entity_type_guess`` option.

    """

    #: (:class:`bool`) Whether to guess :attr:`~.entity.Entity.type`
    #: of :class:`~.entity.Entity` from its :attr:`~.entity.Entity.id`
    #: for less HTTP requests.
    #:
    #: .. versionadded:: 0.2.0
    entity_type_guess = True

    #: (:class:`~typing.Union`\ [:class:`~.datavalue.Decoder`,
    #: :class:`~typing.Callable`\ [[:class:`Client`, :class:`str`,
    #: :class:`~typing.Mapping`\ [:class:`str`, :class:`object`]],
    #: :class:`object`]])
    #: The function to decode the given datavalue.  It's typically an instance
    #: of :class:`~.datavalue.Decoder` or its subclass.
    datavalue_decoder = None

    #: (:class:`CachePolicy`) A caching policy for API calls.
    #:
    #: .. versionadded:: 0.5.0
    cache_policy = NullCachePolicy()  # type: CachePolicy

    def __init__(self,
                 base_url: str = WIKIDATA_BASE_URL,
                 opener: Optional[urllib.request.OpenerDirector] = None,
                 datavalue_decoder: Union['Decoder',
                                          Callable[['Client', str,
                                                    Mapping[str, object]],
                                                   object],
                                          None] = None,
                 entity_type_guess: bool = True,
                 cache_policy: CachePolicy = NullCachePolicy(),
                 repr_string: Optional[str] = None) -> None:
        if opener is None:
            # Reuse the process-wide opener of urllib.request (the one that
            # urllib.request.install_opener() configures).  It is created
            # lazily on the first urlopen() call, so when it does not exist
            # yet, make a throwaway call with an invalid URL purely to force
            # its creation; the error raised for that URL is expected and
            # deliberately ignored.
            if urllib.request._opener is None:  # type: ignore
                try:
                    urllib.request.urlopen('')
                except (ValueError, TypeError):
                    pass
            opener = urllib.request._opener  # type: ignore
        assert isinstance(opener, urllib.request.OpenerDirector)
        if datavalue_decoder is None:
            # Imported here rather than at module level; the module-level
            # import is guarded by TYPE_CHECKING and so absent at runtime.
            from .datavalue import Decoder  # noqa: F811
            datavalue_decoder = Decoder()
        assert callable(datavalue_decoder)
        self.base_url = base_url
        self.opener = opener  # type: urllib.request.OpenerDirector
        self.datavalue_decoder = datavalue_decoder
        self.entity_type_guess = entity_type_guess
        self.cache_policy = cache_policy  # type: CachePolicy
        # Entities are held weakly: an entry disappears once no one else
        # references the entity, and get() then creates a fresh one.
        self.identity_map = cast(MutableMapping[EntityId, Entity],
                                 weakref.WeakValueDictionary())
        self.repr_string = repr_string

    def get(self, entity_id: EntityId, load: bool = False) -> Entity:
        """Get a Wikidata entity by its :class:`~.entity.EntityId`.

        The same :class:`~.entity.Entity` object is returned for the same
        ``entity_id`` as long as a previously returned one is still alive.

        :param entity_id: The :attr:`~.entity.Entity.id` of
                          the :class:`~.entity.Entity` to find.
        :type entity_id: :class:`~.entity.EntityId`
        :param load: Eager loading on :const:`True`.
                     Lazy loading (:const:`False`) by default.
        :type load: :class:`bool`
        :return: The found entity.
        :rtype: :class:`~.entity.Entity`

        .. versionadded:: 0.3.0
           The ``load`` option.

        """
        try:
            entity = self.identity_map[entity_id]
        except KeyError:
            entity = Entity(entity_id, self)
            self.identity_map[entity_id] = entity
        if load:
            entity.load()
        return entity

    def guess_entity_type(self, entity_id: EntityId) -> Optional[EntityType]:
        r"""Guess :class:`~.entity.EntityType` from the given
        :class:`~.entity.EntityId`.  It could return :const:`None` when it
        fails to guess.

        .. note::

           It always fails to guess when :attr:`entity_type_guess`
           is configured to :const:`False`.

        :param entity_id: The entity id to guess the type of.  An id that
                          starts with neither ``Q`` nor ``P`` (including
                          an empty one) cannot be guessed.
        :type entity_id: :class:`~.entity.EntityId`
        :return: The guessed :class:`~.entity.EntityType`, or :const:`None`
                 if it fails to guess.
        :rtype: :class:`~typing.Optional`\ [:class:`~.entity.EntityType`]

        .. versionadded:: 0.2.0

        """
        if not self.entity_type_guess:
            return None
        # startswith() rather than entity_id[0] so that an empty id is
        # simply "cannot guess" instead of an IndexError.
        if entity_id.startswith('Q'):
            return EntityType.item
        if entity_id.startswith('P'):
            return EntityType.property
        return None

    def decode_datavalue(self,
                         datatype: str,
                         datavalue: Mapping[str, object]) -> object:
        """Decode the given ``datavalue`` using the configured
        :attr:`datavalue_decoder`.

        :param datatype: Passed to the decoder as its second argument.
        :type datatype: :class:`str`
        :param datavalue: Passed to the decoder as its third argument.
        :type datavalue: :class:`~typing.Mapping`
        :return: Whatever the decoder returns.

        .. versionadded:: 0.3.0

        """
        # The class-level default of the attribute is None, so narrow it to
        # a callable for the type checker; __init__ guarantees it at runtime.
        decode = cast(Callable[[Client, str, Mapping[str, object]], object],
                      self.datavalue_decoder)
        return decode(self, datatype, datavalue)

    def request(self, path: str) -> Union[
        bool, int, float, str,
        Mapping[str, Union[bool, int, float, str,
                           Mapping[str, object], Sequence]],
        Sequence[Union[bool, int, float, str, Mapping[str, object], Sequence]],
        None
    ]:
        """Fetch ``path`` (resolved against :attr:`base_url`) and return
        the decoded JSON document, consulting :attr:`cache_policy` first.

        :param path: The path or URL to join onto :attr:`base_url`.
        :type path: :class:`str`
        :return: The decoded JSON value, or :const:`None` if the server
                 answered with HTTP 400 and a body containing
                 ``Invalid ID``.
        :raise urllib.error.HTTPError: On any other HTTP error.

        """
        logger = logging.getLogger(__name__ + '.Client.request')
        url = urllib.parse.urljoin(self.base_url, path)
        cache_key = CacheKey(url)
        result = self.cache_policy.get(cache_key)
        if result is not None:
            logger.debug('%r: cache hit', url)
            return result  # type: ignore
        logger.debug('%r: no cache; make a request...', url)
        try:
            response = self.opener.open(url)
        except urllib.error.HTTPError as e:
            logger.debug('HTTP error code: %s', e.code, exc_info=True)
            if e.code == 400 and b'Invalid ID' in e.read():
                # Not cached: "invalid id" is reported as None, which the
                # cache lookup above could not tell apart from a miss anyway.
                return None
            raise
        # Closing the text wrapper also closes the wrapped response, so
        # the connection is released even if JSON decoding fails.
        with io.TextIOWrapper(response, encoding='utf-8') as buffer_:
            result = json.load(buffer_)
        self.cache_policy.set(cache_key, result)
        return result  # type: ignore

    def __repr__(self) -> str:
        """Return :attr:`repr_string` if it was configured, or otherwise
        ``module.ClassName(base_url)``.
        """
        if self.repr_string is not None:
            return self.repr_string
        return '{0.__module__}.{0.__qualname__}({1!r})'.format(
            type(self),
            self.base_url
        )