"""
Utilities for fetching the Movielens datasets [1]_.

References
----------
.. [1] https://grouplens.org/datasets/movielens/
"""
import os
import h5py
from spotlight.datasets import _transport
from spotlight.interactions import Interactions
# Names of the Movielens variants accepted by get_movielens_dataset.
VARIANTS = ('100K', '1M', '10M', '20M')

# Common prefix of every download URL; the version tag and the file
# name are appended to it with '/' separators.
URL_PREFIX = (
    'https://github.com/maciejkula/'
    'recommender_datasets/releases/download'
)

# Release tag; used both as a URL component and as the name of the
# sub-directory passed to the transport helper.
VERSION = 'v0.2.0'
def _get_movielens(dataset):
    """
    Fetch one Movielens HDF5 file and read its interaction columns.

    Parameters
    ----------
    dataset: string
        Base name (without extension) of the file to fetch, for
        example ``'movielens_100K'``. It is appended to
        ``URL_PREFIX/VERSION/`` to form the remote URL.

    Returns
    -------
    tuple
        ``(user_id, item_id, rating, timestamp)``: the contents of the
        HDF5 datasets stored under those names, in that order.
    """
    extension = '.hdf5'

    remote_url = '/'.join((URL_PREFIX, VERSION, dataset + extension))
    local_subdir = os.path.join('movielens', VERSION)
    # NOTE(review): get_movielens_dataset already passes names that start
    # with 'movielens_', so this yields e.g. 'movielens_movielens_100K.hdf5'.
    # Left as is: renaming would presumably change where the file is
    # looked up locally -- confirm against _transport before changing.
    local_filename = 'movielens_{}{}'.format(dataset, extension)

    # Presumably downloads the file on first use and returns the path of
    # the local copy -- TODO confirm against _transport.get_data.
    path = _transport.get_data(remote_url, local_subdir, local_filename)

    with h5py.File(path, 'r') as data:
        # The `[:]` reads must happen while the file is still open, so
        # the tuple is built inside the `with` block.
        return (data['/user_id'][:],
                data['/item_id'][:],
                data['/rating'][:],
                data['/timestamp'][:])
def get_movielens_dataset(variant='100K'):
    """
    Download and return one of the Movielens datasets.

    Parameters
    ----------
    variant: string, optional
        String specifying which of the Movielens datasets
        to download. One of ('100K', '1M', '10M', '20M').

    Returns
    -------
    Interactions: :class:`spotlight.interactions.Interactions`
        instance of the interactions class

    Raises
    ------
    ValueError
        If ``variant`` is not one of the entries in ``VARIANTS``.
    """
    if variant not in VARIANTS:
        raise ValueError('Variant must be one of {}, '
                         'got {}.'.format(VARIANTS, variant))

    # This is the base name of the remote file, not a URL; the full URL
    # is assembled inside _get_movielens.
    dataset_name = 'movielens_{}'.format(variant)

    # _get_movielens returns (user_id, item_id, rating, timestamp), which
    # are forwarded positionally to the Interactions constructor.
    return Interactions(*_get_movielens(dataset_name))