add intra_distance_score evaluation #103

Open

wants to merge 13 commits into master
62 changes: 62 additions & 0 deletions spotlight/evaluation.py
@@ -201,3 +201,65 @@ def rmse_score(model, test):
    predictions = model.predict(test.user_ids, test.item_ids)

    return np.sqrt(((test.ratings - predictions) ** 2).mean())


def intra_distance_score(model, test, train, k=10):
"""
Compute IntraDistance@k diversity of a set of recommended items which is defined
as the average pairwise distance of the items in the set.

In early definitions, it's called average dissimilarity [2]
It's best known as average intra-list distance [1]

.. [1] Castells, P., Hurley, N.J. and Vargas, S., 2015.
Novelty and diversity in recommender systems. In Recommender Systems Handbook (pp. 881-918).
Springer, Boston, MA.

.. [2] Hurley, N. and Zhang, M., 2011.
Novelty and diversity in top-n recommendation--analysis and evaluation.
ACM Transactions on Internet Technology (TOIT), 10(4), p.14.

Distance between items i,j is calculated as;
1 - intersection(i,j) / length(i) * length(j)

    Parameters
    ----------

    model: fitted instance of a recommender model
        The model to evaluate.
    test: :class:`spotlight.interactions.Interactions`
        Test interactions; used to determine the users for which
        recommendations are evaluated.
    train: :class:`spotlight.interactions.Interactions`
        Train interactions, used to compute item-to-item distances
        from item co-occurrence.
    k: int
        The number of top predicted items to consider per user.

    Returns
    -------

    (IntraDistance@k): numpy array of shape (len(users), k * (k - 1) / 2)
        For each test user, the pairwise distances between the k items
        in the recommendation list.
    """

    distances = []
    test = test.tocsr()
Review comment from maciejkula (Owner):

What do we need test for? For knowing which users to compute the predictions for?

Maybe a cleaner way of doing the same would be to allow the user to pass in an optional array of user ids for which the metric should be computed.
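A minimal sketch of that suggestion (the `user_ids` argument and its default below are hypothetical, not part of the submitted diff; it reuses the `_get_distance` helper and `Interactions.num_users`):

def intra_distance_score(model, train, user_ids=None, k=10):
    # Default to computing the metric for every user in the training set.
    if user_ids is None:
        user_ids = np.arange(train.num_users)

    mat = train.tocoo().T.tocsr()
    lengths = mat.getnnz(axis=1)

    distances = []
    for user_id in user_ids:
        predictions = -model.predict(user_id)
        rec_list = predictions.argsort()[:k]
        distance = [
            _get_distance(mat, lengths, first_item, second_item)
            for i, first_item in enumerate(rec_list)
            for second_item in rec_list[(i + 1):]
        ]
        distances.append(distance)

    return np.array(distances)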

    mat = train.tocoo().T.tocsr()
Review comment from maciejkula (Owner), Apr 3, 2018:

Is the coo conversion necessary?
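One possible alternative, a sketch assuming `Interactions.tocsr()` (already used on `test` above), would skip the intermediate COO matrix:

# Transposing the user x item CSR matrix yields a CSC matrix, so the trailing
# .tocsr() is still needed for the row slicing done in _get_distance.
mat = train.tocsr().T.tocsr()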

    lengths = mat.getnnz(axis=1)  # number of users who interacted with each item
    for user_id, row in enumerate(test):  # test only supplies the users to score; row itself is unused
        # Negate the scores so that argsort puts the best-scored items first.
        predictions = -model.predict(user_id)
        rec_list = predictions.argsort()[:k]
        # Pairwise distances between all k * (k - 1) / 2 item pairs in the list.
        distance = [
Review comment from maciejkula (Owner):

I personally find nested list comprehensions very confusing. Could we use nested for loops here?
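A sketch of the same computation with explicit nested loops, as suggested (not part of the submitted diff):

distance = []
for i, first_item in enumerate(rec_list):
    for second_item in rec_list[(i + 1):]:
        distance.append(
            _get_distance(mat, lengths, first_item, second_item))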

            _get_distance(mat, lengths, first_item, second_item)
            for i, first_item in enumerate(rec_list)
            for second_item in rec_list[(i + 1):]
        ]
        distances.append(distance)
    return np.array(distances)


def _get_distance(mat, lengths, first_item, second_item):
    # Number of users who interacted with both items.
    numerator = np.in1d(mat[first_item].indices, mat[second_item].indices, assume_unique=True).sum()
    # Product of the two items' interaction counts.
    denominator = lengths[first_item] * lengths[second_item]
    distance = numerator / denominator
    return 1 - distance
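As a sanity check on the formula, a small worked example using the `_get_distance` helper above (the toy interaction matrix is purely illustrative):

import numpy as np
from scipy.sparse import csr_matrix

# Toy item x user interaction matrix: 3 items, 4 users.
mat = csr_matrix(np.array([[1, 1, 0, 1],
                           [0, 1, 0, 1],
                           [1, 0, 1, 0]]))
lengths = mat.getnnz(axis=1)  # array([3, 2, 2])

# Items 0 and 1 are shared by 2 users: 1 - 2 / (3 * 2) = 0.666...
print(_get_distance(mat, lengths, 0, 1))  # ~0.6667
# Items 1 and 2 share no users, so they are maximally distant.
print(_get_distance(mat, lengths, 1, 2))  # 1.0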
13 changes: 12 additions & 1 deletion tests/test_evaluation_metrics.py
@@ -4,7 +4,7 @@

import pytest

-from spotlight.evaluation import precision_recall_score
+from spotlight.evaluation import precision_recall_score, intra_distance_score
from spotlight.cross_validation import random_train_test_split
from spotlight.datasets import movielens
from spotlight.factorization.implicit import ImplicitFactorizationModel
@@ -54,3 +54,14 @@ def test_precision_recall(data, k):
        assert len(precision.shape) == 1
    else:
        assert precision.shape[1] == len(k)


def test_intra_distance(data):

    (train, test, model) = data

    k = 5
    distances = intra_distance_score(model, test, train, k=k)

    assert len(distances) == test.num_users
    assert len(distances[0]) == (k * (k - 1)) / 2
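For reference, an end-to-end usage sketch mirroring the test setup (the dataset variant and hyperparameters here are illustrative choices, not prescribed by the PR):

from spotlight.cross_validation import random_train_test_split
from spotlight.datasets import movielens
from spotlight.evaluation import intra_distance_score
from spotlight.factorization.implicit import ImplicitFactorizationModel

interactions = movielens.get_movielens_dataset('100K')
train, test = random_train_test_split(interactions)

model = ImplicitFactorizationModel(n_iter=1)
model.fit(train)

# Shape (num_users, k * (k - 1) / 2); average for a single diversity figure.
distances = intra_distance_score(model, test, train, k=5)
print(distances.mean())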