diff --git a/.gitignore b/.gitignore index c48e46e3..d2978d53 100644 --- a/.gitignore +++ b/.gitignore @@ -6,9 +6,9 @@ doc/build .ipynb_checkpoints/ .cache/ -recsys.egg-info/ +surprise.egg-info/ build dist/ -recsys/similarities.c -recsys/prediction_algorithms/matrix_factorization.c +surprise/similarities.c +surprise/prediction_algorithms/matrix_factorization.c *.so diff --git a/CHANGELOG.md b/CHANGELOG.md index 6482456e..3ce38dcf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ CURRENT ======= +* Changed name from recsys to surprise * Improved printing of accuracy measures. * Added version number. * Rewrote the the __main__.py diff --git a/MANIFEST.in b/MANIFEST.in index c981ff2a..d019706e 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,4 +3,4 @@ include LICENSE.md include requirements.txt recursive-include doc * recursive-include examples * -recursive-include recsys *.c *.pyx +recursive-include surprise *.c *.pyx diff --git a/README.md b/README.md index 7d4d6728..c4c98d80 100644 --- a/README.md +++ b/README.md @@ -1,59 +1,62 @@ [![GitHub -version](https://badge.fury.io/gh/nicolashug%2Frecsys.svg)](https://nicolashug.github.io/RecSys/) -[![Documentation Status](https://readthedocs.org/projects/recsys/badge/?version=latest)](http://recsys.readthedocs.io/en/latest/?badge=latest) +version](https://badge.fury.io/gh/nicolashug%2Fsurprise.svg)](https://nicolashug.github.io/Surprise/) +[![Documentation Status](https://readthedocs.org/projects/surprise/badge/?version=latest)](http://surprise.readthedocs.io/en/latest/?badge=latest) [![Build -Status](https://travis-ci.org/NicolasHug/RecSys.svg?branch=master)](https://travis-ci.org/NicolasHug/RecSys) +Status](https://travis-ci.org/NicolasHug/Surprise.svg?branch=master)](https://travis-ci.org/NicolasHug/Surprise) [![python_versions](https://img.shields.io/badge/python-2.7%2C%203.5-blue.svg)] -(https://nicolashug.github.io/RecSys/) -[![license](https://img.shields.io/badge/license-GPLv3-blue.svg)](https://github.com/NicolasHug/RecSys/blob/master/LICENSE.md) +(https://nicolashug.github.io/Surprise/) +[![license](https://img.shields.io/badge/license-GPLv3-blue.svg)](https://github.com/NicolasHug/Surprise/blob/master/LICENSE.md) -RecSys -====== +Surprise +======== Overview -------- -[RecSys](https://NicolasHug.github.io/RecSys/) is an open source Python library +[Surprise](https://NicolasHug.github.io/Surprise/) is an open source Python library that provides with tools to build and evaluate the performance of many recommender system prediction algorithms. Its goal is to make life easy(-ier) for reseachers, teachers and students who want to play around with new recommender algorithms ideas and teach/learn more about recommender systems. -[RecSys](https://NicolasHug.github.io/RecSys/) **was designed with the following +[Surprise](https://NicolasHug.github.io/Surprise/) **was designed with the following purposes in mind**: - Give the user perfect control over his experiments. To this end, a strong emphasis is laid on - [documentation](http://recsys.readthedocs.io/en/latest/index.html), which we + [documentation](http://surprise.readthedocs.io/en/latest/index.html), which we have tried to make as clear and precise as possible by pointing out every details of the algorithms. - Alleviate the pain of [Dataset - handling](http://recsys.readthedocs.io/en/latest/getting_started.html#load-a-custom-dataset). + handling](http://surprise.readthedocs.io/en/latest/getting_started.html#load-a-custom-dataset). 
Users can use both *built-in* datasets ([Movielens](http://grouplens.org/datasets/movielens/), [Jester](http://eigentaste.berkeley.edu/dataset/)), and their own *custom* datasets. - Provide with various ready-to-use [prediction - algorithms](http://recsys.readthedocs.io/en/latest/prediction_algorithms_package.html) (Neighborhood approaches, SVD, SVD++...) + algorithms](http://surprise.readthedocs.io/en/latest/prediction_algorithms_package.html) (Neighborhood approaches, SVD, SVD++...) - Make it easy to implement [new algorithm - ideas](http://recsys.readthedocs.io/en/latest/building_custom_algo.html). -- Provide with tools to [evaluate](http://recsys.readthedocs.io/en/latest/evaluate.html), - [analyse](http://nbviewer.jupyter.org/github/NicolasHug/RecSys/tree/master/examples/notebooks/KNNBasic_analysis.ipynb/) + ideas](http://surprise.readthedocs.io/en/latest/building_custom_algo.html). +- Provide with tools to [evaluate](http://surprise.readthedocs.io/en/latest/evaluate.html), + [analyse](http://nbviewer.jupyter.org/github/NicolasHug/Surprise/tree/master/examples/notebooks/KNNBasic_analysis.ipynb/) and - [compare](http://nbviewer.jupyter.org/github/NicolasHug/RecSys/tree/master/examples/notebooks/Compare.ipynb/) + [compare](http://nbviewer.jupyter.org/github/NicolasHug/Surprise/tree/master/examples/notebooks/Compare.ipynb/) the algorithms performance. Cross-validation procedures can be run very easily. +The name *SurPRISE* (roughly) stands for Simple Python RecommendatIon System +Engine. + Installation / Usage -------------------- The easiest way is to use pip (you'll need [numpy](http://www.numpy.org/)): - $ pip install recsys + $ pip install surprise Or you can clone the repo and build the source (you'll need [Cython](http://cython.org/) and [numpy](http://www.numpy.org/)): - $ git clone https://github.com/NicolasHug/recsys.git + $ git clone https://github.com/NicolasHug/surprise.git $ python setup.py install Example @@ -61,13 +64,13 @@ Example Here is a simple example showing how you can (down)load a dataset, split it for 3-folds cross-validation, and compute the MAE and RMSE of the -[SVD](http://recsys.readthedocs.io/en/latest/matrix_factorization.html#recsys.prediction_algorithms.matrix_factorization.SVD) +[SVD](http://surprise.readthedocs.io/en/latest/matrix_factorization.html#surprise.prediction_algorithms.matrix_factorization.SVD) algorithm. ```python -from recsys import SVD -from recsys import Dataset -from recsys import evaluate +from surprise import SVD +from surprise import Dataset +from surprise import evaluate # Load the movielens-100k dataset (download it if needed), @@ -102,18 +105,18 @@ All experiments are run on a laptop with Intel Core i3 1.7 GHz, 4Go Ram. 
| | RMSE | MAE | Time (s) | |-----------------|:------:|:------:|:--------:| -| [NormalPredictor](http://recsys.readthedocs.io/en/latest/basic_algorithms.html#recsys.prediction_algorithms.random_pred.NormalPredictor) | 1.5228 | 1.2242 | 4 | -| [BaselineOnly](http://recsys.readthedocs.io/en/latest/basic_algorithms.html#recsys.prediction_algorithms.baseline_only.BaselineOnly) | .9445 | .7488 | 16 | -| [KNNBasic](http://recsys.readthedocs.io/en/latest/knn_inspired.html#recsys.prediction_algorithms.knns.KNNBasic) | .9789 | .7732 | 27 | -| [KNNWithMeans](http://recsys.readthedocs.io/en/latest/knn_inspired.html#recsys.prediction_algorithms.knns.KNNWithMeans) | .9514 | .7500 | 30 | -| [KNNBaseline](http://recsys.readthedocs.io/en/latest/knn_inspired.html#recsys.prediction_algorithms.knns.KNNBaseline) | .9306 | .7334 | 44 | -| [SVD](http://recsys.readthedocs.io/en/latest/matrix_factorization.html#recsys.prediction_algorithms.matrix_factorization.SVD) | .9392 | .7409 | 46 | +| [NormalPredictor](http://surprise.readthedocs.io/en/latest/basic_algorithms.html#surprise.prediction_algorithms.random_pred.NormalPredictor) | 1.5228 | 1.2242 | 4 | +| [BaselineOnly](http://surprise.readthedocs.io/en/latest/basic_algorithms.html#surprise.prediction_algorithms.baseline_only.BaselineOnly) | .9445 | .7488 | 16 | +| [KNNBasic](http://surprise.readthedocs.io/en/latest/knn_inspired.html#surprise.prediction_algorithms.knns.KNNBasic) | .9789 | .7732 | 27 | +| [KNNWithMeans](http://surprise.readthedocs.io/en/latest/knn_inspired.html#surprise.prediction_algorithms.knns.KNNWithMeans) | .9514 | .7500 | 30 | +| [KNNBaseline](http://surprise.readthedocs.io/en/latest/knn_inspired.html#surprise.prediction_algorithms.knns.KNNBaseline) | .9306 | .7334 | 44 | +| [SVD](http://surprise.readthedocs.io/en/latest/matrix_factorization.html#surprise.prediction_algorithms.matrix_factorization.SVD) | .9392 | .7409 | 46 | Documentation, Getting Started ------------------------------ The documentation with many other usage examples is [available -online](http://recsys.readthedocs.io/en/latest/index.html) on ReadTheDocs. +online](http://surprise.readthedocs.io/en/latest/index.html) on ReadTheDocs. License ------- diff --git a/doc/source/accuracy.rst b/doc/source/accuracy.rst index a1e21b80..140219be 100644 --- a/doc/source/accuracy.rst +++ b/doc/source/accuracy.rst @@ -4,7 +4,7 @@ accuracy module =================== -.. automodule:: recsys.accuracy +.. automodule:: surprise.accuracy :members: :undoc-members: :show-inheritance: diff --git a/doc/source/algobase.rst b/doc/source/algobase.rst index 1b76729c..09d4348d 100644 --- a/doc/source/algobase.rst +++ b/doc/source/algobase.rst @@ -3,5 +3,5 @@ The algorithm base class ------------------------ -.. automodule:: recsys.prediction_algorithms.algo_base +.. automodule:: surprise.prediction_algorithms.algo_base :members: diff --git a/doc/source/basic_algorithms.rst b/doc/source/basic_algorithms.rst index 679f9eed..274bd0e9 100644 --- a/doc/source/basic_algorithms.rst +++ b/doc/source/basic_algorithms.rst @@ -6,9 +6,9 @@ Basic algorithms These are basic algorithm that do not do much work but that are still useful for comparing accuracies. -.. autoclass:: recsys.prediction_algorithms.random_pred.NormalPredictor +.. autoclass:: surprise.prediction_algorithms.random_pred.NormalPredictor :show-inheritance: -.. autoclass:: recsys.prediction_algorithms.baseline_only.BaselineOnly +.. 
autoclass:: surprise.prediction_algorithms.baseline_only.BaselineOnly :show-inheritance: diff --git a/doc/source/building_custom_algo.rst b/doc/source/building_custom_algo.rst index 701f4f03..000f08e9 100644 --- a/doc/source/building_custom_algo.rst +++ b/doc/source/building_custom_algo.rst @@ -3,7 +3,7 @@ How to build you own prediction algorithm ========================================= -This page describes how to build a custom prediction algorithm using RecSys. +This page describes how to build a custom prediction algorithm using Surprise. The basics ~~~~~~~~~~ @@ -12,10 +12,10 @@ Want to get your hands dirty? Cool. Creating your own prediction algorithm is pretty simple: an algorithm is nothing but a class derived from :class:`AlgoBase -` that has an ``estimate`` +` that has an ``estimate`` method. This is the method that is called by the :meth:`predict() -` method. It takes in -an **inner** user id, an **inner** item id (see :ref:`this note +` method. It takes +in an **inner** user id, an **inner** item id (see :ref:`this note `), and returns the estimated rating :math:`\hat{r}_{ui}`: .. literalinclude:: ../../examples/building_custom_algorithms/most_basic_algorithm.py @@ -36,8 +36,8 @@ return a dictionary with given details: :: return 3, details This dictionary will be stored in the :class:`prediction -` as the ``details`` field and -can be used for later analysis. +` as the ``details`` +field and can be used for later analysis. @@ -56,19 +56,19 @@ be done by defining the ``train`` method: The ``train`` method is called by the :func:`evaluate -` function at each fold of a cross-validation +` function at each fold of a cross-validation process, (but you can also :ref:`call it yourself `). Before doing anything, you should call the base class :meth:`train() -` method. +` method. The ``trainset`` attribute ~~~~~~~~~~~~~~~~~~~~~~~~~~ Once the base class :meth:`train() -` method has returned, all -the info you need about the current training set (rating values, etc...) is +` method has returned, +all the info you need about the current training set (rating values, etc...) is stored in the ``self.trainset`` attribute. This is a :class:`Trainset -` object that has many attributes and methods of +` object that has many attributes and methods of interest for prediction. To illustrate its usage, let's make an algorithm that predicts an average @@ -90,15 +90,16 @@ When the prediction is impossible ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ It's up to your algorithm to decide if it can or cannot yield a prediction. If -the prediction is impossible, then you can raise the :class:`PredictionImpossible -` exception. +the prediction is impossible, then you can raise the +:class:`PredictionImpossible +` exception. You'll need to import it first): :: - from recsys import PredictionImpossible + from surprise import PredictionImpossible This exception will be caught by the :meth:`predict() -` method, and the +` method, and the estimation :math:`\hat{r}_{ui}` will be set to the global mean of all ratings :math:`\mu`. @@ -111,10 +112,10 @@ need to accept ``bsl_options`` and ``sim_options`` as parmeters to the these parameters in the :ref:`prediction_algorithms` section. Methods :meth:`compute_baselines() -` and +` and :meth:`compute_similarities() -` can be -called in the ``train`` method (or anywhere else). +` can +be called in the ``train`` method (or anywhere else). .. 
literalinclude:: ../../examples/building_custom_algorithms/with_baselines_or_sim.py :caption: From file ``examples/building_custom_algorithms/.with_baselines_or_sim.py`` @@ -123,5 +124,5 @@ called in the ``train`` method (or anywhere else). Feel free to explore the prediction_algorithms package `source -`_ +`_ to get an idea of what can be done. diff --git a/doc/source/conf.py b/doc/source/conf.py index 7bd908a0..4effbc65 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- # -# RecSys documentation build configuration file, created by +# Surprise documentation build configuration file, created by # sphinx-quickstart on Tue Dec 29 20:08:18 2015. # # This file is execfile()d with the current directory set to its @@ -56,7 +56,7 @@ master_doc = 'index' # General information about the project. -project = 'RecSys' +project = 'Surprise' copyright = '2015, Nicolas Hug' author = 'Nicolas Hug' @@ -215,7 +215,7 @@ #html_search_scorer = 'scorer.js' # Output file base name for HTML help builder. -htmlhelp_basename = 'RecSysdoc' +htmlhelp_basename = 'Surprisedoc' # -- Options for LaTeX output --------------------------------------------- @@ -237,7 +237,7 @@ # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, 'RecSys.tex', 'RecSys Documentation', + (master_doc, 'Surprise.tex', 'Surprise Documentation', 'Nicolas Hug', 'manual'), ] @@ -267,7 +267,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ - (master_doc, 'recsys', 'RecSys Documentation', + (master_doc, 'surprise', 'Surprise Documentation', [author], 1) ] @@ -281,8 +281,8 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'RecSys', 'RecSys Documentation', - author, 'RecSys', 'One line description of project.', + (master_doc, 'Surprise', 'Surprise Documentation', + author, 'Surprise', 'One line description of project.', 'Miscellaneous'), ] diff --git a/doc/source/dataset.rst b/doc/source/dataset.rst index a8c80c59..c91198a7 100644 --- a/doc/source/dataset.rst +++ b/doc/source/dataset.rst @@ -3,7 +3,7 @@ dataset module =================== -.. automodule:: recsys.dataset +.. automodule:: surprise.dataset :members: :exclude-members: BuiltinDataset, read_ratings, DatasetUserFolds, parse_line diff --git a/doc/source/dump.rst b/doc/source/dump.rst index a23c82ac..19356a31 100644 --- a/doc/source/dump.rst +++ b/doc/source/dump.rst @@ -3,5 +3,5 @@ dump module =============== -.. automodule:: recsys.dump +.. automodule:: surprise.dump :members: diff --git a/doc/source/evaluate.rst b/doc/source/evaluate.rst index 815f7d3a..7abfb02b 100644 --- a/doc/source/evaluate.rst +++ b/doc/source/evaluate.rst @@ -3,6 +3,6 @@ evaluate module =============== -.. automodule:: recsys.evaluate +.. automodule:: surprise.evaluate :members: :exclude-members: CaseInsensitiveDefaultDict diff --git a/doc/source/getting_started.rst b/doc/source/getting_started.rst index 2fb3af27..9e5b8ccc 100644 --- a/doc/source/getting_started.rst +++ b/doc/source/getting_started.rst @@ -9,7 +9,7 @@ Getting Started Basic usage ----------- -`RecSys `_ has a set of built-in +`Surprise `_ has a set of built-in :ref:`algorithms` and :ref:`datasets ` for you to play with. 
In its simplest form, it takes about four lines of code to evaluate the performance of an algorithm: @@ -20,14 +20,14 @@ evaluate the performance of an algorithm: :lines: 9- -If `RecSys `_ cannot find the +If `Surprise `_ cannot find the `movielens-100k dataset `_, it will -offer to download it and will store it under the ``.recsys_data`` folder in +offer to download it and will store it under the ``.surprise_data`` folder in your home directory. The :meth:`split() -` method automatically splits the -dataset into 3 folds and the :func:`evaluate() ` +` method automatically splits the +dataset into 3 folds and the :func:`evaluate() ` function runs the cross-validation procedure and compute some :mod:`accuracy -` measures. +` measures. .. _load_custom: @@ -35,19 +35,19 @@ function runs the cross-validation procedure and compute some :mod:`accuracy Load a custom dataset --------------------- -You can of course use a custom dataset. `RecSys -`_ offers two ways of loading a custom +You can of course use a custom dataset. `Surprise +`_ offers two ways of loading a custom dataset: - you can either specify a single file with all the ratings and - use the :meth:`split ()` method to + use the :meth:`split ()` method to perform cross-validation ; - or if your dataset is already split into predefined folds, you can specify a list of files for training and testing. -Either way, you will need to define a :class:`Reader ` -object for `RecSys `_ to be able to parse -the file(s). +Either way, you will need to define a :class:`Reader ` +object for `Surprise `_ to be able to +parse the file(s). We'll see how to handle both cases with the `movielens-100k dataset `_. Of course this is a built-in @@ -64,14 +64,14 @@ Load an entire dataset :lines: 16-25 .. note:: - Actually, as the Movielens-100k dataset is builtin, `RecSys - `_ provides with a proper reader so + Actually, as the Movielens-100k dataset is builtin, `Surprise + `_ provides with a proper reader so in this case, we could have just created the reader like this: :: reader = Reader('ml-100k') For more details about readers and how to use them, see the :class:`Reader -class ` documentation. +class ` documentation. .. _load_from_folds_example: @@ -91,21 +91,21 @@ needs to be a ``list``. Advanced usage -------------- -We will here get a little deeper on what can `RecSys -`_ do for you. +We will here get a little deeper on what can `Surprise +`_ do for you. .. _iterate_over_folds: Manually iterate over folds ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -We have so far used the :func:`evaluate() ` function -that does all the hard work for us. If you want to have better control on your -experiments, you can use the :meth:`folds() ` -generator of your dataset, and then the :meth:`train() -` and :meth:`test() -` methods of your -algorithm on each of the folds: +We have so far used the :func:`evaluate() ` +function that does all the hard work for us. If you want to have better control +on your experiments, you can use the :meth:`folds() +` generator of your dataset, and then the +:meth:`train() ` and +:meth:`test() ` methods +of your algorithm on each of the folds: .. literalinclude:: ../../examples/iterate_over_folds.py :caption: From file ``examples/iterate_over_folds.py`` @@ -123,8 +123,8 @@ performing cross-validation (i.e. there is no test set). 
The latter is pretty straightforward: all you need is to load a dataset, and the :meth:`build_full_trainset() -` method to build the -:class:`trainset ` and train you algorithm: +` method to build the +:class:`trainset ` and train you algorithm: .. literalinclude:: ../../examples/query_for_predictions.py :caption: From file ``examples/query_for_predictions.py`` @@ -132,13 +132,14 @@ the :meth:`build_full_trainset() :lines: 15-22 Now, there's no way we could call the :meth:`test() -` method, because we have -no testset. But you can still get predictions for the users and items you want. +` method, because we +have no testset. But you can still get predictions for the users and items you +want. Let's say you're interested in user 196 and item 302 (make sure they're in the trainset!), and you know that the true rating :math:`r_{ui} = 4`. All you need is call the :meth:`predict() -` method: +` method: .. literalinclude:: ../../examples/query_for_predictions.py :caption: From file ``examples/query_for_predictions.py`` @@ -146,25 +147,25 @@ is call the :meth:`predict() :lines: 28-32 If the :meth:`predict() -` method is called +` method is called with user or item ids that were not part of the trainset, it's up to the algorithm to decide if he still can make a prediction or not. If it can't, -:meth:`predict() ` +:meth:`predict() ` will still predict the mean of all ratings :math:`\mu`. .. _raw_inner_note: .. note:: Raw ids are ids as defined in a rating file. They can be strings or whatever. On trainset creation, each raw id is mapped to a (unique) integer called - inner id, which is a lot more suitable for `RecSys - `_ to manipulate. To convert a raw id + inner id, which is a lot more suitable for `Surprise + `_ to manipulate. To convert a raw id to an inner id, you can use the :meth:`to_inner_uid() - ` and :meth:`to_inner_iid() - ` methods of the :class:`trainset - `. + ` and :meth:`to_inner_iid() + ` methods of the :class:`trainset + `. Obviously, it is perfectly fine to use the :meth:`predict() -` method directly +` method directly during a cross-validation process. It's then up to you to ensure that the user and item ids are present in the trainset though. @@ -177,15 +178,15 @@ You may want to save your algorithm predictions along with all the usefull information about the algorithm. This way, you can run your algorithm once, save the results, and go back to them whenever you want to inspect in greater details each of the predictions, and get a good insight on why your algorithm -performs well (or bad!). `RecSys `_ +performs well (or bad!). `Surprise `_ provides with some tools to do that. You can dump your algorithm predictions either using the :func:`evaluate() -` function, or do it manually with the :func:`dump -` function. Either way, an example is worth a thousand words, -so here a few `jupyter `_ notebooks: +` function, or do it manually with the :func:`dump +` function. Either way, an example is worth a thousand +words, so here a few `jupyter `_ notebooks: - `Dumping and analysis of the KNNBasic algorithm - `_. + `_. - `Comparison of two algorithms - `_. + `_. diff --git a/doc/source/index.rst b/doc/source/index.rst index 2dfafa7c..78063966 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -1,28 +1,28 @@ -.. RecSys documentation master file, created by +.. Surprise documentation master file, created by sphinx-quickstart on Tue Dec 29 20:08:18 2015. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. .. 
_index: -Welcome to RecSys' documentation! -================================= +Welcome to Surprise' documentation! +=================================== -`RecSys `_ is an open source Python +`Surprise `_ is an open source Python library that provides with tools to build and evaluate the performance of many recommender system prediction algorithms. Its goal is to make life easy(-ier) for reseachers, teachers and students who want to play around with new recommender algorithms ideas and teach/learn more about recommender systems. -If you're new to `RecSys `_, we invite +If you're new to `Surprise `_, we invite you to take a look at the :ref:`getting_started` guide, where you'll find a -series of tutorials illustrating all you can do with `RecSys -`_. +series of tutorials illustrating all you can do with `Surprise +`_. Any kind of feedback/criticism would be greatly appreciated (software design, documentation, improvement ideas, spelling mistakes, etc...). Please feel free to contribute and send pull requests (see `GitHub page -`_)! +`_)! .. toctree:: diff --git a/doc/source/knn_inspired.rst b/doc/source/knn_inspired.rst index 5de653fe..343a7d53 100644 --- a/doc/source/knn_inspired.rst +++ b/doc/source/knn_inspired.rst @@ -18,16 +18,16 @@ approach. ratings from users (or items) that are negatively correlated. For a given prediction, the actual number of neighbors can be retrieved in the ``'actual_k'`` field of the ``details`` dictionary of the :class:`prediction - `. + `. You may want to read the :ref:`User Guide ` on how to configure the ``sim_options`` parameter. -.. autoclass:: recsys.prediction_algorithms.knns.KNNBasic +.. autoclass:: surprise.prediction_algorithms.knns.KNNBasic :show-inheritance: -.. autoclass:: recsys.prediction_algorithms.knns.KNNWithMeans +.. autoclass:: surprise.prediction_algorithms.knns.KNNWithMeans :show-inheritance: -.. autoclass:: recsys.prediction_algorithms.knns.KNNBaseline +.. autoclass:: surprise.prediction_algorithms.knns.KNNBaseline :show-inheritance: diff --git a/doc/source/matrix_factorization.rst b/doc/source/matrix_factorization.rst index 09248b0c..95f1f2ac 100644 --- a/doc/source/matrix_factorization.rst +++ b/doc/source/matrix_factorization.rst @@ -3,8 +3,8 @@ Matrix Factorization-based algortihms ------------------------------------- -.. autoclass:: recsys.prediction_algorithms.matrix_factorization.SVD +.. autoclass:: surprise.prediction_algorithms.matrix_factorization.SVD :show-inheritance: -.. autoclass:: recsys.prediction_algorithms.matrix_factorization.SVDpp +.. autoclass:: surprise.prediction_algorithms.matrix_factorization.SVDpp :show-inheritance: diff --git a/doc/source/notation_standards.rst b/doc/source/notation_standards.rst index 91051eb4..0d689d01 100644 --- a/doc/source/notation_standards.rst +++ b/doc/source/notation_standards.rst @@ -26,7 +26,7 @@ In the documentation, you will find the following notation: * :math:`\mu_i` : the mean of all ratings given to item :math:`i`. * :math:`N_i^k(u)` : the :math:`k` nearest neighbors of user :math:`u` that have rated item :math:`i`. This set is computed using a :mod:`similarity - metric `. + metric `. * :math:`N_u^k(i)` : the :math:`k` nearest neighbors of item :math:`i` that are rated by user :math:`u`. This set is computed using a :py:mod:`similarity - metric `. + metric `. 
diff --git a/doc/source/prediction_algorithms.rst b/doc/source/prediction_algorithms.rst index 3de156e2..eaf4af5e 100644 --- a/doc/source/prediction_algorithms.rst +++ b/doc/source/prediction_algorithms.rst @@ -3,14 +3,14 @@ Prediction algorithms ===================== -RecSys provides with a bunch of built-in algorithms. You can find the details -of each of these in the :mod:`recsys.prediction_algorithms` package +Surprise provides with a bunch of built-in algorithms. You can find the details +of each of these in the :mod:`surprise.prediction_algorithms` package documentation. -Every algorithm is part of the global RecSys namespace, so you only need to -import their names from the RecSys package, for example: :: +Every algorithm is part of the global Surprise namespace, so you only need to +import their names from the Surprise package, for example: :: - from recsys import KNNBasic + from surprise import KNNBasic algo = KNNBasic() @@ -35,7 +35,7 @@ Baselines estimates configuration \lambda \left(b_u^2 + b_i^2 \right). For algorithms using baselines in another objective function (e.g. the - :class:`SVD ` + :class:`SVD ` algorithm), the baseline configuration is done differently and is specific to each algorithm. Please refer to their own documentation. @@ -102,7 +102,7 @@ Usage examples: :lines: 30-34 Note that some similarity measures may use baselines, such as the -:func:`pearson_baseline ` similarity. +:func:`pearson_baseline ` similarity. Configuration works just the same, whether the baselines are used in the actual prediction :math:`\hat{r}_{ui}` or not: @@ -126,7 +126,7 @@ need to pass a ``sim_options`` argument at the creation of an algorithm. This argument is a dictionary with the following (all optional) keys: - ``'name'``: The name of the similarity to use, as defined in the - :mod:`similarities ` module. Default is ``'MSD'``. + :mod:`similarities ` module. Default is ``'MSD'``. - ``'user_based'``: Whether similarities will be computed between users or between items. This has a **huge** impact on the performance of a prediction algorithm. Default is ``True``. @@ -136,7 +136,7 @@ argument is a dictionary with the following (all optional) keys: :math:`|I_{uv}| < \text{min_support}` then :math:`\text{sim}(u, v) = 0`. The same goes for items. - ``'shrinkage'``: Shrinkage parameter to apply (only relevent for - :func:`pearson_baseline ` similarity). + :func:`pearson_baseline ` similarity). Default is 100. Usage examples: @@ -152,4 +152,4 @@ Usage examples: :lines: 26-29 .. seealso:: - The :mod:`similarities ` module. + The :mod:`similarities ` module. diff --git a/doc/source/prediction_algorithms_package.rst b/doc/source/prediction_algorithms_package.rst index 9f634de4..567e14ce 100644 --- a/doc/source/prediction_algorithms_package.rst +++ b/doc/source/prediction_algorithms_package.rst @@ -3,7 +3,7 @@ prediction_algorithms package ============================= -.. automodule:: recsys.prediction_algorithms +.. automodule:: surprise.prediction_algorithms You may want to check the :ref:`notation_standards` before diving into the formulas. diff --git a/doc/source/predictions_module.rst b/doc/source/predictions_module.rst index 76e755ba..633f5a18 100644 --- a/doc/source/predictions_module.rst +++ b/doc/source/predictions_module.rst @@ -3,7 +3,7 @@ The predictions module ------------------------ -.. automodule:: recsys.prediction_algorithms.predictions +.. 
automodule:: surprise.prediction_algorithms.predictions :members: :exclude-members: all_ratings, all_xs, all_ys diff --git a/doc/source/similarities.rst b/doc/source/similarities.rst index f5bc6b2e..3e5b1ad9 100644 --- a/doc/source/similarities.rst +++ b/doc/source/similarities.rst @@ -3,7 +3,7 @@ similarities module =================== -.. automodule:: recsys.similarities +.. automodule:: surprise.similarities :members: :exclude-members: compute_mean_diff :show-inheritance: diff --git a/examples/baselines_conf.py b/examples/baselines_conf.py index 85f0855e..511e6040 100644 --- a/examples/baselines_conf.py +++ b/examples/baselines_conf.py @@ -6,10 +6,10 @@ from __future__ import (absolute_import, division, print_function, unicode_literals) -from recsys import BaselineOnly -from recsys import KNNBasic -from recsys import Dataset -from recsys import evaluate +from surprise import BaselineOnly +from surprise import KNNBasic +from surprise import Dataset +from surprise import evaluate # Load the movielens-100k dataset. diff --git a/examples/basic_usage.py b/examples/basic_usage.py index 7ac204f3..8ebf5fff 100644 --- a/examples/basic_usage.py +++ b/examples/basic_usage.py @@ -1,14 +1,14 @@ """ -This module descibes the most basic usage of RecSys: you define a prediction +This module descibes the most basic usage of surprise: you define a prediction algorithm, (down)load a dataset and evaluate the performances of the algorithm. """ from __future__ import (absolute_import, division, print_function, unicode_literals) -from recsys import SVD -from recsys import Dataset -from recsys import evaluate +from surprise import SVD +from surprise import Dataset +from surprise import evaluate # Load the movielens-100k dataset (download it if needed), diff --git a/examples/building_custom_algorithms/mean_rating_user_item.py b/examples/building_custom_algorithms/mean_rating_user_item.py index 436f1d30..c239f9d1 100644 --- a/examples/building_custom_algorithms/mean_rating_user_item.py +++ b/examples/building_custom_algorithms/mean_rating_user_item.py @@ -8,9 +8,9 @@ import numpy as np -from recsys import AlgoBase -from recsys import Dataset -from recsys import evaluate +from surprise import AlgoBase +from surprise import Dataset +from surprise import evaluate class MyOwnAlgorithm(AlgoBase): diff --git a/examples/building_custom_algorithms/most_basic_algorithm.py b/examples/building_custom_algorithms/most_basic_algorithm.py index d7d3c8a1..7fbba6d6 100644 --- a/examples/building_custom_algorithms/most_basic_algorithm.py +++ b/examples/building_custom_algorithms/most_basic_algorithm.py @@ -6,9 +6,9 @@ from __future__ import (absolute_import, division, print_function, unicode_literals) -from recsys import AlgoBase -from recsys import Dataset -from recsys import evaluate +from surprise import AlgoBase +from surprise import Dataset +from surprise import evaluate class MyOwnAlgorithm(AlgoBase): diff --git a/examples/building_custom_algorithms/most_basic_algorithm2.py b/examples/building_custom_algorithms/most_basic_algorithm2.py index d4e486a5..9ac1ef26 100644 --- a/examples/building_custom_algorithms/most_basic_algorithm2.py +++ b/examples/building_custom_algorithms/most_basic_algorithm2.py @@ -8,9 +8,9 @@ import numpy as np -from recsys import AlgoBase -from recsys import Dataset -from recsys import evaluate +from surprise import AlgoBase +from surprise import Dataset +from surprise import evaluate class MyOwnAlgorithm(AlgoBase): diff --git a/examples/building_custom_algorithms/with_baselines_or_sim.py 
b/examples/building_custom_algorithms/with_baselines_or_sim.py index f7d22d73..70da752e 100644 --- a/examples/building_custom_algorithms/with_baselines_or_sim.py +++ b/examples/building_custom_algorithms/with_baselines_or_sim.py @@ -6,10 +6,10 @@ from __future__ import (absolute_import, division, print_function, unicode_literals) -from recsys import AlgoBase -from recsys import Dataset -from recsys import evaluate -from recsys import PredictionImpossible +from surprise import AlgoBase +from surprise import Dataset +from surprise import evaluate +from surprise import PredictionImpossible class MyOwnAlgorithm(AlgoBase): diff --git a/examples/iterate_over_folds.py b/examples/iterate_over_folds.py index f6b113ed..ee539245 100644 --- a/examples/iterate_over_folds.py +++ b/examples/iterate_over_folds.py @@ -6,9 +6,9 @@ from __future__ import (absolute_import, division, print_function, unicode_literals) -from recsys import BaselineOnly -from recsys import Dataset -from recsys import accuracy +from surprise import BaselineOnly +from surprise import Dataset +from surprise import accuracy # Load the movielens-100k dataset and split it into 3 folds for # cross-validation. diff --git a/examples/load_custom_dataset.py b/examples/load_custom_dataset.py index d677dff7..4a0d20b4 100644 --- a/examples/load_custom_dataset.py +++ b/examples/load_custom_dataset.py @@ -8,13 +8,13 @@ from __future__ import (absolute_import, division, print_function, unicode_literals) -from recsys import BaselineOnly -from recsys import Dataset -from recsys import evaluate -from recsys import Reader +from surprise import BaselineOnly +from surprise import Dataset +from surprise import evaluate +from surprise import Reader # path to dataset file -file_path = '/home/nico/.recsys_data/ml-100k/ml-100k/u.data' # change this +file_path = '/home/nico/.surprise_data/ml-100k/ml-100k/u.data' # change this # As we're loading a custom dataset, we need to define a reader. In the # movielens-100k dataset, each line has the following format: diff --git a/examples/load_custom_dataset_predefined_folds.py b/examples/load_custom_dataset_predefined_folds.py index 6693d67e..b3b20d50 100644 --- a/examples/load_custom_dataset_predefined_folds.py +++ b/examples/load_custom_dataset_predefined_folds.py @@ -10,13 +10,13 @@ unicode_literals) import os -from recsys import BaselineOnly -from recsys import Dataset -from recsys import evaluate -from recsys import Reader +from surprise import BaselineOnly +from surprise import Dataset +from surprise import evaluate +from surprise import Reader # path to dataset folder -files_dir = os.path.expanduser('~/.recsys_data/ml-100k/ml-100k/') +files_dir = os.path.expanduser('~/.surprise_data/ml-100k/ml-100k/') # This time, we'll use the built-in reader.
reader = Reader('ml-100k') diff --git a/examples/notebooks/Compare.ipynb b/examples/notebooks/Compare.ipynb index bf376f7e..c8ec95cc 100644 --- a/examples/notebooks/Compare.ipynb +++ b/examples/notebooks/Compare.ipynb @@ -24,12 +24,12 @@ "\n", "import pandas as pd\n", "\n", - "from recsys import SVD\n", - "from recsys import KNNBasic\n", - "from recsys import Dataset \n", - "from recsys import Reader \n", - "from recsys import dump\n", - "from recsys.accuracy import rmse" + "from surprise import SVD\n", + "from surprise import KNNBasic\n", + "from surprise import Dataset \n", + "from surprise import Reader \n", + "from surprise import dump\n", + "from surprise.accuracy import rmse" ] }, { @@ -59,8 +59,8 @@ "#Dataset.load_builtin('ml-100k')\n", "\n", "# Now, let's load the dataset\n", - "train_file = os.path.expanduser('~') + '/.recsys_data/ml-100k/ml-100k/u1.base'\n", - "test_file = os.path.expanduser('~') + '/.recsys_data/ml-100k/ml-100k/u1.test'\n", + "train_file = os.path.expanduser('~') + '/.surprise_data/ml-100k/ml-100k/u1.base'\n", + "test_file = os.path.expanduser('~') + '/.surprise_data/ml-100k/ml-100k/u1.test'\n", "data = Dataset.load_from_folds([(train_file, test_file)], Reader('ml-100k'))\n", "\n", " \n", diff --git a/examples/notebooks/KNNBasic_analysis.ipynb b/examples/notebooks/KNNBasic_analysis.ipynb index 6140c698..44b411fc 100644 --- a/examples/notebooks/KNNBasic_analysis.ipynb +++ b/examples/notebooks/KNNBasic_analysis.ipynb @@ -24,11 +24,11 @@ "\n", "import pandas as pd\n", "\n", - "from recsys import KNNBasic\n", - "from recsys import Dataset \n", - "from recsys import Reader \n", - "from recsys import dump\n", - "from recsys.accuracy import rmse" + "from surprise import KNNBasic\n", + "from surprise import Dataset \n", + "from surprise import Reader \n", + "from surprise import dump\n", + "from surprise.accuracy import rmse" ] }, { @@ -56,8 +56,8 @@ "#Dataset.load_builtin('ml-100k')\n", "\n", "# Now, let's load the dataset\n", - "train_file = os.path.expanduser('~') + '/.recsys_data/ml-100k/ml-100k/u1.base'\n", - "test_file = os.path.expanduser('~') + '/.recsys_data/ml-100k/ml-100k/u1.test'\n", + "train_file = os.path.expanduser('~') + '/.surprise_data/ml-100k/ml-100k/u1.base'\n", + "test_file = os.path.expanduser('~') + '/.surprise_data/ml-100k/ml-100k/u1.test'\n", "data = Dataset.load_from_folds([(train_file, test_file)], Reader('ml-100k'))\n", "\n", " \n", diff --git a/examples/query_for_predictions.py b/examples/query_for_predictions.py index c236d031..1813b8d9 100644 --- a/examples/query_for_predictions.py +++ b/examples/query_for_predictions.py @@ -6,9 +6,9 @@ from __future__ import (absolute_import, division, print_function, unicode_literals) -from recsys import KNNBasic -from recsys import Dataset -from recsys import evaluate +from surprise import KNNBasic +from surprise import Dataset +from surprise import evaluate # Load the movielens-100k dataset and split it into 3 folds for # cross-validation. diff --git a/examples/similarity_conf.py b/examples/similarity_conf.py index 24e2fdfd..990f4806 100644 --- a/examples/similarity_conf.py +++ b/examples/similarity_conf.py @@ -6,9 +6,9 @@ from __future__ import (absolute_import, division, print_function, unicode_literals) -from recsys import KNNBasic -from recsys import Dataset -from recsys import evaluate +from surprise import KNNBasic +from surprise import Dataset +from surprise import evaluate # Load the movielens-100k dataset. 
diff --git a/setup.py b/setup.py index 88f38881..1b3143e2 100644 --- a/setup.py +++ b/setup.py @@ -34,11 +34,11 @@ ext = '.pyx' if USE_CYTHON else '.c' -extensions = [Extension('recsys.similarities', - ['recsys/similarities' + ext], +extensions = [Extension('surprise.similarities', + ['surprise/similarities' + ext], include_dirs=[np.get_include()]), - Extension('recsys.prediction_algorithms.matrix_factorization', - ['recsys/prediction_algorithms/matrix_factorization' + ext], + Extension('surprise.prediction_algorithms.matrix_factorization', + ['surprise/prediction_algorithms/matrix_factorization' + ext], include_dirs=[np.get_include()]), ] @@ -49,25 +49,24 @@ ext_modules = extensions setup( - name='recsys', + name='surprise', version=__version__, description=('A recommender system package aimed towards researchers ' + 'and students.'), long_description=long_description, - url='https://github.com/NicolasHug/recsys', - download_url='https://github.com/NicolasHug/recsys/tarball/' + __version__, + url='https://nicolashug.github.io/Surprise/', license='GPLv3+', classifiers=[ - 'Development Status :: 4 - Beta', + 'Development Status :: 5 - Production/Stable', 'Intended Audience :: Developers', 'Intended Audience :: Education', 'Intended Audience :: Science/Research', 'Topic :: Scientific/Engineering', - 'License :: OSI Approved', + 'License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 2.7', ], - keywords='', + keywords='recommender recommendation system', packages=find_packages(exclude=['docs', 'tests*']), include_package_data=True, ext_modules = ext_modules, diff --git a/recsys/__init__.py b/surprise/__init__.py similarity index 93% rename from recsys/__init__.py rename to surprise/__init__.py index 5a84e68f..bfb14b94 100644 --- a/recsys/__init__.py +++ b/surprise/__init__.py @@ -20,4 +20,4 @@ 'KNNWithMeans', 'KNNBaseline', 'SVD', 'SVDpp', 'PredictionImpossible', 'Dataset', 'Reader', 'evaluate', 'dump'] -__version__ = get_distribution('recsys').version +__version__ = get_distribution('surprise').version diff --git a/recsys/__main__.py b/surprise/__main__.py similarity index 90% rename from recsys/__main__.py rename to surprise/__main__.py index 8a0d559f..f8d72f5c 100755 --- a/recsys/__main__.py +++ b/surprise/__main__.py @@ -9,17 +9,17 @@ import numpy as np -from recsys.prediction_algorithms import NormalPredictor -from recsys.prediction_algorithms import BaselineOnly -from recsys.prediction_algorithms import KNNBasic -from recsys.prediction_algorithms import KNNBaseline -from recsys.prediction_algorithms import KNNWithMeans -from recsys.prediction_algorithms import SVD -from recsys.prediction_algorithms import SVDpp -import recsys.dataset as dataset -from recsys.dataset import Dataset -from recsys.evaluate import evaluate -from recsys import __version__ +from surprise.prediction_algorithms import NormalPredictor +from surprise.prediction_algorithms import BaselineOnly +from surprise.prediction_algorithms import KNNBasic +from surprise.prediction_algorithms import KNNBaseline +from surprise.prediction_algorithms import KNNWithMeans +from surprise.prediction_algorithms import SVD +from surprise.prediction_algorithms import SVDpp +import surprise.dataset as dataset +from surprise.dataset import Dataset +from surprise.evaluate import evaluate +from surprise import __version__ def main(): @@ -39,9 +39,9 @@ def error(self, message): 'or a custom dataset, and 
you can choose to automatically split the ' + 'dataset into folds, or manually specify train and test files. ' + 'Please refer to the documentation page ' + - '(http://recsys.readthedocs.io/) for more details.', + '(http://surprise.readthedocs.io/) for more details.', epilog="""Example:\n - python -m recsys -algo SVD -params "{'n_epochs': 5, 'verbose': True}" + python -m surprise -algo SVD -params "{'n_epochs': 5, 'verbose': True}" -load-builtin ml-100k -n-folds 3""") algo_choices = { @@ -128,7 +128,7 @@ def error(self, message): default=None, help='Where to dump the files. Ignored if ' + 'with-dump is not set. Default is ' + - '~/.recsys_data/dumps.' + '~/.surprise_data/dumps.' ) parser.add_argument('--clean', dest='clean', action='store_true', diff --git a/recsys/accuracy.py b/surprise/accuracy.py similarity index 87% rename from recsys/accuracy.py rename to surprise/accuracy.py index 34fa7a03..f5a01eb4 100644 --- a/recsys/accuracy.py +++ b/surprise/accuracy.py @@ -1,5 +1,5 @@ """ -The :mod:`recsys.accuracy` module provides with tools for computing accuracy +The :mod:`surprise.accuracy` module provides with tools for computing accuracy metrics on a set of predictions. Available accuracy metrics: @@ -29,9 +29,9 @@ def rmse(predictions, verbose=True): Args: predictions (:obj:`list` of :obj:`Prediction\ - `): + `): A list of predictions, as returned by the :meth:`test - ` method. + ` method. verbose: If True, will print computed value. Default is ``True``. @@ -64,9 +64,9 @@ def mae(predictions, verbose=True): Args: predictions (:obj:`list` of :obj:`Prediction\ - `): + `): A list of predictions, as returned by the :meth:`test - ` method. + ` method. verbose: If True, will print computed value. Default is ``True``. @@ -98,9 +98,9 @@ def fcp(predictions, verbose=True): Args: predictions (:obj:`list` of :obj:`Prediction\ - `): + `): A list of predictions, as returned by the :meth:`test - ` method. + ` method. verbose: If True, will print computed value. Default is ``True``. diff --git a/recsys/dataset.py b/surprise/dataset.py similarity index 98% rename from recsys/dataset.py rename to surprise/dataset.py index caf0869a..6ba81c25 100644 --- a/recsys/dataset.py +++ b/surprise/dataset.py @@ -11,7 +11,7 @@ Built-in datasets can all be loaded (or downloaded if you haven't already) using the :meth:`Dataset.load_builtin` method. For each built-in dataset, -Recsys also provide predefined :class:`readers ` which are useful if +Surprise also provide predefined :class:`readers ` which are useful if you want to use a custom dataset that has the same format as a built-in one. Summary: @@ -48,8 +48,8 @@ # directory where builtin datasets are stored. For now it's in the home -# directory under the .recsys_data. May be ask user to define it? -DATASETS_DIR = os.path.expanduser('~') + '/.recsys_data/' +# directory under the .surprise_data. May be ask user to define it? +DATASETS_DIR = os.path.expanduser('~') + '/.surprise_data/' # a builtin dataset has # - an url (where to download it) @@ -458,7 +458,7 @@ class Trainset: """A trainset contains all useful data that constitutes a training set. It is used by the :meth:`train() - ` method of every + ` method of every prediction algorithm. You should not try to built such an object on your own but rather use the :meth:`Dataset.folds` method or the :meth:`DatasetAutoFolds.build_full_trainset` method. 
diff --git a/recsys/dump.py b/surprise/dump.py similarity index 73% rename from recsys/dump.py rename to surprise/dump.py index cc9fe13e..304cc6a4 100644 --- a/recsys/dump.py +++ b/surprise/dump.py @@ -7,15 +7,15 @@ def dump(file_name, predictions, trainset=None, algo=None): """Dump a list of :obj:`predictions - ` for future + ` for future analysis, using Pickle. - If needed, the :class:`trainset ` object and the + If needed, the :class:`trainset ` object and the algorithm can also be dumped. What is dumped is a dictionnary with keys ``'predictions``, ``'trainset'``, and ``'algo'``. The dumped algorithm won't be a proper :class:`algorithm - ` object but simply a + ` object but simply a dictionnary with the algorithm attributes as keys-values (technically, the ``algo.__dict__`` attribute). @@ -26,12 +26,12 @@ def dump(file_name, predictions, trainset=None, algo=None): predictions. predictions(list of :obj:`Prediction\ - `): The + `): The predictions to dump. - trainset(:class:`Trainset `, optional): The + trainset(:class:`Trainset `, optional): The trainset to dump. algo(:class:`Algorithm\ - `, optional): + `, optional): algorithm to dump. """ diff --git a/recsys/evaluate.py b/surprise/evaluate.py similarity index 90% rename from recsys/evaluate.py rename to surprise/evaluate.py index 69466c78..121ddd84 100644 --- a/recsys/evaluate.py +++ b/surprise/evaluate.py @@ -9,8 +9,8 @@ import os import numpy as np -from six import iteritems -from six import itervalues +from .six import iteritems +from .six import itervalues from . import accuracy from .dump import dump @@ -24,20 +24,20 @@ def evaluate(algo, data, measures=['rmse', 'mae'], with_dump=False, perform cross validation. Args: - algo(:obj:`AlgoBase `): + algo(:obj:`AlgoBase `): The algorithm to evaluate. - data(:obj:`Dataset `): The dataset on which to - evaluate the algorithm. + data(:obj:`Dataset `): The dataset on which + to evaluate the algorithm. measures(list of string): The performance measures to compute. Allowed names are function names as defined in the :mod:`accuracy - ` module. Default is ``['rmse', 'mae']``. + ` module. Default is ``['rmse', 'mae']``. with_dump(bool): If True, the predictions, the trainsets and the algorithm parameters will be dumped for later further analysis at each fold (see :ref:`User Guide `). The file names will be set as: ``'--'``. Default is ``False``. dump_dir(str): The directory where to dump to files. Default is - ``'~/.recsys/dumps/'``. + ``'~/.surprise_data/dumps/'``. verbose(int): Level of verbosity. If 0, nothing is printed. If 1 (default), accuracy measures for each folds are printed, with a final summary. If 2, every prediction is printed. 
@@ -71,7 +71,7 @@ def evaluate(algo, data, measures=['rmse', 'mae'], with_dump=False, if with_dump: if dump_dir is None: - dump_dir = os.path.expanduser('~') + '/.recsys_data/dumps/' + dump_dir = os.path.expanduser('~') + '/.surprise_data/dumps/' if not os.path.exists(dump_dir): os.makedirs(dump_dir) diff --git a/recsys/prediction_algorithms/__init__.py b/surprise/prediction_algorithms/__init__.py similarity index 100% rename from recsys/prediction_algorithms/__init__.py rename to surprise/prediction_algorithms/__init__.py diff --git a/recsys/prediction_algorithms/algo_base.py b/surprise/prediction_algorithms/algo_base.py similarity index 95% rename from recsys/prediction_algorithms/algo_base.py rename to surprise/prediction_algorithms/algo_base.py index bacd2505..6178c7d5 100644 --- a/recsys/prediction_algorithms/algo_base.py +++ b/surprise/prediction_algorithms/algo_base.py @@ -1,5 +1,5 @@ """ -The :mod:`recsys.prediction_algorithms.bases` module defines the base class +The :mod:`surprise.prediction_algorithms.bases` module defines the base class :class:`AlgoBase` from which every single prediction algorithm has to inherit. """ @@ -41,9 +41,9 @@ def train(self, trainset): structures and set the self.trainset attribute. Args: - trainset(:obj:`Trainset `) : A training + trainset(:obj:`Trainset `) : A training set, as returned by the :meth:`folds - ` method. + ` method. """ self.trainset = trainset @@ -70,7 +70,7 @@ def predict(self, uid, iid, r=0, verbose=False): Returns: A :obj:`Prediction\ - ` object. + ` object. """ # Convert raw ids to inner ids @@ -114,13 +114,14 @@ def test(self, testset, verbose=False): Args: testset: A test set, as returned by the :meth:`folds - ` method. + ` method. verbose(bool): Whether to print details for each predictions. Default is False. Returns: A list of :class:`Prediction\ - ` objects. + ` + objects. """ predictions = [self.predict(uid, iid, r, verbose=verbose) diff --git a/recsys/prediction_algorithms/baseline_only.py b/surprise/prediction_algorithms/baseline_only.py similarity index 100% rename from recsys/prediction_algorithms/baseline_only.py rename to surprise/prediction_algorithms/baseline_only.py diff --git a/recsys/prediction_algorithms/knns.py b/surprise/prediction_algorithms/knns.py similarity index 100% rename from recsys/prediction_algorithms/knns.py rename to surprise/prediction_algorithms/knns.py diff --git a/recsys/prediction_algorithms/matrix_factorization.pyx b/surprise/prediction_algorithms/matrix_factorization.pyx similarity index 100% rename from recsys/prediction_algorithms/matrix_factorization.pyx rename to surprise/prediction_algorithms/matrix_factorization.pyx diff --git a/recsys/prediction_algorithms/predictions.py b/surprise/prediction_algorithms/predictions.py similarity index 95% rename from recsys/prediction_algorithms/predictions.py rename to surprise/prediction_algorithms/predictions.py index 6014df72..b4bd7f00 100644 --- a/recsys/prediction_algorithms/predictions.py +++ b/surprise/prediction_algorithms/predictions.py @@ -1,5 +1,5 @@ """ -The :mod:`recsys.prediction_algorithms.predictions` module defines the +The :mod:`surprise.prediction_algorithms.predictions` module defines the :class:`Prediction` named tuple and the :class:`PredictionImpossible` exception. 
""" diff --git a/recsys/prediction_algorithms/random_pred.py b/surprise/prediction_algorithms/random_pred.py similarity index 100% rename from recsys/prediction_algorithms/random_pred.py rename to surprise/prediction_algorithms/random_pred.py diff --git a/recsys/similarities.pyx b/surprise/similarities.pyx similarity index 98% rename from recsys/similarities.pyx rename to surprise/similarities.pyx index 90fec3e4..cd346317 100644 --- a/recsys/similarities.pyx +++ b/surprise/similarities.pyx @@ -1,6 +1,6 @@ """ -The :mod:`similarities ` module includes tools to compute -similarity metrics between users or items. You may need to refer to the +The :mod:`similarities ` module includes tools to +compute similarity metrics between users or items. You may need to refer to the :ref:`notation_standards` page. See also the :ref:`similarity_measures_configuration` section of the User Guide. diff --git a/recsys/six.py b/surprise/six.py similarity index 100% rename from recsys/six.py rename to surprise/six.py diff --git a/tests/test_SVD.py b/tests/test_SVD.py index 51ed7500..dc41cafa 100644 --- a/tests/test_SVD.py +++ b/tests/test_SVD.py @@ -6,11 +6,11 @@ unicode_literals) import os -from recsys.prediction_algorithms import SVD -from recsys.prediction_algorithms import SVDpp -from recsys.dataset import Dataset -from recsys.dataset import Reader -from recsys.evaluate import evaluate +from surprise.prediction_algorithms import SVD +from surprise.prediction_algorithms import SVDpp +from surprise.dataset import Dataset +from surprise.dataset import Reader +from surprise.evaluate import evaluate # the test and train files are from the ml-100k dataset (10% of u1.base and diff --git a/tests/test_accuracy.py b/tests/test_accuracy.py index 6804a65f..19894870 100644 --- a/tests/test_accuracy.py +++ b/tests/test_accuracy.py @@ -6,7 +6,7 @@ import pytest -import recsys.accuracy +import surprise.accuracy def pred(true_r, est, u0=None): @@ -18,33 +18,33 @@ def test_mae(): """Tests for the MAE function.""" predictions = [pred(0, 0), pred(1, 1), pred(2, 2), pred(100, 100)] - assert recsys.accuracy.mae(predictions) == 0 + assert surprise.accuracy.mae(predictions) == 0 predictions = [pred(0, 0), pred(0, 2)] - assert recsys.accuracy.mae(predictions) == abs(0 - 2) / 2 + assert surprise.accuracy.mae(predictions) == abs(0 - 2) / 2 predictions = [pred(2, 0), pred(3, 4)] - assert recsys.accuracy.mae(predictions) == (abs(2 - 0) + abs(3 - 4)) / 2 + assert surprise.accuracy.mae(predictions) == (abs(2 - 0) + abs(3 - 4)) / 2 with pytest.raises(ValueError): - recsys.accuracy.mae([]) + surprise.accuracy.mae([]) def test_rmse(): """Tests for the RMSE function.""" predictions = [pred(0, 0), pred(1, 1), pred(2, 2), pred(100, 100)] - assert recsys.accuracy.rmse(predictions) == 0 + assert surprise.accuracy.rmse(predictions) == 0 predictions = [pred(0, 0), pred(0, 2)] - assert recsys.accuracy.rmse(predictions) == sqrt((0 - 2)**2 / 2) + assert surprise.accuracy.rmse(predictions) == sqrt((0 - 2)**2 / 2) predictions = [pred(2, 0), pred(3, 4)] - assert recsys.accuracy.rmse(predictions) == sqrt( + assert surprise.accuracy.rmse(predictions) == sqrt( ((2 - 0)**2 + (3 - 4)**2) / 2) with pytest.raises(ValueError): - recsys.accuracy.rmse([]) + surprise.accuracy.rmse([]) def test_fcp(): @@ -52,19 +52,19 @@ def test_fcp(): predictions = [pred(0, 0, u0='u1'), pred(1, 1, u0='u1'), pred(2, 2, u0='u2'), pred(100, 100, u0='u2')] - assert recsys.accuracy.fcp(predictions) == 1 + assert surprise.accuracy.fcp(predictions) == 1 predictions = [pred(0, 0, 
u0='u1'), pred(0, 0, u0='u1')] with pytest.raises(ValueError): - recsys.accuracy.fcp(predictions) + surprise.accuracy.fcp(predictions) predictions = [pred(0, 0, u0='u1')] with pytest.raises(ValueError): - recsys.accuracy.fcp(predictions) + surprise.accuracy.fcp(predictions) predictions = [pred(0, 1, u0='u1'), pred(1, 0, u0='u1'), pred(2, 0.5, u0='u2'), pred(0, 0.6, u0='u2')] - assert recsys.accuracy.fcp(predictions) == 0 + assert surprise.accuracy.fcp(predictions) == 0 with pytest.raises(ValueError): - recsys.accuracy.fcp([]) + surprise.accuracy.fcp([]) diff --git a/tests/test_algorithms.py b/tests/test_algorithms.py index 96d427dd..028a8db5 100644 --- a/tests/test_algorithms.py +++ b/tests/test_algorithms.py @@ -6,15 +6,15 @@ unicode_literals) import os -from recsys.prediction_algorithms import NormalPredictor -from recsys.prediction_algorithms import BaselineOnly -from recsys.prediction_algorithms import KNNBasic -from recsys.prediction_algorithms import KNNWithMeans -from recsys.prediction_algorithms import KNNBaseline -from recsys.prediction_algorithms import SVD -from recsys.prediction_algorithms import SVDpp -from recsys.dataset import Dataset -from recsys.dataset import Reader +from surprise.prediction_algorithms import NormalPredictor +from surprise.prediction_algorithms import BaselineOnly +from surprise.prediction_algorithms import KNNBasic +from surprise.prediction_algorithms import KNNWithMeans +from surprise.prediction_algorithms import KNNBaseline +from surprise.prediction_algorithms import SVD +from surprise.prediction_algorithms import SVDpp +from surprise.dataset import Dataset +from surprise.dataset import Reader def test_unknown_user_or_item(): diff --git a/tests/test_bsl_options.py b/tests/test_bsl_options.py index ade84d4a..80ca3be6 100644 --- a/tests/test_bsl_options.py +++ b/tests/test_bsl_options.py @@ -6,10 +6,10 @@ import pytest -from recsys.prediction_algorithms import BaselineOnly -from recsys.dataset import Dataset -from recsys.dataset import Reader -from recsys.evaluate import evaluate +from surprise.prediction_algorithms import BaselineOnly +from surprise.dataset import Dataset +from surprise.dataset import Reader +from surprise.evaluate import evaluate # the test and train files are from the ml-100k dataset (10% of u1.base and diff --git a/tests/test_dataset.py b/tests/test_dataset.py index 3f127ee3..2617e0dd 100644 --- a/tests/test_dataset.py +++ b/tests/test_dataset.py @@ -8,8 +8,8 @@ import pytest -from recsys import Dataset -from recsys import Reader +from surprise import Dataset +from surprise import Reader reader = Reader(line_format='user item rating', sep=' ', skip_lines=3, diff --git a/tests/test_dump.py b/tests/test_dump.py index 75aed8d8..69010e48 100644 --- a/tests/test_dump.py +++ b/tests/test_dump.py @@ -6,8 +6,8 @@ import pytest -from recsys.prediction_algorithms.predictions import Prediction -from recsys import dump +from surprise.prediction_algorithms.predictions import Prediction +from surprise import dump def test_dump(): diff --git a/tests/test_evaluate.py b/tests/test_evaluate.py index d709be7f..42605cd5 100644 --- a/tests/test_evaluate.py +++ b/tests/test_evaluate.py @@ -6,10 +6,10 @@ unicode_literals) import os -from recsys import NormalPredictor -from recsys.dataset import Dataset -from recsys.dataset import Reader -from recsys.evaluate import evaluate +from surprise import NormalPredictor +from surprise.dataset import Dataset +from surprise.dataset import Reader +from surprise.evaluate import evaluate def test_performances(): diff 
--git a/tests/test_reader.py b/tests/test_reader.py index 481cb4b4..8ea8d16a 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -7,7 +7,7 @@ import pytest -from recsys import Reader +from surprise import Reader def test_params(): diff --git a/tests/test_sim_options.py b/tests/test_sim_options.py index 5596e2d2..52cfbeac 100644 --- a/tests/test_sim_options.py +++ b/tests/test_sim_options.py @@ -9,12 +9,12 @@ import pytest -from recsys.prediction_algorithms import KNNBasic -from recsys.prediction_algorithms import KNNWithMeans -from recsys.prediction_algorithms import KNNBaseline -from recsys.dataset import Dataset -from recsys.dataset import Reader -from recsys.evaluate import evaluate +from surprise.prediction_algorithms import KNNBasic +from surprise.prediction_algorithms import KNNWithMeans +from surprise.prediction_algorithms import KNNBaseline +from surprise.dataset import Dataset +from surprise.dataset import Reader +from surprise.evaluate import evaluate # the test and train files are from the ml-100k dataset (10% of u1.base and diff --git a/tests/test_similarities.py b/tests/test_similarities.py index dfb8dd2d..9421d83f 100644 --- a/tests/test_similarities.py +++ b/tests/test_similarities.py @@ -8,7 +8,7 @@ import numpy as np -import recsys.similarities as sims +import surprise.similarities as sims n_x = 7