From 4c5aafb2f4d4ca00f2303fc10ffe8ef787c39e9c Mon Sep 17 00:00:00 2001 From: Simon Zhao <43029286+simonzhaoms@users.noreply.github.com> Date: Fri, 18 Mar 2022 19:55:35 +0800 Subject: [PATCH] Update movielens dataset download url --- examples/00_quick_start/ncf_movielens.ipynb | 2 +- examples/01_prepare_data/data_split.ipynb | 2 +- examples/02_model_hybrid/lightfm_deep_dive.ipynb | 2 +- recommenders/datasets/movielens.py | 10 +++++----- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/examples/00_quick_start/ncf_movielens.ipynb b/examples/00_quick_start/ncf_movielens.ipynb index c38aeb76c3..bcaf20ea14 100644 --- a/examples/00_quick_start/ncf_movielens.ipynb +++ b/examples/00_quick_start/ncf_movielens.ipynb @@ -112,7 +112,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:recommenders.datasets.download_utils:Downloading http://files.grouplens.org/datasets/movielens/ml-100k.zip\n", + "INFO:recommenders.datasets.download_utils:Downloading https://files.grouplens.org/datasets/movielens/ml-100k.zip\n", "100%|██████████| 4.81k/4.81k [00:00<00:00, 16.9kKB/s]\n" ] } diff --git a/examples/01_prepare_data/data_split.ipynb b/examples/01_prepare_data/data_split.ipynb index babef037f0..9156fbe462 100644 --- a/examples/01_prepare_data/data_split.ipynb +++ b/examples/01_prepare_data/data_split.ipynb @@ -73,7 +73,7 @@ "cell_type": "code", "execution_count": 4, "source": [ - "DATA_URL = \"http://files.grouplens.org/datasets/movielens/ml-100k/u.data\"\n", + "DATA_URL = \"https://files.grouplens.org/datasets/movielens/ml-100k/u.data\"\n", "DATA_PATH = \"ml-100k.data\"\n", "\n", "COL_USER = \"UserId\"\n", diff --git a/examples/02_model_hybrid/lightfm_deep_dive.ipynb b/examples/02_model_hybrid/lightfm_deep_dive.ipynb index a420b2a3c3..0da939c5b1 100755 --- a/examples/02_model_hybrid/lightfm_deep_dive.ipynb +++ b/examples/02_model_hybrid/lightfm_deep_dive.ipynb @@ -1030,7 +1030,7 @@ } ], "source": [ - "user_feature_URL = 'http://files.grouplens.org/datasets/movielens/ml-100k/u.user'\n", + "user_feature_URL = 'https://files.grouplens.org/datasets/movielens/ml-100k/u.user'\n", "user_data = pd.read_table(user_feature_URL, \n", " sep='|', header=None)\n", "user_data.columns = ['userID','age','gender','occupation','zipcode']\n", diff --git a/recommenders/datasets/movielens.py b/recommenders/datasets/movielens.py index 8ad6d314b6..8add4bc347 100644 --- a/recommenders/datasets/movielens.py +++ b/recommenders/datasets/movielens.py @@ -159,7 +159,7 @@ def load_pandas_df( ): """Loads the MovieLens dataset as pd.DataFrame. - Download the dataset from http://files.grouplens.org/datasets/movielens, unzip, and load. + Download the dataset from https://files.grouplens.org/datasets/movielens, unzip, and load. To load movie information only, you can use load_item_df function. Args: @@ -304,7 +304,7 @@ def _load_item_df(size, item_datapath, movie_col, title_col, genres_col, year_co genres_header_100k = None if genres_col is not None: # 100k data's movie genres are encoded as a binary array (the last 19 fields) - # For details, see http://files.grouplens.org/datasets/movielens/ml-100k-README.txt + # For details, see https://files.grouplens.org/datasets/movielens/ml-100k-README.txt if size == "100k": genres_header_100k = [*(str(i) for i in range(19))] item_header.extend(genres_header_100k) @@ -366,7 +366,7 @@ def load_spark_df( ): """Loads the MovieLens dataset as `pyspark.sql.DataFrame`. - Download the dataset from http://files.grouplens.org/datasets/movielens, unzip, and load as `pyspark.sql.DataFrame`. + Download the dataset from https://files.grouplens.org/datasets/movielens, unzip, and load as `pyspark.sql.DataFrame`. To load movie information only, you can use `load_item_df` function. @@ -552,7 +552,7 @@ def download_movielens(size, dest_path): if size not in DATA_FORMAT: raise ValueError(ERROR_MOVIE_LENS_SIZE) - url = "http://files.grouplens.org/datasets/movielens/ml-" + size + ".zip" + url = "https://files.grouplens.org/datasets/movielens/ml-" + size + ".zip" dirs, file = os.path.split(dest_path) maybe_download(url, file, work_directory=dirs) @@ -587,7 +587,7 @@ class MockMovielensSchema(pa.SchemaModel): Mock dataset schema to generate fake data for testing purpose. This schema is configured to mimic the Movielens dataset - http://files.grouplens.org/datasets/movielens/ml-100k/ + https://files.grouplens.org/datasets/movielens/ml-100k/ Dataset schema and generation is configured using pandera. Please see https://pandera.readthedocs.io/en/latest/schema_models.html