From 9d0c3dab544fde23b8ddc254dc71ca43366afcb7 Mon Sep 17 00:00:00 2001 From: The TensorFlow Datasets Authors Date: Tue, 30 Jul 2024 08:14:09 -0700 Subject: [PATCH] Add a test to check the gated datasets warnings. PiperOrigin-RevId: 657597153 --- .../huggingface_dataset_builder.py | 2 +- .../huggingface_dataset_builder_test.py | 26 +++++++++++++++++-- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/tensorflow_datasets/core/dataset_builders/huggingface_dataset_builder.py b/tensorflow_datasets/core/dataset_builders/huggingface_dataset_builder.py index a3373cdd28c..d1890b5d084 100644 --- a/tensorflow_datasets/core/dataset_builders/huggingface_dataset_builder.py +++ b/tensorflow_datasets/core/dataset_builders/huggingface_dataset_builder.py @@ -332,7 +332,7 @@ def _hf_features(self) -> hf_datasets.Features: def _info(self) -> dataset_info_lib.DatasetInfo: ds_description = self._get_text_field('description') - ds_license = self._get_license() + ds_license = self._get_license() or '' if self._is_gated(): ds_description = self._gated_text + '\n' + ds_description ds_license = ds_license + ' ' + self._gated_dataset_warning diff --git a/tensorflow_datasets/core/dataset_builders/huggingface_dataset_builder_test.py b/tensorflow_datasets/core/dataset_builders/huggingface_dataset_builder_test.py index a15c21a2d4f..7e0f3fb6634 100644 --- a/tensorflow_datasets/core/dataset_builders/huggingface_dataset_builder_test.py +++ b/tensorflow_datasets/core/dataset_builders/huggingface_dataset_builder_test.py @@ -82,6 +82,8 @@ def mock_hub_dataset_info(): downloads=123, likes=456, tags=[], + gated='automatic', + card_data={'extra_gated_prompt': 'Extra condition'}, ) with mock.patch.object( huggingface_hub, 'dataset_info', return_value=fake_dataset_info @@ -111,12 +113,32 @@ def mock_huggingface_dataset_builder( def test_dataset_info(builder): - assert builder.info.description == 'description' + assert builder.info.description.endswith('description') assert builder.info.citation == 'citation from the hub' - assert builder.info.redistribution_info.license == 'test-license' + assert builder.info.redistribution_info.license.startswith('test-license') assert builder.info.homepage == 'https://huggingface.co/datasets/foo/bar' +def test_gated_text(builder): + expected_warning = ( + 'WARNING: This dataset is gated. Before using it, make sure to sign the' + ' conditions at: https://huggingface.co/datasets/foo/bar. Important:' + ' access requests are always granted to individual users rather than to' + ' entire organizations.' + ) + expected_conditions = ( + 'The conditions consist of:\nBy agreeing you' + ' accept to share your contact information (email and username) with the' + ' repository authors.\nExtra condition' + ) + expected_gated_text = expected_warning + '\n' + expected_conditions + assert builder._gated_text == expected_gated_text + expected_description = expected_gated_text + '\n' + 'description' + assert builder.info.description == expected_description + expected_license = 'test-license' + ' ' + expected_warning + assert builder.info.redistribution_info.license == expected_license + + def test_download_and_prepare(builder): builder.download_and_prepare() ds = builder.as_data_source()