diff --git a/test/test_remote_io.py b/test/test_remote_io.py index 1e432282e..c241ed32c 100644 --- a/test/test_remote_io.py +++ b/test/test_remote_io.py @@ -83,6 +83,12 @@ def test_http_reader_iterdatapipe(self): # __len__ Test: returns the length of source DataPipe self.assertEqual(1, len(http_reader_dp)) + # Error Test: test if the Http Reader raises an error when the url is invalid + error_url = "https://github.com/pytorch/data/this/url/dont/exist" + http_error_dp = HttpReader(IterableWrapper([error_url]), timeout=timeout) + with self.assertRaisesRegex(Exception, "[404]"): + next(iter(http_error_dp.readlines())) + def test_on_disk_cache_holder_iterdatapipe(self): tar_file_url = "https://raw.githubusercontent.com/pytorch/data/main/test/_fakedata/csv.tar.gz" expected_file_name = os.path.join(self.temp_dir.name, "csv.tar.gz") diff --git a/torchdata/datapipes/iter/load/online.py b/torchdata/datapipes/iter/load/online.py index fb3c32f30..4747f7003 100644 --- a/torchdata/datapipes/iter/load/online.py +++ b/torchdata/datapipes/iter/load/online.py @@ -43,6 +43,7 @@ def _get_response_from_http( r = session.get(url, stream=True, proxies=proxies, **query_params) # type: ignore[arg-type] else: r = session.get(url, timeout=timeout, stream=True, proxies=proxies, **query_params) # type: ignore[arg-type] + r.raise_for_status() return url, StreamWrapper(r.raw) except HTTPError as e: raise Exception(f"Could not get the file. [HTTP Error] {e.response}.")