From eac35fc23e0b01c7863cf0db9bb550d4429e0da0 Mon Sep 17 00:00:00 2001
From: Jirka <jirka.borovec@seznam.cz>
Date: Mon, 21 Nov 2022 11:55:42 +0100
Subject: [PATCH 1/5] tests: switch T5 test model

---
 tests/task/nlp/test_summarization.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tests/task/nlp/test_summarization.py b/tests/task/nlp/test_summarization.py
index 39209f68..5a893ef8 100644
--- a/tests/task/nlp/test_summarization.py
+++ b/tests/task/nlp/test_summarization.py
@@ -12,12 +12,13 @@
     XsumSummarizationDataModule,
 )
 
+_MODEL_T5_TINY = "hf-internal-testing/tiny-random-t5"
 
 @pytest.mark.skipif(sys.platform == "win32", reason="Currently Windows is not supported")
 def test_smoke_train(hf_cache_path):
-    tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path="patrickvonplaten/t5-tiny-random")
+    tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=_MODEL_T5_TINY)
     model = SummarizationTransformer(
-        pretrained_model_name_or_path="patrickvonplaten/t5-tiny-random",
+        pretrained_model_name_or_path=_MODEL_T5_TINY,
         use_stemmer=True,
         val_target_max_length=142,
         num_beams=None,
@@ -41,8 +42,8 @@ def test_smoke_train(hf_cache_path):
 @pytest.mark.skipif(sys.platform == "win32", reason="Currently Windows is not supported")
 def test_smoke_predict():
     model = SummarizationTransformer(
-        pretrained_model_name_or_path="patrickvonplaten/t5-tiny-random",
-        tokenizer=AutoTokenizer.from_pretrained(pretrained_model_name_or_path="patrickvonplaten/t5-tiny-random"),
+        pretrained_model_name_or_path=_MODEL_T5_TINY,
+        tokenizer=AutoTokenizer.from_pretrained(pretrained_model_name_or_path=_MODEL_T5_TINY),
     )
 
     y = model.hf_predict(

From 602651387101bea494fc7092c797e91fdc192d46 Mon Sep 17 00:00:00 2001
From: Jirka <jirka.borovec@seznam.cz>
Date: Mon, 21 Nov 2022 11:56:27 +0100
Subject: [PATCH 2/5] pre-commit

---
 README.md                            | 2 +-
 tests/task/nlp/test_summarization.py | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index a9870ef4..2b061238 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 
 ### Nov 18 2022
 
-This repository has been archived (read-only) on Nov 18, 2022**. Thanks to everyone who contributed to `lightning-transformers`, we feel it's time to move on.
+**This repository has been archived (read-only) on Nov 18, 2022**. Thanks to everyone who contributed to `lightning-transformers`, we feel it's time to move on.
 
 :hugs: Transformers can **already be easily trained using the Lightning :zap: Trainer**. Here's a recent example from the community: <https://sachinruk.github.io/blog/deep-learning/2022/11/07/t5-for-grammar-correction.html>. Note that there are **no limitations or workarounds**, things just work out of the box.
 
diff --git a/tests/task/nlp/test_summarization.py b/tests/task/nlp/test_summarization.py
index 5a893ef8..d21624cf 100644
--- a/tests/task/nlp/test_summarization.py
+++ b/tests/task/nlp/test_summarization.py
@@ -14,6 +14,7 @@
 
 _MODEL_T5_TINY = "hf-internal-testing/tiny-random-t5"
 
+
 @pytest.mark.skipif(sys.platform == "win32", reason="Currently Windows is not supported")
 def test_smoke_train(hf_cache_path):
     tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=_MODEL_T5_TINY)

From d73322bdd03d964f8a12c079c1dee55383f4414d Mon Sep 17 00:00:00 2001
From: Jirka <jirka.borovec@seznam.cz>
Date: Mon, 21 Nov 2022 12:39:50 +0100
Subject: [PATCH 3/5] fix CNN/DailyMail datamodule: pass default dataset config name

---
 .../nlp/summarization/datasets/cnn_dailymail.py  |  4 ++--
 tests/task/nlp/test_summarization.py             | 16 ++++++++--------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/lightning_transformers/task/nlp/summarization/datasets/cnn_dailymail.py b/lightning_transformers/task/nlp/summarization/datasets/cnn_dailymail.py
index 32eece09..a443b774 100644
--- a/lightning_transformers/task/nlp/summarization/datasets/cnn_dailymail.py
+++ b/lightning_transformers/task/nlp/summarization/datasets/cnn_dailymail.py
@@ -17,8 +17,8 @@
 
 
 class CNNDailyMailSummarizationDataModule(SummarizationDataModule):
-    def __init__(self, *args, dataset_name: str = "cnn_dailymail", **kwargs):
-        super().__init__(*args, dataset_name=dataset_name, **kwargs)
+    def __init__(self, *args, dataset_name: str = "cnn_dailymail", config_name: str = "3.0.0", **kwargs):
+        super().__init__(*args, dataset_name=dataset_name, dataset_config_name=config_name, **kwargs)
 
     @property
     def source_target_column_names(self) -> Tuple[str, str]:
diff --git a/tests/task/nlp/test_summarization.py b/tests/task/nlp/test_summarization.py
index d21624cf..261b50cb 100644
--- a/tests/task/nlp/test_summarization.py
+++ b/tests/task/nlp/test_summarization.py
@@ -7,29 +7,29 @@
 from transformers import AutoTokenizer
 
 from lightning_transformers.task.nlp.summarization import (
+    CNNDailyMailSummarizationDataModule,
     SummarizationDataModule,
     SummarizationTransformer,
-    XsumSummarizationDataModule,
 )
 
-_MODEL_T5_TINY = "hf-internal-testing/tiny-random-t5"
+_MODEL_TINY = "patrickvonplaten/t5-tiny-random"
 
 
 @pytest.mark.skipif(sys.platform == "win32", reason="Currently Windows is not supported")
 def test_smoke_train(hf_cache_path):
-    tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=_MODEL_T5_TINY)
+    tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=_MODEL_TINY)
     model = SummarizationTransformer(
-        pretrained_model_name_or_path=_MODEL_T5_TINY,
+        pretrained_model_name_or_path=_MODEL_TINY,
         use_stemmer=True,
         val_target_max_length=142,
         num_beams=None,
         compute_generate_metrics=True,
     )
-    dm = XsumSummarizationDataModule(
+    dm = CNNDailyMailSummarizationDataModule(
         limit_train_samples=64,
         limit_val_samples=64,
         limit_test_samples=64,
-        batch_size=1,
+        batch_size=32,
         max_source_length=128,
         max_target_length=128,
         cache_dir=hf_cache_path,
@@ -43,8 +43,8 @@ def test_smoke_train(hf_cache_path):
 @pytest.mark.skipif(sys.platform == "win32", reason="Currently Windows is not supported")
 def test_smoke_predict():
     model = SummarizationTransformer(
-        pretrained_model_name_or_path=_MODEL_T5_TINY,
-        tokenizer=AutoTokenizer.from_pretrained(pretrained_model_name_or_path=_MODEL_T5_TINY),
+        pretrained_model_name_or_path=_MODEL_TINY,
+        tokenizer=AutoTokenizer.from_pretrained(pretrained_model_name_or_path=_MODEL_TINY),
     )
 
     y = model.hf_predict(

From 1948ef60ab0dd711667127af80efac8a6404f562 Mon Sep 17 00:00:00 2001
From: Jirka <jirka.borovec@seznam.cz>
Date: Mon, 21 Nov 2022 12:46:42 +0100
Subject: [PATCH 4/5] tests: replace batch_size with worker settings in smoke train

---
 tests/task/nlp/test_summarization.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/task/nlp/test_summarization.py b/tests/task/nlp/test_summarization.py
index 261b50cb..241f35d4 100644
--- a/tests/task/nlp/test_summarization.py
+++ b/tests/task/nlp/test_summarization.py
@@ -29,7 +29,8 @@ def test_smoke_train(hf_cache_path):
         limit_train_samples=64,
         limit_val_samples=64,
         limit_test_samples=64,
-        batch_size=32,
+        num_workers=2,
+        preprocessing_num_workers=2,
         max_source_length=128,
         max_target_length=128,
         cache_dir=hf_cache_path,

From 074f5783ada010b2ad9b3f037a7c6fd5e92de457 Mon Sep 17 00:00:00 2001
From: Jirka <jirka.borovec@seznam.cz>
Date: Mon, 21 Nov 2022 13:26:45 +0100
Subject: [PATCH 5/5] update changelog

---
 CHANGELOG.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d618ea88..5871ca69 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 
+## [0.2.5] - 2022-11-DD
+
+### Fixed
+
+- Fixed `CNNDailyMailSummarizationDataModule` to pass a dataset config name, defaulting to `3.0.0` ([#310](https://github.com/Lightning-AI/lightning-transformers/pull/310))
+
+
 ## [0.2.4] - 2022-11-03
 
 ### Changed