Cleanup DataConfig implementation (microsoft#1187)

yuwenzho · Jun 6, 2024 · 1358acf · 1358acf
1 parent 7ca6fc4
commit 1358acf
Show file tree

Hide file tree

Showing 70 changed files with 1,702 additions and 1,234 deletions.
diff --git a/.azure_pipelines/performance_check/configs/bert.json b/.azure_pipelines/performance_check/configs/bert.json
@@ -12,19 +12,29 @@
         {
             "name": "glue_mrpc",
             "type": "HuggingfaceContainer",
-            "params_config": {
-                "batch_size": 1,
-                "max_samples": 100,
-                "data_name": "glue",
-                "input_cols": [
-                    "sentence1",
-                    "sentence2"
-                ],
-                "label_cols": [
-                    "label"
-                ],
-                "split": "validation",
-                "subset": "mrpc"
+            "load_dataset_config": {
+                "params": {
+                    "data_name": "glue",
+                    "split": "validation",
+                    "subset": "mrpc"
+                }
+            },
+            "pre_process_data_config": {
+                "params": {
+                    "input_cols": [
+                        "sentence1",
+                        "sentence2"
+                    ],
+                    "label_cols": [
+                        "label"
+                    ],
+                    "max_samples": 100
+                }
+            },
+            "dataloader_config": {
+                "params": {
+                    "batch_size": 1
+                }
             }
         }
     ],

diff --git a/.azure_pipelines/performance_check/configs/bert_gpu.json b/.azure_pipelines/performance_check/configs/bert_gpu.json
@@ -27,19 +27,29 @@
         {
             "name": "glue_mrpc",
             "type": "HuggingfaceContainer",
-            "params_config": {
-                "batch_size": 1,
-                "max_samples": 100,
-                "data_name": "glue",
-                "input_cols": [
-                    "sentence1",
-                    "sentence2"
-                ],
-                "label_cols": [
-                    "label"
-                ],
-                "split": "validation",
-                "subset": "mrpc"
+            "load_dataset_config": {
+                "params": {
+                    "data_name": "glue",
+                    "split": "validation",
+                    "subset": "mrpc"
+                }
+            },
+            "pre_process_data_config": {
+                "params": {
+                    "input_cols": [
+                        "sentence1",
+                        "sentence2"
+                    ],
+                    "label_cols": [
+                        "label"
+                    ],
+                    "max_samples": 100
+                }
+            },
+            "dataloader_config": {
+                "params": {
+                    "batch_size": 1
+                }
             }
         }
     ],

diff --git a/.azure_pipelines/performance_check/configs/deberta.json b/.azure_pipelines/performance_check/configs/deberta.json
@@ -12,23 +12,29 @@
         {
             "name": "glue_mnli_matched",
             "type": "HuggingfaceContainer",
-            "params_config": {
-                "data_name": "glue",
-                "subset": "mnli_matched",
-                "split": "validation",
-                "input_cols": [
-                    "premise",
-                    "hypothesis"
-                ],
-                "label_cols": [
-                    "label"
-                ],
-                "batch_size": 1,
-                "max_samples": 100,
-                "component_kwargs": {
-                    "pre_process_data": {
-                        "align_labels": true
-                    }
+            "load_dataset_config": {
+                "params": {
+                    "data_name": "glue",
+                    "subset": "mnli_matched",
+                    "split": "validation"
+                }
+            },
+            "pre_process_data_config": {
+                "params": {
+                    "align_labels": true,
+                    "input_cols": [
+                        "premise",
+                        "hypothesis"
+                    ],
+                    "label_cols": [
+                        "label"
+                    ],
+                    "max_samples": 100
+                }
+            },
+            "dataloader_config": {
+                "params": {
+                    "batch_size": 1
                 }
             }
         }

diff --git a/.azure_pipelines/performance_check/configs/deberta_gpu.json b/.azure_pipelines/performance_check/configs/deberta_gpu.json
@@ -27,23 +27,29 @@
         {
             "name": "glue_mnli_matched",
             "type": "HuggingfaceContainer",
-            "params_config": {
-                "data_name": "glue",
-                "subset": "mnli_matched",
-                "split": "validation",
-                "input_cols": [
-                    "premise",
-                    "hypothesis"
-                ],
-                "label_cols": [
-                    "label"
-                ],
-                "batch_size": 1,
-                "max_samples": 100,
-                "component_kwargs": {
-                    "pre_process_data": {
-                        "align_labels": true
-                    }
+            "load_dataset_config": {
+                "params": {
+                    "data_name": "glue",
+                    "subset": "mnli_matched",
+                    "split": "validation"
+                }
+            },
+            "pre_process_data_config": {
+                "params": {
+                    "align_labels": true,
+                    "input_cols": [
+                        "premise",
+                        "hypothesis"
+                    ],
+                    "label_cols": [
+                        "label"
+                    ],
+                    "max_samples": 100
+                }
+            },
+            "dataloader_config": {
+                "params": {
+                    "batch_size": 1
                 }
             }
         }

diff --git a/.azure_pipelines/performance_check/configs/distilbert.json b/.azure_pipelines/performance_check/configs/distilbert.json
@@ -12,18 +12,28 @@
         {
             "name": "glue_sst2",
             "type": "HuggingfaceContainer",
-            "params_config": {
-                "data_name": "glue",
-                "subset": "sst2",
-                "split": "validation",
-                "input_cols": [
-                    "sentence"
-                ],
-                "label_cols": [
-                    "label"
-                ],
-                "batch_size": 1,
-                "max_samples": 100
+            "load_dataset_config": {
+                "params": {
+                    "data_name": "glue",
+                    "subset": "sst2",
+                    "split": "validation"
+                }
+            },
+            "pre_process_data_config": {
+                "params": {
+                    "input_cols": [
+                        "sentence"
+                    ],
+                    "label_cols": [
+                        "label"
+                    ],
+                    "max_samples": 100
+                }
+            },
+            "dataloader_config": {
+                "params": {
+                    "batch_size": 1
+                }
             }
         }
     ],

diff --git a/.azure_pipelines/performance_check/configs/distilbert_gpu.json b/.azure_pipelines/performance_check/configs/distilbert_gpu.json
@@ -27,18 +27,28 @@
         {
             "name": "glue_sst2",
             "type": "HuggingfaceContainer",
-            "params_config": {
-                "data_name": "glue",
-                "subset": "sst2",
-                "split": "validation",
-                "input_cols": [
-                    "sentence"
-                ],
-                "label_cols": [
-                    "label"
-                ],
-                "batch_size": 1,
-                "max_samples": 100
+            "load_dataset_config": {
+                "params": {
+                    "data_name": "glue",
+                    "subset": "sst2",
+                    "split": "validation"
+                }
+            },
+            "pre_process_data_config": {
+                "params": {
+                    "input_cols": [
+                        "sentence"
+                    ],
+                    "label_cols": [
+                        "label"
+                    ],
+                    "max_samples": 100
+                }
+            },
+            "dataloader_config": {
+                "params": {
+                    "batch_size": 1
+                }
             }
         }
     ],

diff --git a/.azure_pipelines/performance_check/configs/roberta_large.json b/.azure_pipelines/performance_check/configs/roberta_large.json
@@ -12,23 +12,29 @@
         {
             "name": "glue_mnli_matched",
             "type": "HuggingfaceContainer",
-            "params_config": {
-                "data_name": "glue",
-                "subset": "mnli_matched",
-                "split": "validation",
-                "input_cols": [
-                    "premise",
-                    "hypothesis"
-                ],
-                "label_cols": [
-                    "label"
-                ],
-                "batch_size": 1,
-                "max_samples": 100,
-                "component_kwargs": {
-                    "pre_process_data": {
-                        "align_labels": true
-                    }
+            "load_dataset_config": {
+                "params": {
+                    "data_name": "glue",
+                    "subset": "mnli_matched",
+                    "split": "validation"
+                }
+            },
+            "pre_process_data_config": {
+                "params": {
+                    "align_labels": true,
+                    "input_cols": [
+                        "premise",
+                        "hypothesis"
+                    ],
+                    "label_cols": [
+                        "label"
+                    ],
+                    "max_samples": 100
+                }
+            },
+            "dataloader_config": {
+                "params": {
+                    "batch_size": 1
                 }
             }
         }

diff --git a/.azure_pipelines/performance_check/configs/roberta_large_gpu.json b/.azure_pipelines/performance_check/configs/roberta_large_gpu.json
@@ -27,23 +27,29 @@
         {
             "name": "glue_mnli_matched",
             "type": "HuggingfaceContainer",
-            "params_config": {
-                "data_name": "glue",
-                "subset": "mnli_matched",
-                "split": "validation",
-                "input_cols": [
-                    "premise",
-                    "hypothesis"
-                ],
-                "label_cols": [
-                    "label"
-                ],
-                "batch_size": 1,
-                "max_samples": 100,
-                "component_kwargs": {
-                    "pre_process_data": {
-                        "align_labels": true
-                    }
+            "load_dataset_config": {
+                "params": {
+                    "data_name": "glue",
+                    "subset": "mnli_matched",
+                    "split": "validation"
+                }
+            },
+            "pre_process_data_config": {
+                "params": {
+                    "align_labels": true,
+                    "input_cols": [
+                        "premise",
+                        "hypothesis"
+                    ],
+                    "label_cols": [
+                        "label"
+                    ],
+                    "max_samples": 100
+                }
+            },
+            "dataloader_config": {
+                "params": {
+                    "batch_size": 1
                 }
             }
         }