diff --git a/.travis.yml b/.travis.yml
index 24843e8ee6..6b9f252237 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -31,6 +31,7 @@ matrix:
 install:
   - pip install -e ./
   - pip install tensorflow
+  - pip install pandas
 script:
   - python -m unittest discover -v
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 12e1062671..001cc1ddf6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -33,6 +33,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - `LFW` dataset format ()
 - Support of polygons' and masks' confusion matrices and mismatching classes in `diff` command ()
 - Add near duplicate image removal plugin ()
+- Sampler plugin that analyzes inference results from the given dataset and selects samples for annotation ()
 
 ### Changed
 - OpenVINO model launcher is updated for OpenVINO r2021.1 ()
diff --git a/README.md b/README.md
index 5a4e582943..65aa2817c2 100644
--- a/README.md
+++ b/README.md
@@ -158,6 +158,11 @@ CVAT annotations ---> Publication, statistics etc.
     - for detection task, based on bboxes
     - for re-identification task, based on labels,
      avoiding having same IDs in training and test splits
+  - Sampling a dataset
+    - analyzes inference results from the given dataset and selects
+      the ‘best’ and the ‘fewest’ samples for annotation (see the sketch below)
+    - selects the samples that best suit model training
+    - samples with an entropy-based algorithm
   - Dataset quality checking
     - Simple checking for errors
     - Comparison with model inference
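The entropy-based uncertainty score mentioned above is the Shannon entropy of an image's class probabilities, H = -Σ p·log p, computed per image in `entropy.py` below. A minimal standalone sketch (illustrative only, not part of the patch; the probability values are made up):

```python
import math

def entropy_uncertainty(probs):
    """Shannon entropy of one image's class-probability vector."""
    # The small epsilon mirrors entropy.py and guards against log(0).
    return -sum(p * math.log(p + 1e-14) for p in probs)

# A confident prediction scores low, an uncertain one scores high:
print(entropy_uncertainty([0.98, 0.01, 0.01]))  # ~0.112
print(entropy_uncertainty([0.4, 0.3, 0.3]))     # ~1.089
```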
+ """ + super().__init__(data, inference) + + # check the existence of "ImageID" in data & inference + if "ImageID" not in data: + raise Exception("Invalid Data, ImageID not found in data") + if "ImageID" not in inference: + raise Exception("Invalid Data, ImageID not found in inference") + + # check the existence of "ClassProbability" in inference + self.num_classes = 0 + for head in list(inference): + m = re.match("ClassProbability\d+", head) + if m is not None: + self.num_classes += 1 + + if not self.num_classes > 0: + raise Exception( + "Invalid data, Inference do not have ClassProbability values!" + ) + + # rank: The inference DataFrame, sorted according to the score. + self.rank = self._rank_images().sort_values(by="rank") + + def get_sample(self, method: str, k: int, n: int = 3) -> pd.DataFrame: + """ + A function that extracts sample data and returns it. + Args: + method: + - 'topk': It extracts the k sample data with the highest uncertainty. + - 'lowk': It extracts the k sample data with the lowest uncertainty. + - 'randomk': Extract and return random k sample data. + k: number of sample data + n: Parameters to be used in the randtopk method, Variable to first extract data of multiple n of k. + Returns: + Extracted sample data : pd.DataFrame + """ + temp_rank = self.rank + + # 1. k value check + if not isinstance(k, int): + raise Exception( + f"Invalid value {k}. k must have an integer greater than zero." + ) + elif k <= 0: + raise Exception( + f"Invalid number {k}. k must have a positive number greater than zero." + ) + + # 2. Select a sample according to the method + if k <= len(temp_rank): + if method == self.sampling_method.topk.name: + temp_rank = temp_rank[:k] + elif method == self.sampling_method.lowk.name: + temp_rank = temp_rank[-k:] + elif method == self.sampling_method.randk.name: + return self.data.sample(n=k).reset_index(drop=True) + elif method in [ + self.sampling_method.mixk.name, + self.sampling_method.randtopk.name, + ]: + return self._get_sample_mixed(method=method, k=k, n=n) + else: + raise Exception(f"Not Found method '{method}'") + else: + log.warning( + "The number of samples is greater than the size of the selected subset." + ) + + columns = list(self.data.columns) + merged_df = pd.merge(temp_rank, self.data, how="inner", on=["ImageID"]) + return merged_df[columns].reset_index(drop=True) + + def _get_sample_mixed(self, method: str, k: int, n: int = 3) -> pd.DataFrame: + """ + A function that extracts sample data and returns it. + Args: + method: + - 'mixk': Return top-k and low-k halves based on uncertainty. + - 'randomtopk': Randomly extract n*k and return k with high uncertainty. 
diff --git a/datumaro/plugins/sampler/algorithm/entropy.py b/datumaro/plugins/sampler/algorithm/entropy.py
new file mode 100644
index 0000000000..8ea69cf5aa
--- /dev/null
+++ b/datumaro/plugins/sampler/algorithm/entropy.py
@@ -0,0 +1,191 @@
+# Copyright (C) 2021 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import logging as log
+import math
+import re
+
+import pandas as pd
+
+from .algorithm import InferenceResultAnalyzer
+
+
+class SampleEntropy(InferenceResultAnalyzer):
+    """
+    SampleEntropy is a class that inherits InferenceResultAnalyzer,
+    calculates an uncertainty score based on entropy,
+    and gets samples based on that score.
+    """
+
+    def __init__(self, data, inference):
+        """
+        Constructor function
+        Args:
+            data: dataset in pd.DataFrame format; an ImageID column is required.
+            inference:
+                inference results in pd.DataFrame format;
+                ImageID and ClassProbability columns are required.
+        """
+        super().__init__(data, inference)
+
+        # check the existence of "ImageID" in data & inference
+        if "ImageID" not in data:
+            raise Exception("Invalid Data, ImageID not found in data")
+        if "ImageID" not in inference:
+            raise Exception("Invalid Data, ImageID not found in inference")
+
+        # check the existence of "ClassProbability" in inference
+        self.num_classes = 0
+        for head in list(inference):
+            m = re.match(r"ClassProbability\d+", head)
+            if m is not None:
+                self.num_classes += 1
+
+        if not self.num_classes > 0:
+            raise Exception(
+                "Invalid data, Inference does not have ClassProbability values!"
+            )
+
+        # rank: the inference DataFrame, sorted according to the score
+        self.rank = self._rank_images().sort_values(by="rank")
+
+    def get_sample(self, method: str, k: int, n: int = 3) -> pd.DataFrame:
+        """
+        A function that extracts sample data and returns it.
+        Args:
+            method:
+                - 'topk': extracts the k samples with the highest uncertainty.
+                - 'lowk': extracts the k samples with the lowest uncertainty.
+                - 'randk': extracts k samples at random.
+                - 'mixk', 'randtopk': delegated to _get_sample_mixed().
+            k: number of samples
+            n: used by the randtopk method; n * k samples are drawn at random first.
+        Returns:
+            Extracted sample data : pd.DataFrame
+        """
+        temp_rank = self.rank
+
+        # 1. k value check
+        if not isinstance(k, int):
+            raise Exception(
+                f"Invalid value {k}. k must be an integer greater than zero."
+            )
+        elif k <= 0:
+            raise Exception(
+                f"Invalid number {k}. k must be a positive number greater than zero."
+            )
+
+        # 2. Select a sample according to the method
+        if k <= len(temp_rank):
+            if method == self.sampling_method.topk.name:
+                temp_rank = temp_rank[:k]
+            elif method == self.sampling_method.lowk.name:
+                temp_rank = temp_rank[-k:]
+            elif method == self.sampling_method.randk.name:
+                return self.data.sample(n=k).reset_index(drop=True)
+            elif method in [
+                self.sampling_method.mixk.name,
+                self.sampling_method.randtopk.name,
+            ]:
+                return self._get_sample_mixed(method=method, k=k, n=n)
+            else:
+                raise Exception(f"Not Found method '{method}'")
+        else:
+            log.warning(
+                "The number of samples is greater than the size of the selected subset."
+            )
+
+        columns = list(self.data.columns)
+        merged_df = pd.merge(temp_rank, self.data, how="inner", on=["ImageID"])
+        return merged_df[columns].reset_index(drop=True)
+
+    def _get_sample_mixed(self, method: str, k: int, n: int = 3) -> pd.DataFrame:
+        """
+        A function that extracts sample data and returns it.
+        Args:
+            method:
+                - 'mixk': returns the top-k and low-k halves based on uncertainty.
+                - 'randtopk': randomly extracts n * k samples and returns the k with the highest uncertainty.
+            k: number of samples
+            n: multiplier; n * k samples are drawn from the data, from which the top k are taken
+        Returns:
+            Extracted sample data : pd.DataFrame
+        """
+        temp_rank = self.rank
+
+        # Select a sample according to the method
+        if k <= len(temp_rank):
+            if method == self.sampling_method.mixk.name:
+                if k % 2 == 0:
+                    temp_rank = pd.concat([temp_rank[: k // 2], temp_rank[-(k // 2) :]])
+                else:
+                    temp_rank = pd.concat(
+                        [temp_rank[: (k // 2) + 1], temp_rank[-(k // 2) :]]
+                    )
+            elif method == self.sampling_method.randtopk.name:
+                if n * k <= len(temp_rank):
+                    temp_rank = temp_rank.sample(n=n * k).sort_values(by="rank")
+                else:
+                    log.warning(msg="n * k exceeds the length of the inference")
+                temp_rank = temp_rank[:k]
+
+        columns = list(self.data.columns)
+        merged_df = pd.merge(temp_rank, self.data, how="inner", on=["ImageID"])
+        return merged_df[columns].reset_index(drop=True)
+
+    def _rank_images(self) -> pd.DataFrame:
+        """
+        An internal function that ranks the inference data based on uncertainty.
+        Returns:
+            inference data sorted by uncertainty. pd.DataFrame
+        """
+        # 1. Load Inference
+        inference, res = None, None
+        if self.inference is not None:
+            inference = pd.DataFrame(self.inference)
+        else:
+            raise Exception("Invalid Data, Failed to load inference result!")
+
+        # 2. If the inference data frame does not contain an uncertainty score, calculate it
+        if "Uncertainty" not in inference:
+            inference = self._calculate_uncertainty_from_classprob(inference=inference)
+
+        # 3. Check that Uncertainty values are in place
+        na_df = inference.isna().sum()
+        if "Uncertainty" in na_df and na_df["Uncertainty"] > 0:
+            raise Exception("Some inference results do not have Uncertainty values!")
+
+        # 4. Rank based on the Uncertainty score
+        res = inference[["ImageID", "Uncertainty"]].groupby("ImageID").mean()
+        res["rank"] = res["Uncertainty"].rank(ascending=False, method="first")
+        res = res.reset_index()
+
+        return res
+
+    def _calculate_uncertainty_from_classprob(
+        self, inference: pd.DataFrame
+    ) -> pd.DataFrame:
+        """
+        A function that calculates uncertainty based on entropy from ClassProbability values.
+        Args:
+            inference: inference data where uncertainty has not been calculated
+        Returns:
+            inference data with an Uncertainty column
+        """
+
+        # Calculate entropy (uncertainty score)
+        uncertainty = []
+        for i in range(len(inference)):
+            entropy = 0
+            for j in range(self.num_classes):
+                p = inference.loc[i][f"ClassProbability{j+1}"]
+                if p < 0 or p > 1:
+                    raise Exception(
+                        "Invalid data, math domain error! p must be between 0 and 1"
+                    )
+                entropy -= p * math.log(p + 1e-14, math.e)
+
+            uncertainty.append(entropy)
+
+        inference["Uncertainty"] = uncertainty
+
+        return inference
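A minimal usage sketch of `SampleEntropy`, using the input format the constructor validates (`ImageID` in both frames, `ClassProbability1..N` in the inference frame); the concrete values here are invented:

```python
import pandas as pd
from datumaro.plugins.sampler.algorithm.entropy import SampleEntropy

data = pd.DataFrame({"ImageID": ["1", "2", "3"]})
inference = pd.DataFrame({
    "ImageID": ["1", "2", "3"],
    "ClassProbability1": [0.98, 0.50, 0.70],
    "ClassProbability2": [0.02, 0.50, 0.30],
})

analyzer = SampleEntropy(data, inference)
# Returns the two most uncertain images: "2" (0.5/0.5), then "3" (0.7/0.3)
print(analyzer.get_sample(method="topk", k=2))
```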
diff --git a/datumaro/plugins/sampler/sampler.py b/datumaro/plugins/sampler/sampler.py
new file mode 100644
index 0000000000..e808d91fe7
--- /dev/null
+++ b/datumaro/plugins/sampler/sampler.py
@@ -0,0 +1,216 @@
+# Copyright (C) 2021 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+from collections import defaultdict
+
+import pandas as pd
+
+from datumaro.components.cli_plugin import CliPlugin
+from datumaro.components.extractor import Transform, DEFAULT_SUBSET_NAME
+
+from .algorithm.algorithm import SamplingMethod, Algorithm
+
+
+class Sampler(Transform, CliPlugin):
+    """
+    Sampler that analyzes the inference result of the dataset |n
+    and picks the best samples for training.|n
+    |n
+    Notes:|n
+    - Each image's inference result must contain the probability for all classes.|n
+    - Requesting a sample larger than the number of all images will return all images.|n
+    |n
+    Example:|n
+    |s|s%(prog)s \ |n
+    |s|s|s|s-a entropy \ |n
+    |s|s|s|s-subset_name train \ |n
+    |s|s|s|s-sample_name sample \ |n
+    |s|s|s|s-unsample_name unsampled \ |n
+    |s|s|s|s-m topk -k 20
+    """
+
+    @classmethod
+    def build_cmdline_parser(cls, **kwargs):
+        parser = super().build_cmdline_parser(**kwargs)
+        parser.add_argument(
+            "-a",
+            "--algorithm",
+            type=str,
+            default="entropy",
+            choices=[t.name for t in Algorithm],
+            help=f"Select algorithm, example: {[t.name for t in Algorithm]}",
+        )
+        parser.add_argument(
+            "-subset_name",
+            "--subset_name",
+            type=str,
+            help="Subset name to select samples from",
+        )
+        parser.add_argument(
+            "-sample_name",
+            "--sampled_name",
+            type=str,
+            default="sampled_set",
+            help="Sampled data subset name",
+        )
+        parser.add_argument(
+            "-unsample_name",
+            "--unsampled_name",
+            type=str,
+            default="unsampled_set",
+            help="Unsampled data subset name",
+        )
+        parser.add_argument(
+            "-m",
+            "--sampling_method",
+            type=str,
+            default="topk",
+            choices=[t.name for t in SamplingMethod],
+            help=f"Method of sampling, example: {[t.name for t in SamplingMethod]}",
+        )
+        parser.add_argument("-k", "--num_sample", type=int, help="Number of samples")
+        parser.add_argument(
+            "-o",
+            "--output_file",
+            type=str,
+            default=None,
+            help="Output sample file path; the file extension must be .csv",
+        )
+        return parser
+
+    def __init__(
+        self,
+        extractor,
+        algorithm,
+        subset_name,
+        sampled_name,
+        unsampled_name,
+        sampling_method,
+        num_sample,
+        output_file,
+    ):
+        """
+        Parameters
+        ----------
+        extractor : Extractor, Dataset
+        algorithm : str
+            Algorithm used to calculate the uncertainty
+            for sample selection. default: 'entropy'
+        subset_name : str
+            The name of the subset to select samples from.
+        sampled_name : str
+            Subset name for the selected samples, default: 'sampled_set'
+        unsampled_name : str
+            Subset name for the remaining items, default: 'unsampled_set'
+        sampling_method : str
+            Method of sampling: 'topk', 'lowk', 'randk', 'mixk' or 'randtopk'
+        num_sample : int
+            Number of samples extracted
+        output_file : str
+            Path of the sampler result; used when the user wants to save results
+        """
+        super().__init__(extractor)
+
+        # Get Parameters
+        self.subset_name = subset_name
+        self.sampled_name = sampled_name
+        self.unsampled_name = unsampled_name
+        self.algorithm = algorithm
+        self.sampling_method = sampling_method
+        self.num_sample = num_sample
+        self.output_file = output_file
+
+        # Optional. Use the --output_file option to save the sample list as a csv file
+        if output_file is not None and output_file.split(".")[-1] != "csv":
+            raise Exception(
+                "Invalid extension, The extension of the file must end with .csv"
+            )
+
+    @staticmethod
+    def _load_inference_from_subset(extractor, subset_name):
+        # 1. Get Dataset from subset name
+        if subset_name in extractor.subsets().keys():
+            subset = extractor.get_subset(subset_name)
+        else:
+            raise Exception(f"Not Found subset '{subset_name}'")
+
+        data_df = defaultdict(list)
+        infer_df = defaultdict(list)
+
+        # 2. Fill data_df and infer_df to fit the sampler algorithm input format
+        for item in subset:
+            data_df["ImageID"].append(item.id)
+
+            if not item.has_image or item.image.size is None:
+                raise Exception(f"Invalid data, data.id: {item.id}")
+
+            width, height = item.image.size
+            data_df["Width"].append(width)
+            data_df["Height"].append(height)
+            data_df["ImagePath"].append(item.image.path)
+
+            if not item.annotations:
+                raise Exception("Invalid data, data.annotations is empty")
+
+            for annotation in item.annotations:
+                if "score" not in annotation.attributes:
+                    raise Exception("Invalid data, probability score is None")
+                probs = annotation.attributes["score"]
+
+                infer_df["ImageID"].append(item.id)
+
+                for prob_idx, prob in enumerate(probs):
+                    infer_df[f"ClassProbability{prob_idx+1}"].append(prob)
+
+        data_df = pd.DataFrame(data_df)
+        infer_df = pd.DataFrame(infer_df)
+
+        return data_df, infer_df
+
+    @staticmethod
+    def _calculate_uncertainty(algorithm, data, inference):
+        # Check and create the algorithm
+        algorithms = Algorithm
+        if algorithm == algorithms.entropy.name:
+            from .algorithm.entropy import SampleEntropy
+
+            # Data delivery; the uncertainty score calculation also proceeds here
+            sampler = SampleEntropy(data, inference)
+        else:
+            raise Exception(
+                f"Not Found algorithm '{algorithm}', available algorithms: {algorithms}"
+            )
+        return sampler
+
+    def _check_sample(self, image):
+        # The function that determines the subset name of the data
+        if image.subset:
+            if image.subset == self.subset_name:
+                # 1. Returns the sample subset if the id belongs to samples
+                if image.id in self.sample_id:
+                    return self.sampled_name
+                else:
+                    return self.unsampled_name
+            else:
+                # 2. Returns the existing subset name if it is not a sample
+                return image.subset
+        else:
+            return DEFAULT_SUBSET_NAME
+
+    def __iter__(self):
+        # Import data with the subset name and convert it to the format
+        # used by the sampler algorithm, together with the inference result
+        data_df, infer_df = self._load_inference_from_subset(
+            self._extractor, self.subset_name
+        )
+
+        # Transfer the data to the sampler algorithm to calculate uncertainty & get the sample list
+        sampler = self._calculate_uncertainty(self.algorithm, data_df, infer_df)
+        self.result = sampler.get_sample(method=self.sampling_method, k=self.num_sample)
+
+        if self.output_file is not None:
+            self.result.to_csv(self.output_file, index=False)
+
+        self.sample_id = self.result["ImageID"].to_list()
+
+        # Transform properties for each item
+        for item in self._extractor:
+            # After checking whether each item belongs to the sample, rename its subset
+            yield self.wrap_item(item, subset=self._check_sample(item))
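As a usage sketch, the transform can also be constructed directly, the way the tests below drive it; `dataset` is assumed to be any Extractor/Dataset with a `train` subset whose `Label` annotations carry a `score` attribute with per-class probabilities:

```python
from datumaro.plugins.sampler.sampler import Sampler

result = Sampler(
    dataset,
    algorithm="entropy",
    subset_name="train",
    sampled_name="sample",
    unsampled_name="unsampled",
    sampling_method="topk",
    num_sample=5,
    output_file=None,
)
picked = result.get_subset("sample")     # the 5 most uncertain items
rest = result.get_subset("unsampled")    # the rest of the former "train" subset
```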
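To make the `mixk` and `randtopk` semantics concrete, a small worked sketch against a hypothetical entropy ranking (index 0 is the most uncertain item), mirroring the slicing in `_get_sample_mixed`:

```python
rank = ["img07", "img02", "img19", "img04", "img11", "img15"]  # most -> least uncertain

k = 4  # even k: half from the top, half from the bottom
assert rank[: k // 2] + rank[-(k // 2):] == ["img07", "img02", "img11", "img15"]

k = 3  # odd k: the extra item comes from the top
assert rank[: k // 2 + 1] + rank[-(k // 2):] == ["img07", "img02", "img15"]

# randtopk with n=3 first draws n * k random items (with a warning if that
# exceeds the dataset size) and then keeps the k most uncertain of those.
```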
diff --git a/docs/user_manual.md b/docs/user_manual.md
index 6266c323d8..99eb220523 100644
--- a/docs/user_manual.md
+++ b/docs/user_manual.md
@@ -1023,6 +1023,26 @@ datum transform -t rename -- -e '|pattern|replacement|'
 datum transform -t rename -- -e '|frame_(\d+)|\\1|'
 ```
 
+Example: sampling dataset items, where subset `train` is divided into `sampled` and `unsampled`:
+- If `train` has 100 items and 20 samples are selected, the result is a `sampled` subset (20 items) and an `unsampled` subset (80 items).
+- The original `train` subset is removed (unless sample_name=`train` or unsample_name=`train`, in which case it remains).
+- The `-m` option supports five sampling methods:
+  - `topk`: return the k items with the highest uncertainty
+  - `lowk`: return the k items with the lowest uncertainty
+  - `randk`: return k random items
+  - `mixk`: return half of the items by the topk method and the rest by lowk
+  - `randtopk`: first select 3 * k items randomly, then return the top k among them
+
+``` bash
+datum transform -t sampler -- \
+    -a entropy \
+    -subset_name train \
+    -sample_name sampled \
+    -unsample_name unsampled \
+    -m topk \
+    -k 20
+```
+
 ## Extending
 
 There are few ways to extend and customize Datumaro behaviour, which is supported by plugins.
diff --git a/requirements.txt b/requirements.txt
index 6bc3c7ee79..5cfc7dd4f2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,3 +10,4 @@ pycocotools>=2.0.0
 PyYAML>=5.3.1
 scikit-image>=0.15.0
 tensorboardX>=1.8
+pandas>=1.1.5
diff --git a/tests/assets/sampler/inference.csv b/tests/assets/sampler/inference.csv
new file mode 100644
index 0000000000..e08065831a
--- /dev/null
+++ b/tests/assets/sampler/inference.csv
@@ -0,0 +1,31 @@
+ImageID,ClassProbability1,ClassProbability2,ClassProbability3,Uncertainty
+1,0.975242317,0.024469912,0.000287826,0.117586322
+2,0.999715984,0.000281501,2.53E-06,0.002618015
+3,0.999299884,0.000691595,8.50E-06,0.005831472
+4,0.971567273,0.027958876,0.000473852,0.131661266
+5,0.999411225,0.000576135,1.26E-05,0.005028461
+6,0.999715269,0.00027976,4.95E-06,0.002634019
+7,0.978483677,0.021343108,0.00017317,0.104890488
+8,0.984344006,0.015289294,0.000366639,0.082351737
+9,0.974284053,0.025472108,0.000243954,0.120898865
+10,0.964820206,0.034958012,0.000221764,0.153654948
+11,0.996293604,0.003278826,0.000427532,0.02577186
+12,0.999689937,0.000307999,2.14E-06,0.002828279
+13,0.997596323,0.000604421,0.001799274,0.018252373
+14,0.999696493,0.000294724,8.87E-06,0.002802743
+15,0.999686837,0.000309912,3.27E-06,0.002858304
+16,0.999234438,0.000750318,1.53E-05,0.006333055
+17,0.999581277,0.000413273,5.49E-06,0.003705278
+18,0.999384761,0.000604751,1.05E-05,0.005217474
+19,0.999574125,0.000416982,8.93E-06,0.003774712
+20,0.999575078,0.000411838,1.31E-05,0.003782649
+21,0.999712646,0.000286349,9.24E-07,0.002636151
+22,0.998748422,0.001103578,0.000147974,0.010070177
+23,0.999729574,0.000268848,1.53E-06,0.002501184
+24,0.999636412,0.000354998,8.59E-06,0.003283583
+25,0.999675989,0.000322926,1.11E-06,0.002934833
+26,0.970380008,0.029310413,0.000309611,0.135138899
+27,0.979150653,0.019359451,0.001489813,0.106692567
+28,0.999622822,0.000374233,3.02E-06,0.003368486
+29,0.999201596,0.000615866,0.000182658,0.006923281
+30,0.999691606,0.0002986,9.82E-06,0.002845172
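As a sanity check on this fixture, the `Uncertainty` column matches the plugin's entropy formula; for instance, for the row with ImageID 1:

```python
import math

probs = [0.975242317, 0.024469912, 0.000287826]  # ClassProbability1..3 of row 1
h = -sum(p * math.log(p + 1e-14) for p in probs)
print(round(h, 6))  # 0.117586, matching the row's Uncertainty value
```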
diff --git a/tests/test_sampler.py b/tests/test_sampler.py
new file mode 100644
index 0000000000..e31dd12ed0
--- /dev/null
+++ b/tests/test_sampler.py
@@ -0,0 +1,1120 @@
+from collections import defaultdict
+from unittest import TestCase
+
+from datumaro.components.project import Dataset
+from datumaro.components.extractor import (
+    DatasetItem,
+    Label,
+    LabelCategories,
+    AnnotationType,
+)
+from datumaro.util.image import Image
+
+import csv
+import pandas as pd
+
+import datumaro.plugins.sampler.sampler as sampler
+from datumaro.plugins.sampler.algorithm.entropy import SampleEntropy as entropy
+
+
+class SamplerTest(TestCase):
+    @staticmethod
+    def _get_probs(out_range=False):
+        probs = []
+        # data length is 30
+        inference_file = "tests/assets/sampler/inference.csv"
+        with open(inference_file) as csv_file:
+            csv_reader = csv.reader(csv_file)
+            col = 0
+            for row in csv_reader:
+                if col == 0:
+                    col += 1
+                    continue
+                else:
+                    if out_range:
+                        probs.append(list(map(lambda x: -float(x), row[1:4])))
+                    else:
+                        probs.append(list(map(float, row[1:4])))
+        return probs
+
+    def _generate_classification_dataset(
+        self,
+        config,
+        subset=None,
+        empty_score=False,
+        out_range=False,
+        no_attr=False,
+        no_img=False,
+    ):
+
+        probs = self._get_probs(out_range)
+        if subset is None:
+            self.subset = ["train", "val", "test"]
+        else:
+            self.subset = subset
+
+        iterable = []
+        label_cat = LabelCategories()
+        idx = 0
+        for label_id, label in enumerate(config.keys()):
+            num_item = config[label]
+            label_cat.add(label, attributes=None)
+            for _ in range(num_item):
+                score = probs[idx]
+                idx += 1
+                if empty_score:
+                    score = []
+                attr = {"score": score}
+                if no_attr:
+                    attr = {}
+                img = Image(path=f"test/dataset/{idx}.jpg", size=(90, 90))
+                if no_img:
+                    img = None
+                iterable.append(
+                    DatasetItem(
+                        idx,
+                        subset=self.subset[idx % len(self.subset)],
+                        annotations=[
+                            Label(
+                                label_id,
+                                attributes=attr,
+                            )
+                        ],
+                        image=img,
+                    )
+                )
+        categories = {AnnotationType.label: label_cat}
+        dataset = Dataset.from_iterable(iterable, categories)
+        return dataset
+
+    def test_sampler_get_sample_classification(self):
+        config = {
+            "label1": 10,
+            "label2": 10,
+            "label3": 10,
+        }
+
+        source = self._generate_classification_dataset(config, ["train"])
+        num_pre_train_subset = len(source.get_subset("train"))
+
+        num_sample = 5
+
+        with self.subTest("Top-K method"):
+            result = sampler.Sampler(
+                source,
+                algorithm="entropy",
+                subset_name="train",
+                sampled_name="sample",
+                unsampled_name="unsampled",
+                sampling_method="topk",
+                num_sample=num_sample,
+                output_file=None,
+            )
+            self.assertEqual(num_sample, len(result.get_subset("sample")))
+            self.assertEqual(
+                len(result.get_subset("unsampled")),
+                num_pre_train_subset - len(result.get_subset("sample")),
+            )
+            topk_expected_result = [1, 4, 9, 10, 26]
+            topk_result = list(map(int, result.result["ImageID"].to_list()))
+            self.assertEqual(sorted(topk_result), topk_expected_result)
+
+        with self.subTest("Low-K method"):
+            result = sampler.Sampler(
+                source,
+                algorithm="entropy",
+                subset_name="train",
+                sampled_name="sample",
+                unsampled_name="unsampled",
+                sampling_method="lowk",
+                num_sample=num_sample,
+                output_file=None,
+            )
+            self.assertEqual(num_sample, len(result.get_subset("sample")))
+            self.assertEqual(
+                len(result.get_subset("unsampled")),
+                num_pre_train_subset - len(result.get_subset("sample")),
+            )
+            lowk_expected_result = [2, 6, 14, 21, 23]
+            lowk_result = list(map(int, result.result["ImageID"].to_list()))
+            self.assertEqual(sorted(lowk_result), lowk_expected_result)
+
+        with self.subTest("Rand-K method"):
+            result = sampler.Sampler(
+                source,
+                algorithm="entropy",
+                subset_name="train",
+                sampled_name="sample",
+                unsampled_name="unsampled",
+                sampling_method="randk",
+                num_sample=num_sample,
+                output_file=None,
+            )
+            self.assertEqual(num_sample, len(result.get_subset("sample")))
+            self.assertEqual(
+                len(result.get_subset("unsampled")),
+                num_pre_train_subset - len(result.get_subset("sample")),
+            )
+
+        with self.subTest("Mix-K method"):
+            result = sampler.Sampler(
+                source,
+                algorithm="entropy",
+                subset_name="train",
+                sampled_name="sample",
+                unsampled_name="unsampled",
+                sampling_method="mixk",
+                num_sample=num_sample,
+                output_file=None,
+            )
+            self.assertEqual(num_sample, len(result.get_subset("sample")))
+            self.assertEqual(
+                len(result.get_subset("unsampled")),
+                num_pre_train_subset - len(result.get_subset("sample")),
+            )
+            mixk_expected_result = [2, 4, 10, 23, 26]
+            mixk_result = list(map(int, result.result["ImageID"].to_list()))
+            self.assertEqual(sorted(mixk_result), mixk_expected_result)
+
+            result = sampler.Sampler(
+                source,
+                algorithm="entropy",
+                subset_name="train",
+                sampled_name="sample",
+                unsampled_name="unsampled",
+                sampling_method="mixk",
+                num_sample=6,
+                output_file=None,
+            )
+            self.assertEqual(6, len(result.get_subset("sample")))
+            self.assertEqual(
+                len(result.get_subset("unsampled")),
+                num_pre_train_subset - len(result.get_subset("sample")),
+            )
+            mixk_expected_result = [2, 4, 6, 10, 23, 26]
+            mixk_result = list(map(int, result.result["ImageID"].to_list()))
+            self.assertEqual(sorted(mixk_result), mixk_expected_result)
+
+        with self.subTest("Randtop-K method"):
+            result = sampler.Sampler(
+                source,
+                algorithm="entropy",
+                subset_name="train",
+                sampled_name="sample",
+                unsampled_name="unsampled",
+                sampling_method="randtopk",
+                num_sample=num_sample,
+                output_file=None,
+            )
+
+            self.assertEqual(num_sample, len(result.get_subset("sample")))
+            self.assertEqual(
+                len(result.get_subset("unsampled")),
+                num_pre_train_subset - len(result.get_subset("sample")),
+            )
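+    # NOTE: the expected ImageID lists above follow from the fixture:
+    # in tests/assets/sampler/inference.csv, rows 1, 4, 9, 10 and 26 carry the
+    # largest Uncertainty values and rows 2, 6, 14, 21 and 23 the smallest.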
+    def test_sampler_gives_error(self):
+        config = {
+            "label1": 10,
+            "label2": 10,
+            "label3": 10,
+        }
+        num_sample = 5
+
+        source = self._generate_classification_dataset(config)
+
+        with self.subTest("Not found"):
+            with self.assertRaisesRegex(Exception, "Not Found subset"):
+                subset = "hello"
+                result = sampler.Sampler(
+                    source,
+                    algorithm="entropy",
+                    subset_name=subset,
+                    sampled_name="sample",
+                    unsampled_name="unsampled",
+                    sampling_method="topk",
+                    num_sample=num_sample,
+                    output_file=None,
+                )
+                result = iter(result)
+                next(result)
+
+            with self.assertRaisesRegex(Exception, "Not Found algorithm"):
+                algorithm = "hello"
+                result = sampler.Sampler(
+                    source,
+                    algorithm=algorithm,
+                    subset_name="train",
+                    sampled_name="sample",
+                    unsampled_name="unsampled",
+                    sampling_method="topk",
+                    num_sample=num_sample,
+                    output_file=None,
+                )
+                result = iter(result)
+                next(result)
+
+            with self.assertRaisesRegex(Exception, "Not Found method"):
+                sampling_method = "hello"
+                result = sampler.Sampler(
+                    source,
+                    algorithm="entropy",
+                    subset_name="train",
+                    sampled_name="sample",
+                    unsampled_name="unsampled",
+                    sampling_method=sampling_method,
+                    num_sample=num_sample,
+                    output_file=None,
+                )
+                result = iter(result)
+                next(result)
+
+        with self.subTest("Invalid Value"):
+            with self.assertRaisesRegex(Exception, "Invalid number"):
+                k = 0
+                result = sampler.Sampler(
+                    source,
+                    algorithm="entropy",
+                    subset_name="train",
+                    sampled_name="sample",
+                    unsampled_name="unsampled",
+                    sampling_method="topk",
+                    num_sample=k,
+                    output_file=None,
+                )
+                result = iter(result)
+                next(result)
+
+            with self.assertRaisesRegex(Exception, "Invalid number"):
+                k = -1
+                result = sampler.Sampler(
+                    source,
+                    algorithm="entropy",
+                    subset_name="train",
+                    sampled_name="sample",
+                    unsampled_name="unsampled",
+                    sampling_method="topk",
+                    num_sample=k,
+                    output_file=None,
+                )
+                result = iter(result)
+                next(result)
+
+            with self.assertRaisesRegex(Exception, "Invalid value"):
+                k = "string"
+                result = sampler.Sampler(
+                    source,
+                    algorithm="entropy",
+                    subset_name="train",
+                    sampled_name="sample",
+                    unsampled_name="unsampled",
+                    sampling_method="topk",
+                    num_sample=k,
+                    output_file=None,
+                )
+                result = iter(result)
+                next(result)
+
+            with self.assertRaisesRegex(Exception, "Invalid extension"):
+                output_file = "string.xml"
+                result = sampler.Sampler(
+                    source,
+                    algorithm="entropy",
+                    subset_name="train",
+                    sampled_name="sample",
+                    unsampled_name="unsampled",
+                    sampling_method="topk",
+                    num_sample=num_sample,
+                    output_file=output_file,
+                )
+                result = iter(result)
+                next(result)
+
+            with self.assertRaisesRegex(Exception, "Invalid extension"):
+                output_file = "string"
+                result = sampler.Sampler(
+                    source,
+                    algorithm="entropy",
+                    subset_name="train",
+                    sampled_name="sample",
+                    unsampled_name="unsampled",
+                    sampling_method="topk",
+                    num_sample=num_sample,
+                    output_file=output_file,
+                )
+                result = iter(result)
+                next(result)
+
+        with self.assertRaisesRegex(
+            Exception, "Invalid Data, ImageID not found in data"
+        ):
+            sub = source.get_subset("train")
+
+            data_df = defaultdict(list)
+            infer_df = defaultdict(list)
+
+            for data in sub:
+                width, height = data.image.size
+                data_df["Width"].append(width)
+                data_df["Height"].append(height)
+                data_df["ImagePath"].append(data.image.path)
+
+                for annotation in data.annotations:
+                    probs = annotation.attributes["score"]
+                    infer_df["ImageID"].append(data.id)
+
+                    for prob_idx, prob in enumerate(probs):
+                        infer_df[f"ClassProbability{prob_idx+1}"].append(prob)
+
+            data_df = pd.DataFrame(data_df)
+            infer_df = pd.DataFrame(infer_df)
+
+            entropy(data_df, infer_df)
+
+        with self.assertRaisesRegex(
+            Exception, "Invalid Data, ImageID not found in inference"
+        ):
+            sub = source.get_subset("train")
+
+            data_df = defaultdict(list)
+            infer_df = defaultdict(list)
+
+            for data in sub:
+                width, height = data.image.size
+                data_df["ImageID"].append(data.id)
+                data_df["Width"].append(width)
+                data_df["Height"].append(height)
+                data_df["ImagePath"].append(data.image.path)
+
+                for annotation in data.annotations:
+                    probs = annotation.attributes["score"]
+
+                    for prob_idx, prob in enumerate(probs):
+                        infer_df[f"ClassProbability{prob_idx+1}"].append(prob)
+
+            data_df = pd.DataFrame(data_df)
+            infer_df = pd.DataFrame(infer_df)
+
+            entropy(data_df, infer_df)
algorithm="entropy", + subset_name="train", + sampled_name="sample", + unsampled_name="unsampled", + sampling_method="topk", + num_sample=k, + output_file=None, + ) + result = iter(result) + next(result) + + with self.assertRaisesRegex(Exception, "Invalid value"): + k = "string" + result = sampler.Sampler( + source, + algorithm="entropy", + subset_name="train", + sampled_name="sample", + unsampled_name="unsampled", + sampling_method="topk", + num_sample=k, + output_file=None, + ) + result = iter(result) + next(result) + + with self.assertRaisesRegex(Exception, "Invalid extension"): + output_file = "string.xml" + result = sampler.Sampler( + source, + algorithm="entropy", + subset_name="train", + sampled_name="sample", + unsampled_name="unsampled", + sampling_method="topk", + num_sample=num_sample, + output_file=output_file, + ) + result = iter(result) + next(result) + + with self.assertRaisesRegex(Exception, "Invalid extension"): + output_file = "string" + result = sampler.Sampler( + source, + algorithm="entropy", + subset_name="train", + sampled_name="sample", + unsampled_name="unsampled", + sampling_method="topk", + num_sample=num_sample, + output_file=output_file, + ) + result = iter(result) + next(result) + + with self.assertRaisesRegex( + Exception, "Invalid Data, ImageID not found in data" + ): + sub = source.get_subset("train") + + data_df = defaultdict(list) + infer_df = defaultdict(list) + + for data in sub: + width, height = data.image.size + data_df["Width"].append(width) + data_df["Height"].append(height) + data_df["ImagePath"].append(data.image.path) + + for annotation in data.annotations: + probs = annotation.attributes["score"] + infer_df["ImageID"].append(data.id) + + for prob_idx, prob in enumerate(probs): + infer_df[f"ClassProbability{prob_idx+1}"].append(prob) + + data_df = pd.DataFrame(data_df) + infer_df = pd.DataFrame(infer_df) + + entropy(data_df, infer_df) + + with self.assertRaisesRegex( + Exception, "Invalid Data, ImageID not found in inference" + ): + sub = source.get_subset("train") + + data_df = defaultdict(list) + infer_df = defaultdict(list) + + for data in sub: + width, height = data.image.size + data_df["ImageID"].append(data.id) + data_df["Width"].append(width) + data_df["Height"].append(height) + data_df["ImagePath"].append(data.image.path) + + for annotation in data.annotations: + probs = annotation.attributes["score"] + + for prob_idx, prob in enumerate(probs): + infer_df[f"ClassProbability{prob_idx+1}"].append(prob) + + data_df = pd.DataFrame(data_df) + infer_df = pd.DataFrame(infer_df) + + entropy(data_df, infer_df) + + def test_sampler_get_invalid_data(self): + with self.subTest("empty dataset"): + config = { + "label1": 0, + "label2": 0, + "label3": 0, + } + + source = self._generate_classification_dataset(config) + with self.assertRaisesRegex(Exception, "Not Found"): + result = sampler.Sampler( + source, + algorithm="entropy", + subset_name="train", + sampled_name="sample", + unsampled_name="unsampled", + sampling_method="topk", + num_sample=5, + output_file=None, + ) + result = iter(result) + next(result) + + with self.subTest("Dataset without Score (Probability)"): + config = { + "label1": 10, + "label2": 10, + "label3": 10, + } + + source = self._generate_classification_dataset(config, empty_score=True) + with self.assertRaisesRegex(Exception, "Invalid data"): + result = sampler.Sampler( + source, + algorithm="entropy", + subset_name="train", + sampled_name="sample", + unsampled_name="unsampled", + sampling_method="topk", + num_sample=5, + 
+        with self.subTest("Out of range, probability (Less than 0 or more than 1)"):
+            config = {
+                "label1": 10,
+                "label2": 10,
+                "label3": 10,
+            }
+
+            source = self._generate_classification_dataset(
+                config, empty_score=False, out_range=True
+            )
+            with self.assertRaisesRegex(Exception, "Invalid data"):
+                result = sampler.Sampler(
+                    source,
+                    algorithm="entropy",
+                    subset_name="train",
+                    sampled_name="sample",
+                    unsampled_name="unsampled",
+                    sampling_method="topk",
+                    num_sample=5,
+                    output_file=None,
+                )
+                result = iter(result)
+                next(result)
+
+        with self.subTest("No Score Attribute Data"):
+            config = {
+                "label1": 10,
+                "label2": 10,
+                "label3": 10,
+            }
+
+            source = self._generate_classification_dataset(config, no_attr=True)
+            with self.assertRaisesRegex(Exception, "Invalid data"):
+                result = sampler.Sampler(
+                    source,
+                    algorithm="entropy",
+                    subset_name="train",
+                    sampled_name="sample",
+                    unsampled_name="unsampled",
+                    sampling_method="topk",
+                    num_sample=5,
+                    output_file=None,
+                )
+                result = iter(result)
+                next(result)
+
+        with self.subTest("No Image Data"):
+            config = {
+                "label1": 10,
+                "label2": 10,
+                "label3": 10,
+            }
+
+            source = self._generate_classification_dataset(config, no_img=True)
+            with self.assertRaisesRegex(Exception, "Invalid data"):
+                result = sampler.Sampler(
+                    source,
+                    algorithm="entropy",
+                    subset_name="train",
+                    sampled_name="sample",
+                    unsampled_name="unsampled",
+                    sampling_method="topk",
+                    num_sample=5,
+                    output_file=None,
+                )
+                result = iter(result)
+                next(result)
algorithm="entropy", + subset_name="train", + sampled_name="sample", + unsampled_name="unsampled", + sampling_method=sampling_method, + num_sample=num_sample, + output_file=None, + ) + self.assertEqual(num_pre_train_subset, len(result.get_subset("sample"))) + + with self.subTest("k == num of data with top-k"): + num_sample = 10 + sampling_method = "topk" + + result = sampler.Sampler( + source, + algorithm="entropy", + subset_name="train", + sampled_name="sample", + unsampled_name="unsampled", + sampling_method=sampling_method, + num_sample=num_sample, + output_file=None, + ) + self.assertEqual(num_pre_train_subset, len(result.get_subset("sample"))) + + with self.subTest("k == num of data with low-k"): + num_sample = 10 + sampling_method = "lowk" + + result = sampler.Sampler( + source, + algorithm="entropy", + subset_name="train", + sampled_name="sample", + unsampled_name="unsampled", + sampling_method=sampling_method, + num_sample=num_sample, + output_file=None, + ) + self.assertEqual(num_pre_train_subset, len(result.get_subset("sample"))) + + with self.subTest("k == num of data with rand-k"): + num_sample = 10 + sampling_method = "randk" + + result = sampler.Sampler( + source, + algorithm="entropy", + subset_name="train", + sampled_name="sample", + unsampled_name="unsampled", + sampling_method=sampling_method, + num_sample=num_sample, + output_file=None, + ) + self.assertEqual(num_pre_train_subset, len(result.get_subset("sample"))) + + with self.subTest("k == num of data with mix-k"): + num_sample = 10 + sampling_method = "mixk" + + result = sampler.Sampler( + source, + algorithm="entropy", + subset_name="train", + sampled_name="sample", + unsampled_name="unsampled", + sampling_method=sampling_method, + num_sample=num_sample, + output_file=None, + ) + self.assertEqual(num_pre_train_subset, len(result.get_subset("sample"))) + + with self.subTest("k == num of data with randtop-k"): + num_sample = 10 + sampling_method = "randtopk" + + result = sampler.Sampler( + source, + algorithm="entropy", + subset_name="train", + sampled_name="sample", + unsampled_name="unsampled", + sampling_method=sampling_method, + num_sample=num_sample, + output_file=None, + ) + self.assertEqual(num_pre_train_subset, len(result.get_subset("sample"))) + + num_sample = 9 + + result = sampler.Sampler( + source, + algorithm="entropy", + subset_name="train", + sampled_name="sample", + unsampled_name="unsampled", + sampling_method=sampling_method, + num_sample=num_sample, + output_file=None, + ) + self.assertEqual(len(result.get_subset("sample")), 9) + + def test_sampler_accumulated_sampling(self): + config = { + "label1": 10, + "label2": 10, + "label3": 10, + } + + source = self._generate_classification_dataset(config) + + num_pre_train_subset = len(source.get_subset("train")) + num_pre_val_subset = len(source.get_subset("val")) + num_pre_test_subset = len(source.get_subset("test")) + + with self.subTest("Same Subset, Same number of datas 3times"): + num_sample = 3 + sample_subset_name = "sample" + + result = sampler.Sampler( + source, + algorithm="entropy", + subset_name="train", + sampled_name=sample_subset_name, + unsampled_name="train", + sampling_method="topk", + num_sample=num_sample, + output_file=None, + ) + + self.assertEqual(len(result.get_subset("sample")), num_sample) + self.assertEqual( + len(result.get_subset("train")), num_pre_train_subset - num_sample + ) + + result = sampler.Sampler( + result, + algorithm="entropy", + subset_name="train", + sampled_name=sample_subset_name, + unsampled_name="train", + 
sampling_method="topk", + num_sample=num_sample, + output_file=None, + ) + + self.assertEqual(len(result.get_subset("sample")), num_sample * 2) + self.assertEqual( + len(result.get_subset("train")), num_pre_train_subset - num_sample * 2 + ) + + result = sampler.Sampler( + result, + algorithm="entropy", + subset_name="train", + sampled_name=sample_subset_name, + unsampled_name="train", + sampling_method="topk", + num_sample=num_sample, + output_file=None, + ) + + self.assertEqual(len(result.get_subset("sample")), num_sample * 3) + self.assertEqual( + len(result.get_subset("train")), num_pre_train_subset - num_sample * 3 + ) + + with self.subTest("Same Subset, 2, 3, 4 sampling"): + sample_subset_name = "sample" + + result = sampler.Sampler( + source, + algorithm="entropy", + subset_name="train", + sampled_name=sample_subset_name, + unsampled_name="train", + sampling_method="topk", + num_sample=2, + output_file=None, + ) + + self.assertEqual(len(result.get_subset("sample")), 2) + self.assertEqual(len(result.get_subset("train")), num_pre_train_subset - 2) + + result = sampler.Sampler( + result, + algorithm="entropy", + subset_name="train", + sampled_name=sample_subset_name, + unsampled_name="train", + sampling_method="topk", + num_sample=3, + output_file=None, + ) + + self.assertEqual(len(result.get_subset("sample")), 5) + self.assertEqual(len(result.get_subset("train")), num_pre_train_subset - 5) + + result = sampler.Sampler( + result, + algorithm="entropy", + subset_name="train", + sampled_name=sample_subset_name, + unsampled_name="train", + sampling_method="topk", + num_sample=4, + output_file=None, + ) + + self.assertEqual(len(result.get_subset("sample")), 9) + self.assertEqual(len(result.get_subset("train")), num_pre_train_subset - 9) + + with self.subTest("Different Subset, Same number of datas 3times"): + num_sample = 3 + sample_subset_name = "sample" + + result = sampler.Sampler( + source, + algorithm="entropy", + subset_name="train", + sampled_name=sample_subset_name, + unsampled_name="train", + sampling_method="topk", + num_sample=num_sample, + output_file=None, + ) + + self.assertEqual(len(result.get_subset("sample")), num_sample) + self.assertEqual( + len(result.get_subset("train")), num_pre_train_subset - num_sample + ) + + result = sampler.Sampler( + result, + algorithm="entropy", + subset_name="val", + sampled_name=sample_subset_name, + unsampled_name="val", + sampling_method="topk", + num_sample=num_sample, + output_file=None, + ) + + self.assertEqual(len(result.get_subset("sample")), num_sample * 2) + self.assertEqual( + len(result.get_subset("val")), num_pre_val_subset - num_sample + ) + + result = sampler.Sampler( + result, + algorithm="entropy", + subset_name="test", + sampled_name=sample_subset_name, + unsampled_name="test", + sampling_method="topk", + num_sample=num_sample, + output_file=None, + ) + + self.assertEqual(len(result.get_subset("sample")), num_sample * 3) + self.assertEqual( + len(result.get_subset("test")), num_pre_test_subset - num_sample + ) + + with self.subTest("Different Subset, 2, 3, 4 sampling"): + sample_subset_name = "sample" + + result = sampler.Sampler( + source, + algorithm="entropy", + subset_name="train", + sampled_name=sample_subset_name, + unsampled_name="train", + sampling_method="topk", + num_sample=2, + output_file=None, + ) + + self.assertEqual(len(result.get_subset("sample")), 2) + self.assertEqual(len(result.get_subset("train")), num_pre_train_subset - 2) + + result = sampler.Sampler( + result, + algorithm="entropy", + subset_name="val", 
+    def test_sampler_unaccumulated_sampling(self):
+        config = {
+            "label1": 10,
+            "label2": 10,
+            "label3": 10,
+        }
+
+        source = self._generate_classification_dataset(config)
+
+        num_pre_train_subset = len(source.get_subset("train"))
+        num_pre_val_subset = len(source.get_subset("val"))
+        num_pre_test_subset = len(source.get_subset("test"))
+
+        with self.subTest("Same subset, same number of samples 3 times"):
+            num_sample = 3
+
+            result = sampler.Sampler(
+                source,
+                algorithm="entropy",
+                subset_name="train",
+                sampled_name="sample1",
+                unsampled_name="train",
+                sampling_method="topk",
+                num_sample=num_sample,
+                output_file=None,
+            )
+
+            self.assertEqual(len(result.get_subset("sample1")), num_sample)
+            self.assertEqual(
+                len(result.get_subset("train")), num_pre_train_subset - num_sample
+            )
+
+            result = sampler.Sampler(
+                result,
+                algorithm="entropy",
+                subset_name="train",
+                sampled_name="sample2",
+                unsampled_name="train",
+                sampling_method="topk",
+                num_sample=num_sample,
+                output_file=None,
+            )
+
+            self.assertEqual(len(result.get_subset("sample1")), num_sample)
+            self.assertEqual(len(result.get_subset("sample2")), num_sample)
+            self.assertEqual(
+                len(result.get_subset("train")), num_pre_train_subset - num_sample * 2
+            )
+
+            result = sampler.Sampler(
+                result,
+                algorithm="entropy",
+                subset_name="train",
+                sampled_name="sample3",
+                unsampled_name="train",
+                sampling_method="topk",
+                num_sample=num_sample,
+                output_file=None,
+            )
+
+            self.assertEqual(len(result.get_subset("sample1")), num_sample)
+            self.assertEqual(len(result.get_subset("sample2")), num_sample)
+            self.assertEqual(len(result.get_subset("sample3")), num_sample)
+            self.assertEqual(
+                len(result.get_subset("train")), num_pre_train_subset - num_sample * 3
+            )
+
+        with self.subTest("Same subset, 2, 3, 4 sampling"):
+            result = sampler.Sampler(
+                source,
+                algorithm="entropy",
+                subset_name="train",
+                sampled_name="sample1",
+                unsampled_name="train",
+                sampling_method="topk",
+                num_sample=2,
+                output_file=None,
+            )
+
+            self.assertEqual(len(result.get_subset("sample1")), 2)
+            self.assertEqual(len(result.get_subset("train")), num_pre_train_subset - 2)
+
+            result = sampler.Sampler(
+                result,
+                algorithm="entropy",
+                subset_name="train",
+                sampled_name="sample2",
+                unsampled_name="train",
+                sampling_method="topk",
+                num_sample=3,
+                output_file=None,
+            )
+
+            self.assertEqual(len(result.get_subset("sample1")), 2)
+            self.assertEqual(len(result.get_subset("sample2")), 3)
+            self.assertEqual(len(result.get_subset("train")), num_pre_train_subset - 5)
+
+            result = sampler.Sampler(
+                result,
+                algorithm="entropy",
+                subset_name="train",
+                sampled_name="sample3",
+                unsampled_name="train",
+                sampling_method="topk",
+                num_sample=4,
+                output_file=None,
+            )
+
+            self.assertEqual(len(result.get_subset("sample1")), 2)
+            self.assertEqual(len(result.get_subset("sample2")), 3)
+            self.assertEqual(len(result.get_subset("sample3")), 4)
self.assertEqual(len(result.get_subset("train")), num_pre_train_subset - 9) + + with self.subTest("Different Subset, Same number of datas 3times"): + num_sample = 3 + + result = sampler.Sampler( + source, + algorithm="entropy", + subset_name="train", + sampled_name="sample1", + unsampled_name="train", + sampling_method="topk", + num_sample=num_sample, + output_file=None, + ) + + self.assertEqual(len(result.get_subset("sample1")), num_sample) + self.assertEqual( + len(result.get_subset("train")), num_pre_train_subset - num_sample + ) + + result = sampler.Sampler( + result, + algorithm="entropy", + subset_name="val", + sampled_name="sample2", + unsampled_name="val", + sampling_method="topk", + num_sample=num_sample, + output_file=None, + ) + + self.assertEqual(len(result.get_subset("sample1")), num_sample) + self.assertEqual(len(result.get_subset("sample2")), num_sample) + self.assertEqual( + len(result.get_subset("val")), num_pre_val_subset - num_sample + ) + + result = sampler.Sampler( + result, + algorithm="entropy", + subset_name="test", + sampled_name="sample3", + unsampled_name="test", + sampling_method="topk", + num_sample=num_sample, + output_file=None, + ) + + self.assertEqual(len(result.get_subset("sample1")), num_sample) + self.assertEqual(len(result.get_subset("sample2")), num_sample) + self.assertEqual(len(result.get_subset("sample3")), num_sample) + self.assertEqual( + len(result.get_subset("test")), num_pre_test_subset - num_sample + ) + + with self.subTest("Different Subset, 2, 3, 4 sampling"): + result = sampler.Sampler( + source, + algorithm="entropy", + subset_name="train", + sampled_name="sample1", + unsampled_name="train", + sampling_method="topk", + num_sample=2, + output_file=None, + ) + + self.assertEqual(len(result.get_subset("sample1")), 2) + self.assertEqual(len(result.get_subset("train")), num_pre_train_subset - 2) + + result = sampler.Sampler( + result, + algorithm="entropy", + subset_name="val", + sampled_name="sample2", + unsampled_name="val", + sampling_method="topk", + num_sample=3, + output_file=None, + ) + + self.assertEqual(len(result.get_subset("sample1")), 2) + self.assertEqual(len(result.get_subset("sample2")), 3) + self.assertEqual(len(result.get_subset("val")), num_pre_val_subset - 3) + + result = sampler.Sampler( + result, + algorithm="entropy", + subset_name="test", + sampled_name="sample3", + unsampled_name="test", + sampling_method="topk", + num_sample=4, + output_file=None, + ) + + self.assertEqual(len(result.get_subset("sample1")), 2) + self.assertEqual(len(result.get_subset("sample2")), 3) + self.assertEqual(len(result.get_subset("sample3")), 4) + self.assertEqual(len(result.get_subset("test")), num_pre_test_subset - 4) + + def test_sampler_parser(self): + from argparse import ArgumentParser + + assert isinstance(sampler.Sampler.build_cmdline_parser(), ArgumentParser)