Update regression tests #2556

Merged
@@ -22,3 +22,6 @@
 
 fp16 = dict(loss_scale=512.0)
 ignore = False
+
+# for multi-gpu training
+find_unused_parameters = True
@@ -22,3 +22,6 @@
 
 fp16 = dict(loss_scale=512.0)
 ignore = False
+
+# for multi-gpu training
+find_unused_parameters = True
@@ -33,3 +33,6 @@
 
 fp16 = dict(loss_scale=512.0)
 ignore = False
+
+# for multi-gpu training
+find_unused_parameters = True
@@ -22,3 +22,6 @@
 
 fp16 = dict(loss_scale=512.0)
 ignore = False
+
+# for multi-gpu training
+find_unused_parameters = True
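The four config hunks above all add the same two lines. In mmdetection-style configs, the find_unused_parameters key is forwarded to the PyTorch DistributedDataParallel wrapper; without it, a forward pass that skips some registered parameters can stall gradient synchronization across GPUs. The sketch below is not OTX code: it is a minimal, single-process illustration of the PyTorch flag the config key maps to, with the model and data invented for the example.

# Minimal sketch, not OTX code: what the find_unused_parameters flag controls in
# torch.nn.parallel.DistributedDataParallel. Model and tensor shapes are invented;
# only the flag itself comes from the config change above.
import os

import torch
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP


class TwoHeads(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.main_head = torch.nn.Linear(8, 2)
        self.aux_head = torch.nn.Linear(8, 2)  # not used on every forward pass

    def forward(self, x, use_aux=False):
        out = self.main_head(x)
        return out + self.aux_head(x) if use_aux else out


if __name__ == "__main__":
    # Single-process gloo group so the sketch runs on CPU without a launcher.
    os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
    os.environ.setdefault("MASTER_PORT", "29500")
    dist.init_process_group("gloo", rank=0, world_size=1)

    # In a real multi-process run, skipping aux_head without this flag makes the
    # backward-pass gradient reduction wait for gradients that never arrive.
    model = DDP(TwoHeads(), find_unused_parameters=True)
    model(torch.randn(4, 8), use_aux=False).sum().backward()

    dist.destroy_process_group()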
2 changes: 0 additions & 2 deletions tests/e2e/cli/detection/test_detection.py
@@ -6,7 +6,6 @@
 import os
 
 import pytest
-import torch
 
 from otx.api.entities.model_template import parse_model_template
 from otx.cli.registry import Registry
@@ -78,7 +77,6 @@
 
 otx_dir = os.getcwd()
 
-MULTI_GPU_UNAVAILABLE = torch.cuda.device_count() <= 1
 TT_STABILITY_TESTS = os.environ.get("TT_STABILITY_TESTS", False)
 if TT_STABILITY_TESTS:
     default_template = parse_model_template(
2 changes: 2 additions & 0 deletions tests/e2e/cli/detection/test_tiling_detection.py
@@ -6,6 +6,7 @@
 import os
 
 import pytest
+import torch
 
 from otx.api.entities.model_template import parse_model_template
 from otx.cli.registry import Registry
@@ -64,6 +65,7 @@
 
 otx_dir = os.getcwd()
 
+MULTI_GPU_UNAVAILABLE = torch.cuda.device_count() <= 1
 TT_STABILITY_TESTS = os.environ.get("TT_STABILITY_TESTS", False)
 if TT_STABILITY_TESTS:
     default_template = parse_model_template(
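Taken together, these two files move the torch import and the MULTI_GPU_UNAVAILABLE flag out of test_detection.py and into test_tiling_detection.py. A hedged sketch of how such a flag is typically consumed in a pytest suite follows; the exact marker and reason string used in OTX may differ, and test_multi_gpu_train is a made-up example name.

# Hedged sketch only: typical pytest usage of a MULTI_GPU_UNAVAILABLE guard.
# The skipif pattern is standard pytest; the test body is invented for illustration.
import pytest
import torch

MULTI_GPU_UNAVAILABLE = torch.cuda.device_count() <= 1


@pytest.mark.skipif(MULTI_GPU_UNAVAILABLE, reason="test requires at least two GPUs")
def test_multi_gpu_train():
    # Runs only on hosts where more than one CUDA device is visible.
    assert torch.cuda.device_count() > 1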
25 changes: 15 additions & 10 deletions tests/regression/action/test_action_classification.py
@@ -44,17 +44,20 @@ class TestRegressionActionClassification:
 
     @classmethod
     @pytest.fixture(scope="class")
-    def reg_cfg(cls):
+    def reg_cfg(cls, tmp_dir_path):
+        results_root = os.environ.get("REG_RESULTS_ROOT", tmp_dir_path)
         cls.reg_cfg = RegressionTestConfig(
             cls.TASK_TYPE,
             cls.TRAIN_TYPE,
             cls.LABEL_TYPE,
             os.getcwd(),
             train_params=cls.TRAIN_PARAMS,
+            results_root=results_root,
         )
 
         yield cls.reg_cfg
 
+        print(f"\nwritting regression result to {cls.reg_cfg.result_dir}/result_{cls.TRAIN_TYPE}_{cls.LABEL_TYPE}.json")
         with open(f"{cls.reg_cfg.result_dir}/result_{cls.TRAIN_TYPE}_{cls.LABEL_TYPE}.json", "w") as result_file:
             json.dump(cls.reg_cfg.result_dict, result_file, indent=4)
 
@@ -64,6 +67,7 @@ def setup_method(self):
     @e2e_pytest_component
     @pytest.mark.parametrize("template", templates, ids=templates_ids)
     def test_otx_train(self, reg_cfg, template, tmp_dir_path):
+        test_type = "train"
         self.performance[template.name] = {}
 
         tmp_dir_path = tmp_dir_path / reg_cfg.task_type
@@ -77,21 +81,23 @@ def test_otx_train(self, reg_cfg, template, tmp_dir_path):
             tmp_dir_path,
             reg_cfg.otx_dir,
             reg_cfg.args,
-            reg_cfg.config_dict["regression_criteria"]["train"],
+            reg_cfg.config_dict["regression_criteria"][test_type],
             self.performance[template.name],
         )
         infer_elapsed_time = timer() - infer_start_time
 
         self.performance[template.name][TIME_LOG["train_time"]] = round(train_elapsed_time, 3)
         self.performance[template.name][TIME_LOG["infer_time"]] = round(infer_elapsed_time, 3)
-        reg_cfg.result_dict[reg_cfg.task_type][reg_cfg.label_type][reg_cfg.train_type]["train"].append(self.performance)
+        reg_cfg.update_result(test_type, self.performance)
 
         assert test_result["passed"] is True, test_result["log"]
 
     @e2e_pytest_component
     @pytest.mark.parametrize("template", templates, ids=templates_ids)
     def test_otx_train_kpi_test(self, reg_cfg, template):
         performance = reg_cfg.get_template_performance(template)
+        if performance is None:
+            pytest.skip(reason="Cannot find performance data from results.")
 
         kpi_train_result = regression_train_time_testing(
             train_time_criteria=reg_cfg.config_dict["kpi_e2e_train_time_criteria"]["train"],
@@ -113,6 +119,7 @@ def test_otx_train_kpi_test(self, reg_cfg, template):
     def test_otx_export_eval_openvino(self, reg_cfg, template, tmp_dir_path):
         if template.name == "MoViNet":
             pytest.skip(reason="Issue#2058: MoViNet fails with OpenVINO inference occasionally")
+        test_type = "export"
         self.performance[template.name] = {}
 
         tmp_dir_path = tmp_dir_path / reg_cfg.task_type
@@ -127,25 +134,23 @@ def test_otx_export_eval_openvino(self, reg_cfg, template, tmp_dir_path):
             reg_cfg.otx_dir,
             reg_cfg.args,
             threshold=0.05,
-            criteria=reg_cfg.config_dict["regression_criteria"]["export"],
+            criteria=reg_cfg.config_dict["regression_criteria"][test_type],
             reg_threshold=0.10,
             result_dict=self.performance[template.name],
         )
         export_eval_elapsed_time = timer() - export_eval_start_time
 
         self.performance[template.name][TIME_LOG["export_time"]] = round(export_elapsed_time, 3)
         self.performance[template.name][TIME_LOG["export_eval_time"]] = round(export_eval_elapsed_time, 3)
-        reg_cfg.result_dict[reg_cfg.task_type][reg_cfg.label_type][reg_cfg.train_type]["export"].append(
-            self.performance
-        )
-
+        reg_cfg.update_result(test_type, self.performance)
         assert test_result["passed"] is True, test_result["log"]
 
     @e2e_pytest_component
     @pytest.mark.parametrize("template", templates, ids=templates_ids)
     def test_ptq_optimize_eval(self, reg_cfg, template, tmp_dir_path):
         if template.name == "MoViNet":
             pytest.skip(reason="Issue#2058: MoViNet fails with OpenVINO inference occasionally")
+        test_type = "ptq"
         self.performance[template.name] = {}
 
         tmp_dir_path = tmp_dir_path / reg_cfg.task_type
@@ -159,14 +164,14 @@ def test_ptq_optimize_eval(self, reg_cfg, template, tmp_dir_path):
             tmp_dir_path,
             reg_cfg.otx_dir,
             reg_cfg.args,
-            criteria=reg_cfg.config_dict["regression_criteria"]["ptq"],
+            criteria=reg_cfg.config_dict["regression_criteria"][test_type],
             reg_threshold=0.10,
             result_dict=self.performance[template.name],
        )
         ptq_eval_elapsed_time = timer() - ptq_eval_start_time
 
         self.performance[template.name][TIME_LOG["ptq_time"]] = round(ptq_elapsed_time, 3)
         self.performance[template.name][TIME_LOG["ptq_eval_time"]] = round(ptq_eval_elapsed_time, 3)
-        reg_cfg.result_dict[reg_cfg.task_type][reg_cfg.label_type][reg_cfg.train_type]["ptq"].append(self.performance)
+        reg_cfg.update_result(test_type, self.performance)
 
         assert test_result["passed"] is True, test_result["log"]
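Across this file the per-test bookkeeping changes the same way: a test_type string is set once, the regression criteria are looked up by that key, and the hand-written result_dict[...].append(...) chains are replaced by a single reg_cfg.update_result(...) call. The snippet below is not the actual RegressionTestConfig implementation; it is a hypothetical reconstruction of what update_result could look like, inferred only from the old inline appends and the new call sites visible in this diff.

# Hypothetical reconstruction, not OTX source: one way update_result() could fold
# the nested appends shown in the old lines of this diff into a single helper.
class RegressionTestConfig:  # illustrative subset of the real class
    def __init__(self, task_type, train_type, label_type, result_dict):
        self.task_type = task_type
        self.train_type = train_type
        self.label_type = label_type
        self.result_dict = result_dict

    def update_result(self, test_type, performance, is_anomaly=False, category=None):
        """Append one template's performance entry to the matching result bucket."""
        if is_anomaly:
            # Anomaly results are keyed by dataset category (see the anomaly tests below).
            self.result_dict[self.task_type][test_type][category].append(performance)
        else:
            self.result_dict[self.task_type][self.label_type][self.train_type][test_type].append(performance)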
12 changes: 9 additions & 3 deletions tests/regression/action/test_action_detection.py
@@ -46,17 +46,20 @@ class TestRegressionActionDetection:
 
     @classmethod
     @pytest.fixture(scope="class")
-    def reg_cfg(cls):
+    def reg_cfg(cls, tmp_dir_path):
+        results_root = os.environ.get("REG_RESULTS_ROOT", tmp_dir_path)
         cls.reg_cfg = RegressionTestConfig(
             cls.TASK_TYPE,
             cls.TRAIN_TYPE,
             cls.LABEL_TYPE,
             os.getcwd(),
             train_params=cls.TRAIN_PARAMS,
+            results_root=results_root,
         )
 
         yield cls.reg_cfg
 
+        print(f"\nwritting regression result to {cls.reg_cfg.result_dir}/result_{cls.TRAIN_TYPE}_{cls.LABEL_TYPE}.json")
         with open(f"{cls.reg_cfg.result_dir}/result_{cls.TRAIN_TYPE}_{cls.LABEL_TYPE}.json", "w") as result_file:
             json.dump(cls.reg_cfg.result_dict, result_file, indent=4)
 
@@ -66,6 +69,7 @@ def setup_method(self):
     @e2e_pytest_component
     @pytest.mark.parametrize("template", templates, ids=templates_ids)
     def test_otx_train(self, reg_cfg, template, tmp_dir_path):
+        test_type = "train"
         self.performance[template.name] = {}
 
         tmp_dir_path = tmp_dir_path / reg_cfg.task_type
@@ -79,21 +83,23 @@ def test_otx_train(self, reg_cfg, template, tmp_dir_path):
             tmp_dir_path,
             reg_cfg.otx_dir,
             reg_cfg.args,
-            reg_cfg.config_dict["regression_criteria"]["train"],
+            reg_cfg.config_dict["regression_criteria"][test_type],
             self.performance[template.name],
         )
         infer_elapsed_time = timer() - infer_start_time
 
         self.performance[template.name][TIME_LOG["train_time"]] = round(train_elapsed_time, 3)
         self.performance[template.name][TIME_LOG["infer_time"]] = round(infer_elapsed_time, 3)
-        reg_cfg.result_dict[reg_cfg.task_type][reg_cfg.label_type][reg_cfg.train_type]["train"].append(self.performance)
+        reg_cfg.update_result(test_type, self.performance)
 
         assert test_result["passed"] is True, test_result["log"]
 
     @e2e_pytest_component
     @pytest.mark.parametrize("template", templates, ids=templates_ids)
     def test_otx_train_kpi_test(self, reg_cfg, template):
         performance = reg_cfg.get_template_performance(template)
+        if performance is None:
+            pytest.skip(reason="Cannot find performance data from results.")
 
         kpi_train_result = regression_train_time_testing(
             train_time_criteria=reg_cfg.config_dict["kpi_e2e_train_time_criteria"]["train"],
40 changes: 27 additions & 13 deletions tests/regression/anomaly/test_anomaly_classificaiton.py
@@ -53,14 +53,21 @@ class TestRegressionAnomalyClassification:
 
     @classmethod
     @pytest.fixture(scope="class")
-    def reg_cfg(cls):
+    def reg_cfg(cls, tmp_dir_path):
+        results_root = os.environ.get("REG_RESULTS_ROOT", tmp_dir_path)
         cls.reg_cfg = RegressionTestConfig(
-            cls.TASK_TYPE, cls.TRAIN_TYPE, cls.LABEL_TYPE, os.getcwd(), enable_auto_num_worker=False
+            cls.TASK_TYPE,
+            cls.TRAIN_TYPE,
+            cls.LABEL_TYPE,
+            os.getcwd(),
+            enable_auto_num_worker=False,
+            results_root=results_root,
         )
 
         yield cls.reg_cfg
 
-        with open(f"{cls.reg_cfg.result_dir}/result_{cls.TRAIN_TYPE}_{cls.LABEL_TYPE}.json", "w") as result_file:
+        print(f"\nwritting regression result to {cls.reg_cfg.result_dir}/result_{cls.TRAIN_TYPE}_{cls.LABEL_TYPE}.json")
+        with open(f"{cls.reg_cfg.result_dir}/result_{cls.TASK_TYPE}.json", "w") as result_file:
             json.dump(cls.reg_cfg.result_dict, result_file, indent=4)
 
     def setup_method(self):
@@ -81,6 +88,7 @@ def _apply_category(self, data_dict, category):
     @pytest.mark.parametrize("template", templates, ids=templates_ids)
     @pytest.mark.parametrize("category", SAMPLED_ANOMALY_DATASET_CATEGORIES)
     def test_otx_train(self, reg_cfg, template, tmp_dir_path, category):
+        test_type = "train"
         self.performance[template.name] = {}
         category_data_args = self._apply_category(reg_cfg.args, category)
 
@@ -95,14 +103,14 @@ def test_otx_train(self, reg_cfg, template, tmp_dir_path, category):
             tmp_dir_path,
             reg_cfg.otx_dir,
             category_data_args,
-            reg_cfg.config_dict["regression_criteria"]["train"][category],
+            reg_cfg.config_dict["regression_criteria"][test_type][category],
             self.performance[template.name],
         )
         infer_elapsed_time = timer() - infer_start_time
 
         self.performance[template.name][TIME_LOG["train_time"]] = round(train_elapsed_time, 3)
         self.performance[template.name][TIME_LOG["infer_time"]] = round(infer_elapsed_time, 3)
-        reg_cfg.result_dict[reg_cfg.task_type]["train"][category].append(self.performance)
+        reg_cfg.update_result(test_type, self.performance, is_anomaly=True, category=category)
 
         assert test_result["passed"] is True, test_result["log"]
 
@@ -112,6 +120,8 @@ def test_otx_train(self, reg_cfg, template, tmp_dir_path, category):
     def test_otx_train_kpi_test(self, reg_cfg, template, category):
         """KPI tests: measure the train+val time and evaluation time and compare with criteria."""
         performance = reg_cfg.get_template_performance(template, category=category)
+        if performance is None:
+            pytest.skip(reason="Cannot find performance data from results.")
 
         # Compare train+val time with the KPI criteria.
         kpi_train_result = regression_train_time_testing(
@@ -136,6 +146,7 @@ def test_otx_train_kpi_test(self, reg_cfg, template, category):
     def test_otx_export_eval_openvino(self, reg_cfg, template, tmp_dir_path, category):
         if category in ["transistor", "cable"]:
             pytest.skip("Issue#2189: Anomaly task sometimes shows performance drop")
+        test_type = "export"
         self.performance[template.name] = {}
         category_data_args = self._apply_category(reg_cfg.args, category)
 
@@ -151,15 +162,15 @@ def test_otx_export_eval_openvino(self, reg_cfg, template, tmp_dir_path, categor
             reg_cfg.otx_dir,
             category_data_args,
             threshold=0.05,
-            criteria=reg_cfg.config_dict["regression_criteria"]["export"][category],
+            criteria=reg_cfg.config_dict["regression_criteria"][test_type][category],
             reg_threshold=0.10,
             result_dict=self.performance[template.name],
         )
         export_eval_elapsed_time = timer() - export_eval_start_time
 
         self.performance[template.name][TIME_LOG["export_time"]] = round(export_elapsed_time, 3)
         self.performance[template.name][TIME_LOG["export_eval_time"]] = round(export_eval_elapsed_time, 3)
-        reg_cfg.result_dict[reg_cfg.task_type]["export"][category].append(self.performance)
+        reg_cfg.update_result(test_type, self.performance, is_anomaly=True, category=category)
 
         assert test_result["passed"] is True, test_result["log"]
 
@@ -169,6 +180,7 @@ def test_otx_export_eval_openvino(self, reg_cfg, template, tmp_dir_path, categor
     def test_otx_deploy_eval_deployment(self, reg_cfg, template, tmp_dir_path, category):
         if category in ["transistor", "cable"]:
             pytest.skip("Issue#2189: Anomaly task sometimes shows performance drop")
+        test_type = "deploy"
         self.performance[template.name] = {}
         category_data_args = self._apply_category(reg_cfg.args, category)
 
@@ -184,15 +196,15 @@ def test_otx_deploy_eval_deployment(self, reg_cfg, template, tmp_dir_path, categ
             reg_cfg.otx_dir,
             category_data_args,
             threshold=0.0,
-            criteria=reg_cfg.config_dict["regression_criteria"]["deploy"][category],
+            criteria=reg_cfg.config_dict["regression_criteria"][test_type][category],
             reg_threshold=0.10,
             result_dict=self.performance[template.name],
        )
         deploy_eval_elapsed_time = timer() - deploy_eval_start_time
 
         self.performance[template.name][TIME_LOG["deploy_time"]] = round(deploy_elapsed_time, 3)
         self.performance[template.name][TIME_LOG["deploy_eval_time"]] = round(deploy_eval_elapsed_time, 3)
-        reg_cfg.result_dict[reg_cfg.task_type]["deploy"][category].append(self.performance)
+        reg_cfg.update_result(test_type, self.performance, is_anomaly=True, category=category)
 
         assert test_result["passed"] is True, test_result["log"]
 
@@ -202,6 +214,7 @@ def test_otx_deploy_eval_deployment(self, reg_cfg, template, tmp_dir_path, categ
     def test_nncf_optimize_eval(self, reg_cfg, template, tmp_dir_path, category):
         if category in ["transistor", "cable", "bottle"]:
             pytest.skip("Issue#2189: Anomaly task sometimes shows performance drop")
+        test_type = "nncf"
         self.performance[template.name] = {}
         category_data_args = self._apply_category(reg_cfg.args, category)
 
@@ -220,22 +233,23 @@ def test_nncf_optimize_eval(self, reg_cfg, template, tmp_dir_path, category):
             reg_cfg.otx_dir,
             category_data_args,
             threshold=0.01,
-            criteria=reg_cfg.config_dict["regression_criteria"]["nncf"][category],
+            criteria=reg_cfg.config_dict["regression_criteria"][test_type][category],
             reg_threshold=0.10,
             result_dict=self.performance[template.name],
        )
         nncf_eval_elapsed_time = timer() - nncf_eval_start_time
 
         self.performance[template.name][TIME_LOG["nncf_time"]] = round(nncf_elapsed_time, 3)
         self.performance[template.name][TIME_LOG["nncf_eval_time"]] = round(nncf_eval_elapsed_time, 3)
-        reg_cfg.result_dict[reg_cfg.task_type]["nncf"][category].append(self.performance)
+        reg_cfg.update_result(test_type, self.performance, is_anomaly=True, category=category)
 
         assert test_result["passed"] is True, test_result["log"]
 
     @e2e_pytest_component
     @pytest.mark.parametrize("template", templates, ids=templates_ids)
     @pytest.mark.parametrize("category", SAMPLED_ANOMALY_DATASET_CATEGORIES)
     def test_ptq_optimize_eval(self, reg_cfg, template, tmp_dir_path, category):
+        test_type = "ptq"
         self.performance[template.name] = {}
         category_data_args = self._apply_category(reg_cfg.args, category)
 
@@ -250,14 +264,14 @@ def test_ptq_optimize_eval(self, reg_cfg, template, tmp_dir_path, category):
             tmp_dir_path,
             reg_cfg.otx_dir,
             category_data_args,
-            criteria=reg_cfg.config_dict["regression_criteria"]["ptq"][category],
+            criteria=reg_cfg.config_dict["regression_criteria"][test_type][category],
             reg_threshold=0.10,
             result_dict=self.performance[template.name],
        )
         ptq_eval_elapsed_time = timer() - ptq_eval_start_time
 
         self.performance[template.name][TIME_LOG["ptq_time"]] = round(ptq_elapsed_time, 3)
         self.performance[template.name][TIME_LOG["ptq_eval_time"]] = round(ptq_eval_elapsed_time, 3)
-        reg_cfg.result_dict[reg_cfg.task_type]["ptq"][category].append(self.performance)
+        reg_cfg.update_result(test_type, self.performance, is_anomaly=True, category=category)
 
         assert test_result["passed"] is True, test_result["log"]
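One practical consequence of the new fixture signature, assuming the fixture code above is the whole story: results default to pytest's tmp_dir_path and only land in a fixed directory when REG_RESULTS_ROOT is exported before the run. The snippet below is a small, hypothetical post-processing sketch for collecting those JSON files; the file-name pattern comes from the fixture code, while the fallback path is invented.

# Hedged post-run sketch: gather the result_*.json files written by the reg_cfg
# fixtures above. Assumes REG_RESULTS_ROOT was exported before running pytest;
# the fallback path is invented for the example.
import json
import os
from pathlib import Path

results_root = Path(os.environ.get("REG_RESULTS_ROOT", "/tmp/otx_regression_results"))

for result_file in sorted(results_root.rglob("result_*.json")):
    with open(result_file) as f:
        results = json.load(f)
    # Top-level keys follow the result_dict layout (task type, then test-type buckets).
    print(result_file, "->", list(results))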