From a8de88f2bc3cce7d2714b2e7d2b256220b0bb144 Mon Sep 17 00:00:00 2001 From: lindsay stevens Date: Mon, 9 Dec 2024 22:39:25 +1100 Subject: [PATCH] add: test results for performance test cases - generally quite a bit faster now but there seems to be something still potentially wrong when translations and/or lots of choices/itemsets are involved, e.g. 10K text = ~5 seconds, vs 10K select + 20K choices = ~80 seconds. - switched to using `convert()` directly since this now accepts markdown input, and it avoids potentially confounding effects of extra things done in `assertPyxformXform` (such as re-parsing XML). - this had a minor impact (approx 10%) on test execution time but considerable impact (approx 30%) on memory usage. --- tests/test_dynamic_default.py | 18 ++++++++++-------- tests/test_translations.py | 20 +++++++++++--------- 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/tests/test_dynamic_default.py b/tests/test_dynamic_default.py index 595dd31e..05d48c1c 100644 --- a/tests/test_dynamic_default.py +++ b/tests/test_dynamic_default.py @@ -10,6 +10,7 @@ import psutil from pyxform import utils +from pyxform.xls2xform import convert from tests.pyxform_test_case import PyxformTestCase from tests.xpath_helpers.choices import xpc @@ -774,13 +775,14 @@ def test_dynamic_default_performance__time(self): """ Should find the dynamic default check costs little extra relative time large forms. - Results with Python 3.9.10 on VM with 4CPU 8GB RAM, x questions each, average of - 10 runs (seconds), with and without the check, per question: - | num | with | without | - | 500 | 0.4599 | 0.4535 | - | 1000 | 0.9234 | 0.9195 | - | 2000 | 2.1118 | 1.9917 | - | 5000 | 4.9563 | 4.8714 | + Results with Python 3.10.14 on VM with 2vCPU (i7-7700HQ) 1GB RAM, x questions + each, average of 10 runs (seconds), with and without the check, per question: + | num | with | without | peak RSS MB | + | 500 | 0.2415 | 0.2512 | 58 | + | 1000 | 0.4754 | 0.5199 | 63 | + | 2000 | 0.9866 | 1.2936 | 67 | + | 5000 | 3.1041 | 2.7132 | 96 | + | 10000 | 5.4795 | 5.3229 | 133 | """ survey_header = """ | survey | | | | | @@ -798,7 +800,7 @@ def run(name, case): results = [] while runs < 10: start = perf_counter() - self.assertPyxformXform(md=case) + convert(xlsform=case) results.append(perf_counter() - start) runs += 1 print(name, round(sum(results) / len(results), 4)) diff --git a/tests/test_translations.py b/tests/test_translations.py index b644dde6..4ef9c88d 100644 --- a/tests/test_translations.py +++ b/tests/test_translations.py @@ -1,7 +1,7 @@ """ Test translations syntax. """ - +from pyxform.xls2xform import convert import unittest from dataclasses import dataclass from time import perf_counter @@ -396,13 +396,15 @@ def test_missing_translations_check_performance(self): """ Should find the translations check costs a fraction of a second for large forms. - Results with Python 3.10.14 on VM with 2vCPU (i7-7700HQ) 4GB RAM, x questions + Results with Python 3.10.14 on VM with 2vCPU (i7-7700HQ) 1GB RAM, x questions with 2 choices each, average of 10 runs (seconds), with and without the check, per question: - | num | with | without | - | 500 | 3.0420 | 3.0427 | - | 1000 | 9.7641 | 9.6972 | - | 2000 | 30.645 | 28.869 | + | num | with | without | peak RSS MB | + | 500 | 1.0235 | 0.9831 | 74 | + | 1000 | 2.3025 | 2.6332 | 101 | + | 2000 | 5.6960 | 6.2805 | 157 | + | 5000 | 23.439 | 25.327 | 265 | + | 10000 | 80.396 | 75.165 | 480 | """ survey_header = """ | survey | | | | | @@ -419,7 +421,7 @@ def test_missing_translations_check_performance(self): | | c{i} | na | la-d | la-e | | | c{i} | nb | lb-d | lb-e | """ - for count in (500, 1000, 2000): + for count in (10000, ): questions = "\n".join(question.format(i=i) for i in range(1, count)) choice_lists = "\n".join(choice_list.format(i=i) for i in range(1, count)) md = "".join((survey_header, questions, choices_header, choice_lists)) @@ -429,10 +431,10 @@ def run(name, case): results = [] while runs < 10: start = perf_counter() - self.assertPyxformXform(md=case) + convert(xlsform=case) results.append(perf_counter() - start) runs += 1 - print(name, sum(results) / len(results)) + print(name, round(sum(results) / len(results), 4)) run(name=f"questions={count}, with check (seconds):", case=md)