diff --git a/.azure-pipelines/templates/automation_test.yml b/.azure-pipelines/templates/automation_test.yml
index 443c0f224aa..21c89b29c92 100644
--- a/.azure-pipelines/templates/automation_test.yml
+++ b/.azure-pipelines/templates/automation_test.yml
@@ -7,6 +7,14 @@ parameters:
 - name: profile
   type: string
   default: latest
+- name: instance_cnt
+  type: string
+  default: '1'
+- name: instance_idx
+  type: string
+  default: '1'
+  # instance_cnt = 8, instance_idx = 3: means we have 8 instances totally, and now we are scheduling modules on third instance
+  # instance_cnt = 1, instance_idx = 1: means we only have 1 instance, so we don't need to schedule modules
 - name: fullTest
   displayName: Run full test?
   type: boolean
@@ -56,7 +64,7 @@ steps:
         azdev test --no-exitfirst --repo=./ --src=HEAD --tgt=origin/$(System.PullRequest.TargetBranch) --cli-ci --profile ${{ parameters.profile }} --verbose --series --pytest-args "--durations=0"
       else
         echo "Running full test"
-        python scripts/ci/automation_full_test.py "${{ parameters.profile }}" "$serial_modules"
+        python scripts/ci/automation_full_test.py "${{ parameters.instance_cnt }}" "${{ parameters.instance_idx }}" "${{ parameters.profile }}" "$serial_modules"
       fi
     displayName: "azdev test"
     env:
diff --git a/azure-pipelines-full-tests.yml b/azure-pipelines-full-tests.yml
index 3617a69238c..1186255ea37 100644
--- a/azure-pipelines-full-tests.yml
+++ b/azure-pipelines-full-tests.yml
@@ -13,26 +13,6 @@ pr:
     - '*'
 
 jobs:
-- job: AutomationTest
-  displayName: Automation Test (Profile Latest)
-  timeoutInMinutes: 120
-  pool:
-    vmImage: 'ubuntu-20.04'
-  strategy:
-    matrix:
-      Python36:
-        python.version: '3.6'
-      Python38:
-        python.version: '3.8'
-      Python310:
-        python.version: '3.10'
-  steps:
-    - template: .azure-pipelines/templates/automation_test.yml
-      parameters:
-        pythonVersion: '$(python.version)'
-        profile: 'latest'
-        fullTest: true
-
 - job: AutomationTest20200901
   displayName: Automation Test (Profile 2020-09-01)
   timeoutInMinutes: 120
@@ -92,3 +72,102 @@ jobs:
         pythonVersion: '$(python.version)'
         profile: '2018-03-01-hybrid'
         fullTest: true
+
+- job: AutomationFullTestPython36ProfileLatest
+  displayName: Automation Full Test Python36 Profile Latest
+  timeoutInMinutes: 9999
+  strategy:
+    maxParallel: 8
+    matrix:
+      instance1:
+        Instance_idx: 1
+      instance2:
+        Instance_idx: 2
+      instance3:
+        Instance_idx: 3
+      instance4:
+        Instance_idx: 4
+      instance5:
+        Instance_idx: 5
+      instance6:
+        Instance_idx: 6
+      instance7:
+        Instance_idx: 7
+      instance8:
+        Instance_idx: 8
+  pool:
+    vmImage: 'ubuntu-20.04'
+  steps:
+    - template: .azure-pipelines/templates/automation_test.yml
+      parameters:
+        pythonVersion: '3.6'
+        profile: 'latest'
+        instance_cnt: '8'
+        instance_idx: '$(Instance_idx)'
+        fullTest: true
+
+- job: AutomationFullTestPython38ProfileLatest
+  displayName: Automation Full Test Python38 Profile Latest
+  timeoutInMinutes: 9999
+  strategy:
+    maxParallel: 8
+    matrix:
+      instance1:
+        Instance_idx: 1
+      instance2:
+        Instance_idx: 2
+      instance3:
+        Instance_idx: 3
+      instance4:
+        Instance_idx: 4
+      instance5:
+        Instance_idx: 5
+      instance6:
+        Instance_idx: 6
+      instance7:
+        Instance_idx: 7
+      instance8:
+        Instance_idx: 8
+  pool:
+    vmImage: 'ubuntu-20.04'
+  steps:
+    - template: .azure-pipelines/templates/automation_test.yml
+      parameters:
+        pythonVersion: '3.8'
+        profile: 'latest'
+        instance_cnt: '8'
+        instance_idx: '$(Instance_idx)'
+        fullTest: true
+
+- job: AutomationFullTestPython310ProfileLatest
+  displayName: Automation Full Test Python310 Profile Latest
+  timeoutInMinutes: 9999
+  strategy:
+    maxParallel: 8
+    matrix:
+      instance1:
+        Instance_idx: 1
+      instance2:
+        Instance_idx: 2
+      instance3:
+        Instance_idx: 3
+      instance4:
+        Instance_idx: 4
+      instance5:
+        Instance_idx: 5
+      instance6:
+        Instance_idx: 6
+      instance7:
+        Instance_idx: 7
+      instance8:
+        Instance_idx: 8
+  pool:
+    vmImage: 'ubuntu-20.04'
+  steps:
+    - template: .azure-pipelines/templates/automation_test.yml
+      parameters:
+        pythonVersion: '3.10'
+        profile: 'latest'
+        instance_cnt: '8'
+        instance_idx: '$(Instance_idx)'
+        fullTest: true
diff --git a/scripts/ci/automation_full_test.py b/scripts/ci/automation_full_test.py
index a0323525fb5..16dfa7ae6ad 100644
--- a/scripts/ci/automation_full_test.py
+++ b/scripts/ci/automation_full_test.py
@@ -15,15 +15,121 @@
 ch = logging.StreamHandler()
 ch.setLevel(logging.DEBUG)
 logger.addHandler(ch)
-profile = sys.argv[1]
-serial_modules = sys.argv[2].split()
+
+# sys.argv is passed by .azure-pipelines/templates/automation_test.yml in section `Running full test`
+instance_cnt = int(sys.argv[1])
+instance_idx = int(sys.argv[2])
+profile = sys.argv[3]
+serial_modules = sys.argv[4].split()
+jobs = {
+    'acr': 45,
+    'acs': 62,
+    'advisor': 18,
+    'ams': 136,
+    'apim': 30,
+    'appconfig': 41,
+    'appservice': 150,  # series
+    # 'appservice': 157,  # parallel
+    'aro': 33,
+    'backup': 76,
+    'batch': 21,
+    'batchai': 24,
+    'billing': 21,
+    'botservice': 25,  # series
+    # 'botservice': 28,  # parallel
+    'cdn': 36,
+    'cloud': 18,  # series
+    # 'cloud': 22,  # parallel
+    'cognitiveservices': 24,
+    'config': 21,
+    'configure': 17,
+    'consumption': 21,
+    'container': 19,
+    'cosmosdb': 45,
+    'databoxedge': 25,
+    'deploymentmanager': 18,
+    'dla': 19,
+    'dls': 22,
+    'dms': 22,
+    'eventgrid': 24,
+    'eventhubs': 24,
+    'extension': 0,
+    'feedback': 31,
+    'find': 22,
+    'hdinsight': 34,
+    'identity': 18,
+    'interactive': 18,
+    'iot': 57,
+    'keyvault': 39,
+    'kusto': 23,
+    'lab': 19,
+    'managedservices': 18,
+    'maps': 19,
+    'marketplaceordering': 18,
+    'monitor': 66,
+    'natgateway': 22,
+    'netappfiles': 48,
+    'network': 364,  # series
+    # 'network': 182,  # parallel
+    'policyinsights': 20,
+    'privatedns': 29,
+    'profile': 20,
+    'rdbms': 89,
+    'redis': 31,
+    'relay': 22,
+    'reservations': 20,
+    'resource': 101,
+    'role': 38,
+    'search': 34,
+    'security': 23,
+    'servicebus': 24,
+    'serviceconnector': 56,
+    'servicefabric': 49,
+    'signalr': 20,
+    'sql': 117,
+    'sqlvm': 31,
+    'storage': 108,
+    'synapse': 45,
+    'util': 18,
+    'vm': 313,
+    'azure-cli': 16,
+    'azure-cli-core': 26,
+    'azure-cli-telemetry': 18,
+    'azure-cli-testsdk': 20,
+}
 
 
 class AutomaticScheduling(object):
 
     def __init__(self):
+        """
+        self.jobs: Record the test time of each module
+        self.modules: All modules and core, ignore extensions
+        self.serial_modules: All modules which need to execute in serial mode
+        self.works: Record which modules each worker needs to test
+        self.instance_cnt:
+            The total number of concurrent automation full test pipeline instance with specify python version
+            Because we share the vm pool with azure-sdk team, so we can't set the number of concurrency arbitrarily
+            Best practice is to keep the number of concurrent tasks below 50
+            If you set a larger number of concurrency, it will cause many instances to be in the waiting state
+            And the network module has the largest number of test cases and can only be tested serially for now, so setting instance_cnt = 8 is sufficient
+            Total concurrent number: AutomationTest20200901 * 3 + AutomationTest20190301 * 3 + AutomationTest20180301 * 3 + AutomationFullTest * 8 * 3 (python_version) = 33
+        self.instance_idx:
+            The index of concurrent automation full test pipeline instance with specify python version
+            For example:
+            instance_cnt = 8, instance_idx = 3: means we have 8 instances totally, and now we are scheduling modules on third instance
+            instance_cnt = 1, instance_idx = 1: means we only have 1 instance, so we don't need to schedule modules
+        :raises ValueError: if instance_idx is not in the range 1..instance_cnt
+        """
+        if not 1 <= instance_idx <= instance_cnt:
+            raise ValueError('instance_idx must be between 1 and instance_cnt ({}), got {}'.format(
+                instance_cnt, instance_idx))
+        self.jobs = []
         self.modules = {}
         self.serial_modules = serial_modules
+        self.works = [{} for _ in range(instance_cnt)]
+        self.instance_cnt = instance_cnt
+        self.instance_idx = instance_idx
         self.profile = profile
 
     def get_all_modules(self):
@@ -31,12 +137,39 @@ def get_all_modules(self):
         # only get modules and core, ignore extensions
         self.modules = {**result['mod'], **result['core']}
 
-    def run_modules(self):
-        # divide all modules into parallel or serial execution
+    def append_new_modules(self):
+        # If add a new module, use average test time
+        avg_cost = sum(jobs.values()) // len(jobs)
+        for module in self.modules:
+            if module not in jobs:
+                jobs[module] = avg_cost
+        # sort jobs by time cost (desc)
+        self.jobs = sorted(jobs.items(), key=lambda item: -item[1])
+
+    def get_worker(self):
+        """
+        Use greedy algorithm distribute jobs to each worker
+        For each job, we assign it to the worker with the least total test time currently (first one wins on a tie)
+        :return worker number
+        """
+        return min(range(len(self.works)), key=lambda idx: sum(self.works[idx].values()))
+
+    def get_instance_modules(self):
+        # get modules which need to execute in the pipeline instance with specific index
+        # start from empty workers so that calling this method again gives the same result
+        self.works = [{} for _ in range(self.instance_cnt)]
+        for k, v in self.jobs:
+            idx = self.get_worker()
+            self.works[idx][k] = v
+        # instance_idx: 1~n, python list index: 0~n-1 (self.instance_idx itself is left untouched)
+        return self.works[self.instance_idx - 1]
+
+    def run_instance_modules(self, instance_modules):
+        # divide the modules that the current instance needs to execute into parallel or serial execution
         error_flag = False
         serial_tests = []
         parallel_tests = []
-        for k, v in self.modules.items():
+        for k, v in instance_modules.items():
             if k in self.serial_modules:
                 serial_tests.append(k)
             else:
@@ -62,9 +195,11 @@ def run_modules(self):
 
 def main():
     logger.info("Start automation full test ...\n")
-    autoschduling = AutomaticScheduling()
-    autoschduling.get_all_modules()
-    sys.exit(1) if autoschduling.run_modules() else sys.exit(0)
+    autoscheduling = AutomaticScheduling()
+    autoscheduling.get_all_modules()
+    autoscheduling.append_new_modules()
+    instance_modules = autoscheduling.get_instance_modules()
+    sys.exit(1 if autoscheduling.run_instance_modules(instance_modules) else 0)
 
 
 if __name__ == '__main__':