diff --git a/.asf.yaml b/.asf.yaml new file mode 100644 index 00000000000..aa03340b44f --- /dev/null +++ b/.asf.yaml @@ -0,0 +1,33 @@ +## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 + +## See https://s.apache.org/asfyaml + +github: + description: "Apache Storm" + homepage: https://storm.apache.org/ + protected_branches: + # Prevent force pushes to primary branches + master: {} + custom_subjects: + new_pr: "[PR] {title} ({repository})" + close_pr: "Re: [PR] {title} ({repository})" + comment_pr: "Re: [PR] {title} ({repository})" + diffcomment: "Re: [PR] {title} ({repository})" + merge_pr: "Re: [PR] {title} ({repository})" + new_issue: "[I] {title} ({repository})" + comment_issue: "Re: [I] {title} ({repository})" + close_issue: "Re: [I] {title} ({repository})" + catchall: "[GH] {title} ({repository})" + new_discussion: "[D] {title} ({repository})" + edit_discussion: "Re: [D] {title} ({repository})" + close_discussion: "Re: [D] {title} ({repository})" + close_discussion_with_comment: "Re: [D] {title} ({repository})" + reopen_discussion: "Re: [D] {title} ({repository})" + new_comment_discussion: "Re: [D] {title} ({repository})" + edit_comment_discussion: "Re: [D] {title} ({repository})" + delete_comment_discussion: "Re: [D] {title} ({repository})" + labels: + - apache + - storm + - streaming + - distributed diff --git a/.gitattributes b/.gitattributes index ed9fb85f896..44c89df2e96 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + # Some storm-webapp logviewer tests require input files to have LF line endings due to byte counting. storm-webapp/src/test/resources/*.log.test text eol=lf diff --git a/.github/workflows/maven.yaml b/.github/workflows/maven.yaml index d00c76fdc97..a426d9b69ed 100644 --- a/.github/workflows/maven.yaml +++ b/.github/workflows/maven.yaml @@ -28,7 +28,7 @@ jobs: strategy: matrix: os: [ ubuntu-latest ] - java: [ 8, 11 ] + java: [ 11, 17 ] module: [ Client, Server, Core, External, Check-Updated-License-Files, Integration-Test ] experimental: [false] fail-fast: false @@ -69,4 +69,4 @@ jobs: run: | export JDK_VERSION=${{ matrix.java }} export USER=github - /bin/bash ./dev-tools/gitact/gitact-script.sh `pwd` ${{ matrix.module }}; \ No newline at end of file + /bin/bash ./dev-tools/gitact/gitact-script.sh `pwd` ${{ matrix.module }}; diff --git a/DEPENDENCY-LICENSES b/DEPENDENCY-LICENSES index 8d55798f9cf..583c7951717 100644 --- a/DEPENDENCY-LICENSES +++ b/DEPENDENCY-LICENSES @@ -15,6 +15,7 @@ List of third-party dependencies grouped by their license type. 
Apache License + * carbonite (org.clojars.bipinprasad:carbonite:1.6.0 - https://github.com/bipinprasad/carbonite) * HttpClient (commons-httpclient:commons-httpclient:3.1 - http://jakarta.apache.org/httpcomponents/httpclient-3.x/) * Log4j Implemented Over SLF4J (org.slf4j:log4j-over-slf4j:1.7.36 - http://www.slf4j.org) @@ -25,7 +26,6 @@ List of third-party dependencies grouped by their license type. * ActiveMQ :: KahaDB Store (org.apache.activemq:activemq-kahadb-store:5.16.5 - http://activemq.apache.org/activemq-kahadb-store) * ActiveMQ :: MQTT Protocol (org.apache.activemq:activemq-mqtt:5.16.5 - http://activemq.apache.org/activemq-mqtt) * ActiveMQ :: Openwire Legacy Support (org.apache.activemq:activemq-openwire-legacy:5.16.5 - http://activemq.apache.org/activemq-openwire-legacy) - * ActiveMQ Protocol Buffers Implementation and Compiler (org.apache.activemq.protobuf:activemq-protobuf:1.1 - http://activemq.apache.org/activemq-protobuf) * Aether :: API (org.sonatype.aether:aether-api:1.7 - http://aether.sonatype.org/aether-api/) * Aether :: Implementation (org.sonatype.aether:aether-impl:1.7 - http://aether.sonatype.org/aether-impl/) * Aether :: SPI (org.sonatype.aether:aether-spi:1.7 - http://aether.sonatype.org/aether-spi/) @@ -218,9 +218,8 @@ List of third-party dependencies grouped by their license type. * Caffeine cache (com.github.ben-manes.caffeine:caffeine:2.3.5 - https://github.com/ben-manes/caffeine) * Calcite Core (org.apache.calcite:calcite-core:1.14.0 - https://calcite.apache.org/calcite-core) * Calcite Linq4j (org.apache.calcite:calcite-linq4j:1.14.0 - https://calcite.apache.org/calcite-linq4j) - * carbonite (com.twitter:carbonite:1.5.0 - no url defined) * CDI APIs (javax.enterprise:cdi-api:1.0 - http://www.seamframework.org/Weld/cdi-api) - * chill-java (com.twitter:chill-java:0.8.0 - https://github.com/twitter/chill) + * chill-java (com.twitter:chill-java:0.9.5 - https://github.com/twitter/chill) * ClassMate (com.fasterxml:classmate:1.3.1 - http://github.com/cowtowncoder/java-classmate) * CloudWatch Metrics for AWS Java SDK (com.amazonaws:aws-java-sdk-cloudwatchmetrics:1.10.77 - https://aws.amazon.com/sdkforjava) * Codec (commons-codec:commons-codec:1.3 - http://jakarta.apache.org/commons/codec/) @@ -483,15 +482,15 @@ List of third-party dependencies grouped by their license type. 
* snappy-java (org.xerial.snappy:snappy-java:1.1.1.3 - https://github.comm/xerial/snappy-java) * snappy-java (org.xerial.snappy:snappy-java:1.1.2.6 - https://github.com/xerial/snappy-java) * Spatial4J (com.spatial4j:spatial4j:0.5 - http://nexus.sonatype.org/oss-repository-hosting.html/spatial4j) - * Spring AOP (org.springframework:spring-aop:5.3.26 - https://github.com/spring-projects/spring-framework) - * Spring Beans (org.springframework:spring-beans:5.3.26 - https://github.com/spring-projects/spring-framework) - * Spring Commons Logging Bridge (org.springframework:spring-jcl:5.3.26 - https://github.com/spring-projects/spring-framework) - * Spring Context (org.springframework:spring-context:5.3.26 - https://github.com/spring-projects/spring-framework) - * Spring Core (org.springframework:spring-core:5.3.26 - https://github.com/spring-projects/spring-framework) - * Spring Expression Language (SpEL) (org.springframework:spring-expression:5.3.26 - https://github.com/spring-projects/spring-framework) - * Spring JMS (org.springframework:spring-jms:5.3.26 - https://github.com/spring-projects/spring-framework) - * Spring Messaging (org.springframework:spring-messaging:5.3.26 - https://github.com/spring-projects/spring-framework) - * Spring Transaction (org.springframework:spring-tx:5.3.26 - https://github.com/spring-projects/spring-framework) + * Spring AOP (org.springframework:spring-aop:5.3.27 - https://github.com/spring-projects/spring-framework) + * Spring Beans (org.springframework:spring-beans:5.3.27 - https://github.com/spring-projects/spring-framework) + * Spring Commons Logging Bridge (org.springframework:spring-jcl:5.3.27 - https://github.com/spring-projects/spring-framework) + * Spring Context (org.springframework:spring-context:5.3.27 - https://github.com/spring-projects/spring-framework) + * Spring Core (org.springframework:spring-core:5.3.27 - https://github.com/spring-projects/spring-framework) + * Spring Expression Language (SpEL) (org.springframework:spring-expression:5.3.27 - https://github.com/spring-projects/spring-framework) + * Spring JMS (org.springframework:spring-jms:5.3.27 - https://github.com/spring-projects/spring-framework) + * Spring Messaging (org.springframework:spring-messaging:5.3.27 - https://github.com/spring-projects/spring-framework) + * Spring Transaction (org.springframework:spring-tx:5.3.27 - https://github.com/spring-projects/spring-framework) * StAX API (stax:stax-api:1.0.1 - http://stax.codehaus.org/) * T-Digest (com.tdunning:t-digest:3.0 - https://github.com/tdunning/t-digest) * Tephra API (co.cask.tephra:tephra-api:0.6.0 - https://github.com/caskdata/tephra/tephra-api) @@ -549,7 +548,7 @@ List of third-party dependencies grouped by their license type. Apache License, Version 2.0, GNU General Public License, version 2 - * RocksDB JNI (org.rocksdb:rocksdbjni:6.27.3 - https://rocksdb.org) + * RocksDB JNI (org.rocksdb:rocksdbjni:8.1.1 - https://rocksdb.org) Apache License, Version 2.0, GNU Lesser General Public License (LGPL), Version 2.1 diff --git a/DEVELOPER.md b/DEVELOPER.md index 82598670288..698cec8357f 100644 --- a/DEVELOPER.md +++ b/DEVELOPER.md @@ -247,7 +247,7 @@ To pull in a merge request you should generally follow the command line instruct ## Prerequisites -In order to build `storm` you need `python`, `ruby` and `nodejs`. In order to avoid an overfull page we don't provide platform/OS specific installation instructions for those here. Please refer to you platform's/OS' documentation for support. 
+In order to build `storm` you need `python3`, `ruby` and `nodejs`. In order to avoid an overfull page we don't provide platform/OS specific installation instructions for those here. Please refer to your platform's/OS' documentation for support. The `ruby` package manager `rvm` and `nodejs` package manager `nvm` are for convenience and are used in the tests which run on [GitHub actions](https://github.com/apache/storm/actions). They can be installed using `curl -L https://get.rvm.io | bash -s stable --autolibs=enabled && source ~/.profile` (see the [rvm installation instructions](https://github.com/rvm/rvm) for details) and `wget -qO- https://raw.githubusercontent.com/creationix/nvm/v0.26.1/install.sh | bash && source ~/.bashrc` (see the [nvm installation instructions](https://github.com/creationix/nvm) for details). @@ -279,7 +279,7 @@ If you wish to skip the unit tests you can do this by adding `-DskipTests` to the command line. If you wish to skip the examples and external modules, you can do this by adding `-P '!examples,!externals'` to the command line. -In case you modified `storm.thrift`, you have to regenerate thrift code as java and python code before compiling whole project. +In case you modified `storm.thrift`, you have to regenerate the Thrift code as Java and Python code before compiling the whole project. ```sh cd storm-client/src diff --git a/LICENSE-binary b/LICENSE-binary index ab91f4c2ef2..71b19385484 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -645,6 +645,7 @@ The license texts of these dependencies can be found in the licenses directory. Apache License + * carbonite (org.clojars.bipinprasad:carbonite:1.6.0 - https://github.com/bipinprasad/carbonite) * HttpClient (commons-httpclient:commons-httpclient:3.1 - http://jakarta.apache.org/httpcomponents/httpclient-3.x/) Apache License, Version 2.0 @@ -773,8 +774,7 @@ The license texts of these dependencies can be found in the licenses directory. * BoneCP :: Core Library (com.jolbox:bonecp:0.8.0.RELEASE - http://jolbox.com/bonecp) * Calcite Core (org.apache.calcite:calcite-core:1.14.0 - https://calcite.apache.org/calcite-core) * Calcite Linq4j (org.apache.calcite:calcite-linq4j:1.14.0 - https://calcite.apache.org/calcite-linq4j) - * carbonite (com.twitter:carbonite:1.5.0 - no url defined) - * chill-java (com.twitter:chill-java:0.8.0 - https://github.com/twitter/chill) + * chill-java (com.twitter:chill-java:0.9.5 - https://github.com/twitter/chill) * ClassMate (com.fasterxml:classmate:1.3.1 - http://github.com/cowtowncoder/java-classmate) * com.papertrail:profiler (com.papertrail:profiler:1.0.2 - https://github.com/papertrail/profiler) * Commons Configuration (commons-configuration:commons-configuration:1.6 - http://commons.apache.org/${pom.artifactId.substring(8)}/) @@ -936,7 +936,7 @@ The license texts of these dependencies can be found in the licenses directory.
* JAX-RS provider for JSON content type (org.codehaus.jackson:jackson-jaxrs:1.9.13 - http://jackson.codehaus.org) * Xml Compatibility extensions for Jackson (org.codehaus.jackson:jackson-xc:1.9.13 - http://jackson.codehaus.org) * Javassist (org.javassist:javassist:3.24.1-GA - http://www.javassist.org/) - * RocksDB JNI (org.rocksdb:rocksdbjni:6.27.3 - https://rocksdb.org) + * RocksDB JNI (org.rocksdb:rocksdbjni:8.1.1 - https://rocksdb.org) * JCTools Core (org.jctools:jctools-core:2.0.1 - http://jctools.github.io/JCTools/) * Bean Validation API (javax.validation:validation-api:2.0.1.Final - http://beanvalidation.org) * jersey-container-grizzly2-http (org.glassfish.jersey.containers:jersey-container-grizzly2-http:2.29 - https://projects.eclipse.org/projects/ee4j.jersey/project/jersey-container-grizzly2-http) diff --git a/SECURITY.md b/SECURITY.md index e41c31a2c96..1d1bb5225a7 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -414,6 +414,17 @@ min.user.id=$(min_user_id) where `worker_launcher_group` is the same group the supervisor user is a part of, and `min.user.id` is set to the first real user id on the system. This config file also needs to be owned by root and *not* have world nor group write permissions. + +### Storm-Netty Authentication + +Authentication for Netty connections between workers is disabled by default. +It can be enabled either for the whole cluster or on a per-topology basis. This setting prevents any +unauthorized messages from being processed. The config for enabling +Storm-Netty authentication is as follows: +```yaml +storm.messaging.netty.authentication: true +``` + ### Impersonating a user A storm client may submit requests on behalf of another user. For example, if a `userX` submits an oozie workflow and as part of workflow execution if user `oozie` wants to submit a topology on behalf of `userX` it can do so by leveraging the impersonation feature. In order to submit a topology as some other user, you can use the `StormSubmitter.submitTopologyAs` API. Alternatively you can use `NimbusClient.getConfiguredClientAs` @@ -498,5 +509,3 @@ nimbus.groups: ### DRPC Hopefully more on this soon - - diff --git a/bin/storm b/bin/storm index f30f05f0213..65e3bda45eb 100755 --- a/bin/storm +++ b/bin/storm @@ -35,13 +35,13 @@ done # check for version if [ -z $PYTHON ]; then - PYTHON="/usr/bin/env python" + PYTHON="/usr/bin/env python3" fi majversion=`$PYTHON -V 2>&1 | awk '{print $2}' | cut -d'.' -f1` minversion=`$PYTHON -V 2>&1 | awk '{print $2}' | cut -d'.'
-f2` numversion=$(( 10 * $majversion + $minversion)) -if (( $numversion < 26 )); then - echo "Need python version > 2.6" +if (( $numversion < 30 )); then + echo "Need Python version >= 3.0" exit 1 fi diff --git a/bin/storm.ps1 b/bin/storm.ps1 index 0b1220aebe2..b98bdee8344 100644 --- a/bin/storm.ps1 +++ b/bin/storm.ps1 @@ -23,12 +23,12 @@ while((Get-Item $PRG).LinkType -eq "SymbolicLink") { } # Check for Python version -$PythonVersion = (& python -V 2>&1).Split(" ")[1]; +$PythonVersion = (& python3 -V 2>&1).Split(" ")[1]; $PythonMajor = [int]$PythonVersion.Split(".")[0]; $PythonMinor = [int]$PythonVersion.Split(".")[1]; $PythonNumVersion = $PythonMajor * 10 + $PythonMinor; -if($PythonNumVersion -le 26) { - Write-Output "Need python version > 2.6"; +if($PythonNumVersion -lt 30) { + Write-Output "Need Python version >= 3.0"; exit 1; } @@ -64,6 +64,6 @@ if(Test-Path $StormEnvPath) { } $ArgsForProcess = @(([io.path]::combine("$STORM_BIN_DIR", "storm.py"))) + $args -Start-Process -FilePath python -ArgumentList $ArgsForProcess -Wait -NoNewWindow +Start-Process -FilePath python3 -ArgumentList $ArgsForProcess -Wait -NoNewWindow exit $LastExitCode diff --git a/bin/storm.py b/bin/storm.py index ed51fc20aa8..6d7374142da 100755 --- a/bin/storm.py +++ b/bin/storm.py @@ -95,7 +95,6 @@ def confvalue(name, storm_config_opts, extrapaths, overriding_conf_file=None, da "-cp", get_classpath(extrajars=extrapaths, daemon=daemon), "org.apache.storm.command.ConfigValue", name ] output = subprocess.Popen(command, stdout=subprocess.PIPE).communicate()[0] - # python 3 if not isinstance(output, str): output = output.decode('utf-8') lines = output.split(os.linesep) @@ -905,7 +904,7 @@ def initialize_admin_subcommand(subparsers): def initialize_shell_subcommand(subparsers): command_help = """ Archives resources to jar and uploads jar to Nimbus, and executes following arguments on "local". Useful for non JVM languages. - eg: `storm shell resources/ python topology.py arg1 arg2`""" + eg: `storm shell resources/ python3 topology.py arg1 arg2`""" sub_parser = subparsers.add_parser("shell", help=command_help, formatter_class=SortingHelpFormatter) diff --git a/conf/defaults.yaml b/conf/defaults.yaml index da4bc16c3fd..8b7d87b3ff2 100644 --- a/conf/defaults.yaml +++ b/conf/defaults.yaml @@ -242,6 +242,7 @@ storm.messaging.netty.transfer.batch.size: 262144 storm.messaging.netty.socket.backlog: 500 # By default, the Netty SASL authentication is set to false. Users can override and set it true for a specific topology.
+# see https://issues.apache.org/jira/browse/STORM-348 for more details storm.messaging.netty.authentication: false # Default plugin to use for automatic network topology discovery diff --git a/dev-tools/gitact/gitact-script.sh b/dev-tools/gitact/gitact-script.sh index 2178ad27f64..c3e1a309fce 100755 --- a/dev-tools/gitact/gitact-script.sh +++ b/dev-tools/gitact/gitact-script.sh @@ -20,7 +20,7 @@ set -x STORM_SRC_ROOT_DIR=$1 -TRAVIS_SCRIPT_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) +THIS_SCRIPT_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) cd "${STORM_SRC_ROOT_DIR}" || (echo "Cannot cd to ${STORM_SRC_ROOT_DIR}"; exit 1) @@ -50,8 +50,8 @@ then fi # We should be concerned that Travis CI could be very slow because it uses VM export STORM_TEST_TIMEOUT_MS=150000 -# Travis only has 3GB of memory, lets use 1GB for build, and 1.5GB for forked JVMs -export MAVEN_OPTS="-Xmx1024m" +# GitHub Actions runners only have 7GB of memory, let's use 2GB for the build, with enough stack to run tests +export MAVEN_OPTS="-Xmx2048m" mvn --batch-mode test -fae -Pnative,all-tests,examples,externals -Prat -pl "$TEST_MODULES" BUILD_RET_VAL=$? @@ -59,7 +59,7 @@ BUILD_RET_VAL=$? for dir in $(find . -type d -and -wholename \*/target/\*-reports) do echo "Looking for errors in ${dir}" - python3 "${TRAVIS_SCRIPT_DIR}"/print-errors-from-test-reports.py "${dir}" + python3 "${THIS_SCRIPT_DIR}"/print-errors-from-test-reports.py "${dir}" done exit ${BUILD_RET_VAL} diff --git a/dev-tools/jira-github-join.py b/dev-tools/jira-github-join.py index 3e23ea1e507..a6e1580b6b2 100755 --- a/dev-tools/jira-github-join.py +++ b/dev-tools/jira-github-join.py @@ -23,7 +23,7 @@ (https://stackoverflow.com/questions/50236117/scraping-ssl-certificate-verify-failed-error-for-http-en-wikipedia-org) Go to Macintosh HD > Applications - > Python3.9 folder (or whatever version of python you're using) + > Python3.9 folder (or whatever version of Python you're using) > double click on "Install Certificates.command" file. """ diff --git a/dev-tools/report/__init__.py b/dev-tools/report/__init__.py index e931fe08fad..81a0e86e291 100644 --- a/dev-tools/report/__init__.py +++ b/dev-tools/report/__init__.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/docs/Clojure-DSL.md b/docs/Clojure-DSL.md index 56bb54f8d14..e8485079d4f 100644 --- a/docs/Clojure-DSL.md +++ b/docs/Clojure-DSL.md @@ -77,7 +77,7 @@ Here's an example `shell-bolt-spec`: ```clojure (shell-bolt-spec {"1" :shuffle "2" ["id"]} - "python" + "python3" "mybolt.py" ["outfield1" "outfield2"] :p 25) diff --git a/docs/Command-line-client.md b/docs/Command-line-client.md index e2b34625fe1..352e1519e2a 100644 --- a/docs/Command-line-client.md +++ b/docs/Command-line-client.md @@ -313,7 +313,7 @@ Syntax: `storm shell resourcesdir command args` Makes constructing jar and uploading to nimbus for using non JVM languages -eg: `storm shell resources/ python topology.py arg1 arg2` +eg: `storm shell resources/ python3 topology.py arg1 arg2` ### upload-credentials diff --git a/docs/Defining-a-non-jvm-language-dsl-for-storm.md b/docs/Defining-a-non-jvm-language-dsl-for-storm.md index 311577b6474..aad5525085e 100644 --- a/docs/Defining-a-non-jvm-language-dsl-for-storm.md +++ b/docs/Defining-a-non-jvm-language-dsl-for-storm.md @@ -20,19 +20,26 @@ For a Python DSL, you would want to make use of "2" and "3".
ShellComponent lets There's a "storm shell" command that will help with submitting a topology. Its usage is like this: ``` -storm shell resources/ python topology.py arg1 arg2 +storm shell resources/ python3 topology.py arg1 arg2 ``` storm shell will then package resources/ into a jar, upload the jar to Nimbus, and call your topology.py script like this: ``` -python topology.py arg1 arg2 {nimbus-host} {nimbus-port} {uploaded-jar-location} +python3 topology.py arg1 arg2 {nimbus-host} {nimbus-port} {uploaded-jar-location} ``` Then you can connect to Nimbus using the Thrift API and submit the topology, passing {uploaded-jar-location} into the submitTopology method. For reference, here's the submitTopology definition: ```java -void submitTopology(1: string name, 2: string uploadedJarLocation, 3: string jsonConf, 4: StormTopology topology) throws (1: AlreadyAliveException e, 2: InvalidTopologyException ite); +void submitTopology( + 1: string name, + 2: string uploadedJarLocation, + 3: string jsonConf, + 4: StormTopology topology) + throws ( + 1: AlreadyAliveException e, + 2: InvalidTopologyException ite); ``` Finally, one of the key things to do in a non-JVM DSL is make it easy to define the entire topology in one file (the bolts, spouts, and the definition of the topology). diff --git a/docs/Multilang-protocol.md b/docs/Multilang-protocol.md index ae77b19907a..77cb9314cbd 100644 --- a/docs/Multilang-protocol.md +++ b/docs/Multilang-protocol.md @@ -30,10 +30,10 @@ program via the shell using Java's ProcessBuilder class. By default the ShellProcess assumes that your code is packaged inside of your topology jar under the resources subdirectory of your jar and by default will change the current working directory of the executable process to be that resources directory extracted from the jar. -A jar file does not store permissions of the files in it. This includes the execute bit that would allow a shell script to be laoded and run by the operating systme. -As such in most examples the scripts are of the form `python mybolt.py` because the python executable is already on the supervisor and mybolt is packaged in the resources directory of the jar. +A jar file does not store permissions of the files in it. This includes the execute bit that would allow a shell script to be loaded and run by the operating system. +As such in most examples the scripts are of the form `python3 mybolt.py` because the Python executable is already on the supervisor and mybolt is packaged in the resources directory of the jar. -If you want to package something more complicated, like a new version of python itself, you need to instead use the blob store for this and a `.tgz` archive that does support permissions. +If you want to package something more complicated, like a new version of Python itself, you need to instead use the blob store for this and a `.tgz` archive that does support permissions. See the docs on the [Blob Store](distcache-blobstore.html) for more details on how to ship a jar.
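The blob store route described above works by mapping uploaded blob keys to local symlinks in the worker's directory. A minimal sketch of the topology-side wiring, assuming a hypothetical blob key `python36.tgz` that has already been uploaded (the `newPython` symlink name matches the example in the next hunk):

```java
import java.util.HashMap;
import java.util.Map;
import org.apache.storm.Config;

public class PythonBlobConf {
    public static Config withPythonRuntime() {
        // Settings for one blob: symlink it as ./newPython and unpack the archive.
        Map<String, Object> blobSettings = new HashMap<>();
        blobSettings.put("localname", "newPython");
        blobSettings.put("uncompress", true);

        // Blob-store key -> settings; "python36.tgz" is a hypothetical key.
        Map<String, Map<String, Object>> blobstoreMap = new HashMap<>();
        blobstoreMap.put("python36.tgz", blobSettings);

        Config conf = new Config();
        conf.put(Config.TOPOLOGY_BLOBSTORE_MAP, blobstoreMap);
        return conf;
    }
}
```

Setting `uncompress` to true is what lets the `.tgz` route preserve execute bits, which is exactly the limitation of jars described above.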
@@ -49,7 +49,7 @@ So if I shipped python with a symlink named `newPython` and a python ShellSpout ``` public MyShellSpout() { - super("./newPython/bin/python", "./shell_spout.py"); + super("./newPython/bin/python3", "./shell_spout.py"); changeChildCWD(false); } ``` diff --git a/docs/OCI-support.md b/docs/OCI-support.md index c73fcb5f0dd..a44959ea1c3 100644 --- a/docs/OCI-support.md +++ b/docs/OCI-support.md @@ -46,7 +46,7 @@ Docker does not need to be installed on each node, nor is there a dependency on by an admin before containers can be launched. All that is required to be present on each node is an OCI-compatible runtime like `runc`. -##### Leverages Distributed File Sytems For Scale +##### Leverages Distributed File Systems For Scale Image can be fetched via HDFS or other distributed file systems instead of the Docker registry. This prevents a large cluster from overwhelming a Docker registry when a big topology causes all of the nodes to request an image at once. This also allows large clusters @@ -66,7 +66,7 @@ localization is also faster, as the layers no longer need to be unpacked into a First you need to use the`docker-to-squash.py` script to download docker images and configs, convert layers to squashfs files and put them to a directory in HDFS, for example ```bash -python docker-to-squash.py pull-build-push-update --hdfs-root hdfs://hostname:port/containers \ +python3 docker-to-squash.py pull-build-push-update --hdfs-root hdfs://hostname:port/containers \ docker.xxx.com:4443/hadoop-user-images/storm/rhel7:20201202-232133,storm/rhel7:dev_current --log DEBUG --bootstrap ``` diff --git a/docs/Resource_Aware_Scheduler_overview.md b/docs/Resource_Aware_Scheduler_overview.md index 4d03f88c525..7fb0e31e26c 100644 --- a/docs/Resource_Aware_Scheduler_overview.md +++ b/docs/Resource_Aware_Scheduler_overview.md @@ -377,6 +377,7 @@ The metrics with -1:__system are generally metrics for the entire worker. In th The Memory usage is similar but look at the usedBytes. offHeap is 64621728 or about 62MB, and onHeap is 83857888 or about 80MB, but you should know what you set your heap to in each of your workers already. How do you divide this up per bolt/spout? That is a bit harder and may require some trial and error from your end.
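The metrics discussion above ends with sizing memory per bolt/spout by trial and error. One way to feed the measured numbers back in is through the resource-aware scheduler's per-component requests; a minimal sketch using Storm's built-in test components and the roughly 80MB on-heap / 62MB off-heap figures from the example (the parallelism and CPU numbers are assumptions):

```java
import org.apache.storm.testing.TestWordCounter;
import org.apache.storm.testing.TestWordSpout;
import org.apache.storm.topology.TopologyBuilder;

public class SizedTopology {
    public static TopologyBuilder build() {
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("words", new TestWordSpout(), 2)
               .setMemoryLoad(128.0);        // on-heap request, in MB
        builder.setBolt("counter", new TestWordCounter(), 4)
               .setMemoryLoad(80.0, 62.0)    // ~80MB on-heap, ~62MB off-heap, as measured
               .setCPULoad(50.0)             // roughly half of one core
               .shuffleGrouping("words");
        return builder;
    }
}
```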
+ ## Enhancements on original DefaultResourceAwareStrategy The default resource aware scheduling strategy as described in the paper above has two main scheduling phases: diff --git a/docs/Setting-up-a-Storm-cluster.md b/docs/Setting-up-a-Storm-cluster.md index f4a26d2c393..b86d47f7ce9 100644 --- a/docs/Setting-up-a-Storm-cluster.md +++ b/docs/Setting-up-a-Storm-cluster.md @@ -30,7 +30,7 @@ A few notes about Zookeeper deployment: Next you need to install Storm's dependencies on Nimbus and the worker machines. These are: 1. Java 8+ (Apache Storm 2.x is tested through GitHub actions against Java 8 and Java 11) -2. Python 2.7.x or Python 3.x +2. Python 3.x These are the versions of the dependencies that have been tested with Storm. Storm may or may not work with different versions of Java and/or Python. diff --git a/docs/Tutorial.md b/docs/Tutorial.md index 1dd891ba4a7..c4ebffaec44 100644 --- a/docs/Tutorial.md +++ b/docs/Tutorial.md @@ -251,7 +251,7 @@ Here's the definition of the `SplitSentence` bolt from `WordCountTopology`: ```java public static class SplitSentence extends ShellBolt implements IRichBolt { public SplitSentence() { - super("python", "splitsentence.py"); + super("python3", "splitsentence.py"); } public void declareOutputFields(OutputFieldsDeclarer declarer) { @@ -260,7 +260,7 @@ public static class SplitSentence extends ShellBolt implements IRichBolt { } ``` -`SplitSentence` overrides `ShellBolt` and declares it as running using `python` with the arguments `splitsentence.py`. Here's the implementation of `splitsentence.py`: +`SplitSentence` extends `ShellBolt` and declares it as running using `python3` with the arguments `splitsentence.py`. Here's the implementation of `splitsentence.py`: ```python import storm @@ -282,7 +282,7 @@ Earlier on in this tutorial, we skipped over a few aspects of how tuples are emi ## Trident -Storm guarantees that every message will be played through the topology at least once. A common question asked is "how do you do things like counting on top of Storm? Won't you overcount?" Storm has a higher level API called Trudent that let you achieve exactly-once messaging semantics for most computations. Read more about Trident [here](Trident-tutorial.html). +Storm guarantees that every message will be played through the topology at least once. A common question asked is "how do you do things like counting on top of Storm? Won't you overcount?" Storm has a higher level API called Trident that lets you achieve exactly-once messaging semantics for most computations. Read more about Trident [here](Trident-tutorial.html).
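Since the corrected paragraph points readers at Trident, here is the canonical word-count shape of that API as a minimal sketch (the `spout` parameter is assumed to emit a "sentence" field; see the Trident tutorial for the full example):

```java
import org.apache.storm.trident.TridentState;
import org.apache.storm.trident.TridentTopology;
import org.apache.storm.trident.operation.BaseFunction;
import org.apache.storm.trident.operation.TridentCollector;
import org.apache.storm.trident.operation.builtin.Count;
import org.apache.storm.trident.testing.MemoryMapState;
import org.apache.storm.trident.tuple.TridentTuple;
import org.apache.storm.topology.IRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;

public class TridentWordCount {
    // Splits each "sentence" tuple into individual "word" tuples.
    public static class Split extends BaseFunction {
        @Override
        public void execute(TridentTuple tuple, TridentCollector collector) {
            for (String word : tuple.getString(0).split(" ")) {
                collector.emit(new Values(word));
            }
        }
    }

    public static TridentState wordCounts(TridentTopology topology, IRichSpout spout) {
        // persistentAggregate is what provides the exactly-once counting semantics.
        return topology.newStream("spout1", spout)
                .each(new Fields("sentence"), new Split(), new Fields("word"))
                .groupBy(new Fields("word"))
                .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"));
    }
}
```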
## Distributed RPC diff --git a/docs/Using-non-JVM-languages-with-Storm.md b/docs/Using-non-JVM-languages-with-Storm.md index 1b3ae451b91..da809340906 100644 --- a/docs/Using-non-JVM-languages-with-Storm.md +++ b/docs/Using-non-JVM-languages-with-Storm.md @@ -6,13 +6,13 @@ layout: documentation - creating topologies in another language is easy since topologies are just thrift structures (link to storm.thrift) - implementing spouts and bolts in another language is called a "multilang components" or "shelling" - Here's a specification of the protocol: [Multilang protocol](Multilang-protocol.html) - - the thrift structure lets you define multilang components explicitly as a program and a script (e.g., python and the file implementing your bolt) + - the thrift structure lets you define multilang components explicitly as a program and a script (e.g., python3 and the file implementing your bolt) - In Java, you override ShellBolt or ShellSpout to create multilang components - note that output fields declarations happens in the thrift structure, so in Java you create multilang components like the following: - declare fields in java, processing code in the other language by specifying it in constructor of shellbolt - multilang uses json messages over stdin/stdout to communicate with the subprocess - - storm comes with ruby, python, and fancy adapters that implement the protocol. show an example of python - - python supports emitting, anchoring, acking, and logging + - storm comes with Ruby, Python, and Fancy adapters that implement the protocol. show an example of Python + - Python supports emitting, anchoring, acking, and logging - "storm shell" command makes constructing jar and uploading to nimbus easy - makes jar and uploads it - calls your program with host/port of nimbus and the jarfile id @@ -31,18 +31,18 @@ union ComponentObject { } ``` -For a non-JVM DSL, you would want to make use of "2" and "3". ShellComponent lets you specify a script to run that component (e.g., your python code). And JavaObject lets you specify native java spouts and bolts for the component (and Storm will use reflection to create that spout or bolt). +For a non-JVM DSL, you would want to make use of "2" and "3". ShellComponent lets you specify a script to run that component (e.g., your Python code). And JavaObject lets you specify native java spouts and bolts for the component (and Storm will use reflection to create that spout or bolt). There's a "storm shell" command that will help with submitting a topology. Its usage is like this: ``` -storm shell resources/ python topology.py arg1 arg2 +storm shell resources/ python3 topology.py arg1 arg2 ``` storm shell will then package resources/ into a jar, upload the jar to Nimbus, and call your topology.py script like this: ``` -python topology.py arg1 arg2 {nimbus-host} {nimbus-port} {uploaded-jar-location} +python3 topology.py arg1 arg2 {nimbus-host} {nimbus-port} {uploaded-jar-location} ``` Then you can connect to Nimbus using the Thrift API and submit the topology, passing {uploaded-jar-location} into the submitTopology method. 
For reference, here's the submitTopology definition: diff --git a/docs/flux.md b/docs/flux.md index 000270f7731..cf83741c480 100644 --- a/docs/flux.md +++ b/docs/flux.md @@ -47,7 +47,7 @@ The easiest way to use Flux, is to add it as a Maven dependency in you project a If you would like to build Flux from source and run the unit/integration tests, you will need the following installed on your system: -* Python 2.7.x or later +* Python 3.0.x or later * Node.js 0.10.x or later #### Building with unit tests enabled: @@ -649,7 +649,7 @@ bolts: className: "org.apache.storm.flux.wrappers.bolts.FluxShellBolt" constructorArgs: # command line - - ["python", "splitsentence.py"] + - ["python3", "splitsentence.py"] # output fields - ["word"] parallelism: 1 @@ -796,7 +796,7 @@ bolts: className: "org.apache.storm.flux.wrappers.bolts.FluxShellBolt" constructorArgs: # command line - - ["python", "splitsentence.py"] + - ["python3", "splitsentence.py"] # output fields - ["word"] parallelism: 1 diff --git a/docs/storm-sql-example.md b/docs/storm-sql-example.md index 61f255b9096..fbd53185fc8 100644 --- a/docs/storm-sql-example.md +++ b/docs/storm-sql-example.md @@ -13,8 +13,7 @@ This page assumes that Apache Zookeeper, Apache Storm and Apache Kafka are insta For convenience, this page assumes that Apache Kafka 0.10.0 is installed via `brew`. We'll use below tools to prepare the JSON data which will be fed to the input data source. -Since they're Python projects, this page assumes Python 2.7 with `pip`, `virtualenv` is installed locally. -If you're using Python 3, you may need to convert some places to be compatible with 3 manually while feeding data. +Since they're Python projects, this page assumes Python 3.0.x with `pip3` and `virtualenv` installed locally. * https://github.com/kiritbasu/Fake-Apache-Log-Generator * https://github.com/rory/apache-log-parser @@ -92,10 +91,10 @@ For convenience, you can skip cloning project and download modified file from he `apache-log-parser` can be installed via `pip`. ``` -$ pip install apache-log-parser +$ pip3 install apache-log-parser ``` -Since apache-log-parser is a library, in order to parse fake log we need to write small python script. +Since apache-log-parser is a library, in order to parse the fake log we need to write a small Python script. Let's create file `parse-fake-log-gen-to-json-with-incrementing-id.py` with below content: ``` @@ -115,9 +114,8 @@ while True: parsed_dict['id'] = auto_incr_id auto_incr_id += 1 - # works only python 2, but I don't care cause it's just a test module :) - parsed_dict = {k.upper(): v for k, v in parsed_dict.iteritems() if not k.endswith('datetimeobj')} - print json.dumps(parsed_dict) + parsed_dict = {k.upper(): v for k, v in parsed_dict.items() if not k.endswith('datetimeobj')} + print(json.dumps(parsed_dict)) ``` ### Feed parsed JSON Apache Log to Kafka @@ -125,7 +123,7 @@ while True: OK! We're prepared to feed the data to Kafka topic. Let's use `kafka-console-producer` to feed parsed JSON. ``` -$ python apache-fake-log-gen.py -n 0 | python parse-fake-log-gen-to-json-with-incrementing-id.py | kafka-console-producer --broker-list localhost:9092 --topic apache-logs +$ python3 apache-fake-log-gen.py -n 0 | python3 parse-fake-log-gen-to-json-with-incrementing-id.py | kafka-console-producer --broker-list localhost:9092 --topic apache-logs ``` and execute below to another terminal session to confirm data is being fed.
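The walkthrough above feeds the `apache-logs` topic that StormSQL will read via its DDL. Purely as an illustration of consuming that same topic from a regular topology with storm-kafka-client (the consumer group id is an arbitrary assumption):

```java
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.storm.kafka.spout.KafkaSpout;
import org.apache.storm.kafka.spout.KafkaSpoutConfig;

public class ApacheLogsSpout {
    public static KafkaSpout<String, String> create() {
        // Reads the "apache-logs" topic fed by kafka-console-producer above.
        KafkaSpoutConfig<String, String> config =
            KafkaSpoutConfig.builder("localhost:9092", "apache-logs")
                .setProp(ConsumerConfig.GROUP_ID_CONFIG, "storm-sql-demo") // hypothetical group id
                .build();
        return new KafkaSpout<>(config);
    }
}
```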
diff --git a/examples/storm-elasticsearch-examples/pom.xml b/examples/storm-elasticsearch-examples/pom.xml index 10847303c8e..195fb8b4d3e 100644 --- a/examples/storm-elasticsearch-examples/pom.xml +++ b/examples/storm-elasticsearch-examples/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml diff --git a/examples/storm-hbase-examples/pom.xml b/examples/storm-hbase-examples/pom.xml index 26037499193..8e32febbe47 100644 --- a/examples/storm-hbase-examples/pom.xml +++ b/examples/storm-hbase-examples/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml diff --git a/examples/storm-hdfs-examples/pom.xml b/examples/storm-hdfs-examples/pom.xml index fd341817b8b..f7b30e99554 100644 --- a/examples/storm-hdfs-examples/pom.xml +++ b/examples/storm-hdfs-examples/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml diff --git a/examples/storm-hive-examples/pom.xml b/examples/storm-hive-examples/pom.xml index 9ee1ba28cf1..c990df7481a 100644 --- a/examples/storm-hive-examples/pom.xml +++ b/examples/storm-hive-examples/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml diff --git a/examples/storm-jdbc-examples/pom.xml b/examples/storm-jdbc-examples/pom.xml index 9634a209b0f..7d1890bbc2a 100644 --- a/examples/storm-jdbc-examples/pom.xml +++ b/examples/storm-jdbc-examples/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml diff --git a/examples/storm-jms-examples/pom.xml b/examples/storm-jms-examples/pom.xml index 73973801322..f2a0d202050 100644 --- a/examples/storm-jms-examples/pom.xml +++ b/examples/storm-jms-examples/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml @@ -29,7 +29,7 @@ storm-jms-examples - 5.3.26 + 5.3.27 diff --git a/examples/storm-kafka-client-examples/pom.xml b/examples/storm-kafka-client-examples/pom.xml index 7e1b64b8921..d564ca60f55 100644 --- a/examples/storm-kafka-client-examples/pom.xml +++ b/examples/storm-kafka-client-examples/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml diff --git a/examples/storm-loadgen/pom.xml b/examples/storm-loadgen/pom.xml index 5120c8cb529..9d5072b5a25 100644 --- a/examples/storm-loadgen/pom.xml +++ b/examples/storm-loadgen/pom.xml @@ -20,7 +20,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml storm-loadgen diff --git a/examples/storm-loadgen/src/main/java/org/apache/storm/loadgen/ThroughputVsLatency.java b/examples/storm-loadgen/src/main/java/org/apache/storm/loadgen/ThroughputVsLatency.java index 4a139cf8bb1..6ade66851a7 100644 --- a/examples/storm-loadgen/src/main/java/org/apache/storm/loadgen/ThroughputVsLatency.java +++ b/examples/storm-loadgen/src/main/java/org/apache/storm/loadgen/ThroughputVsLatency.java @@ -261,7 +261,6 @@ public static void main(String[] args) throws Exception { } conf.put(Config.TOPOLOGY_WORKER_METRICS, workerMetrics); conf.put(Config.TOPOLOGY_BUILTIN_METRICS_BUCKET_SIZE_SECS, 10); - conf.put(Config.TOPOLOGY_WORKER_CHILDOPTS, "-Xmx2g"); TopologyBuilder builder = new TopologyBuilder(); diff --git a/examples/storm-mongodb-examples/pom.xml b/examples/storm-mongodb-examples/pom.xml index 4f0d37d64b0..f7b0b78b3a5 100644 --- a/examples/storm-mongodb-examples/pom.xml +++ b/examples/storm-mongodb-examples/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT 
../../pom.xml diff --git a/examples/storm-mqtt-examples/pom.xml b/examples/storm-mqtt-examples/pom.xml index f16261eb2fb..d07bc9b84f9 100644 --- a/examples/storm-mqtt-examples/pom.xml +++ b/examples/storm-mqtt-examples/pom.xml @@ -26,7 +26,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml @@ -41,6 +41,14 @@ org.apache.storm storm-mqtt ${project.version} + + + + org.apache.activemq.protobuf + activemq-protobuf + + + org.apache.storm @@ -59,10 +67,24 @@ org.apache.activemq activemq-mqtt + + + + org.apache.activemq.protobuf + activemq-protobuf + + org.apache.activemq activemq-kahadb-store + + + + org.apache.activemq.protobuf + activemq-protobuf + + com.google.guava diff --git a/examples/storm-opentsdb-examples/pom.xml b/examples/storm-opentsdb-examples/pom.xml index 0b94a0d52ff..6099e04d88b 100644 --- a/examples/storm-opentsdb-examples/pom.xml +++ b/examples/storm-opentsdb-examples/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml diff --git a/examples/storm-perf/pom.xml b/examples/storm-perf/pom.xml index f452233e13e..86fb8db1e61 100644 --- a/examples/storm-perf/pom.xml +++ b/examples/storm-perf/pom.xml @@ -20,7 +20,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml diff --git a/examples/storm-pmml-examples/pom.xml b/examples/storm-pmml-examples/pom.xml index b080707df65..afced750e9c 100644 --- a/examples/storm-pmml-examples/pom.xml +++ b/examples/storm-pmml-examples/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml 4.0.0 diff --git a/examples/storm-redis-examples/pom.xml b/examples/storm-redis-examples/pom.xml index 3a7631c3479..cd94f273f9e 100644 --- a/examples/storm-redis-examples/pom.xml +++ b/examples/storm-redis-examples/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml diff --git a/examples/storm-rocketmq-examples/pom.xml b/examples/storm-rocketmq-examples/pom.xml index 0faf1645f86..c0ae8eb481f 100644 --- a/examples/storm-rocketmq-examples/pom.xml +++ b/examples/storm-rocketmq-examples/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml diff --git a/examples/storm-solr-examples/pom.xml b/examples/storm-solr-examples/pom.xml index 0d91da790dd..6014fa5d17a 100644 --- a/examples/storm-solr-examples/pom.xml +++ b/examples/storm-solr-examples/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml diff --git a/examples/storm-starter/pom.xml b/examples/storm-starter/pom.xml index 04b5c9494f3..91f74891c90 100644 --- a/examples/storm-starter/pom.xml +++ b/examples/storm-starter/pom.xml @@ -20,7 +20,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml diff --git a/examples/storm-starter/src/jvm/org/apache/storm/starter/BlobStoreAPIWordCountTopology.java b/examples/storm-starter/src/jvm/org/apache/storm/starter/BlobStoreAPIWordCountTopology.java index 46df78eeb82..f8059f3c1fc 100644 --- a/examples/storm-starter/src/jvm/org/apache/storm/starter/BlobStoreAPIWordCountTopology.java +++ b/examples/storm-starter/src/jvm/org/apache/storm/starter/BlobStoreAPIWordCountTopology.java @@ -247,7 +247,7 @@ public void declareOutputFields(OutputFieldsDeclarer declarer) { public static class SplitSentence extends ShellBolt implements IRichBolt { public SplitSentence() { - super("python", "splitsentence.py"); + super("python3", "splitsentence.py"); } @Override diff --git 
a/examples/storm-starter/src/jvm/org/apache/storm/starter/WordCountTopology.java b/examples/storm-starter/src/jvm/org/apache/storm/starter/WordCountTopology.java index ff3e4c33748..71a3b42ed28 100644 --- a/examples/storm-starter/src/jvm/org/apache/storm/starter/WordCountTopology.java +++ b/examples/storm-starter/src/jvm/org/apache/storm/starter/WordCountTopology.java @@ -61,7 +61,7 @@ protected int run(String[] args) throws Exception { public static class SplitSentence extends ShellBolt implements IRichBolt { public SplitSentence() { - super("python", "splitsentence.py"); + super("python3", "splitsentence.py"); } @Override diff --git a/external/storm-autocreds/pom.xml b/external/storm-autocreds/pom.xml index 5f7b999bd6b..bc1ae7a0879 100644 --- a/external/storm-autocreds/pom.xml +++ b/external/storm-autocreds/pom.xml @@ -19,7 +19,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml 4.0.0 @@ -194,6 +194,10 @@ hive-webhcat-java-client ${hive.version} + + org.pentaho + pentaho-aggdesigner-algorithm + org.slf4j slf4j-log4j12 diff --git a/external/storm-blobstore-migration/pom.xml b/external/storm-blobstore-migration/pom.xml index 398e55cdd3d..0ec323b6e54 100644 --- a/external/storm-blobstore-migration/pom.xml +++ b/external/storm-blobstore-migration/pom.xml @@ -21,7 +21,7 @@ limitations under the License. storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml @@ -107,8 +107,8 @@ limitations under the License. maven-compiler-plugin - 1.8 - 1.8 + ${maven.compiler.source} + ${maven.compiler.target} diff --git a/external/storm-cassandra/pom.xml b/external/storm-cassandra/pom.xml index 9379c486979..eb3df0484b3 100644 --- a/external/storm-cassandra/pom.xml +++ b/external/storm-cassandra/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml @@ -36,6 +36,7 @@ 3.11 4.1.1 4.15.0 + --add-opens java.base/java.lang=ALL-UNNAMED --add-opens java.base/java.io=ALL-UNNAMED --add-opens java.base/java.math=ALL-UNNAMED --add-opens java.base/java.util=ALL-UNNAMED --add-opens java.base/java.util.concurrent=ALL-UNNAMED --add-opens java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens java.base/java.nio=ALL-UNNAMED --add-opens java.base/jdk.internal.misc=ALL-UNNAMED @@ -150,6 +151,7 @@ + @@ -176,6 +178,21 @@ false + + org.apache.maven.plugins + maven-surefire-plugin + + ${storm-cassandra.test.introspection.argLine} -Xmx3g -XX:+HeapDumpOnOutOfMemoryError + true + IntegrationTest | ${java.unit.test.exclude.groups} + false + 1.0C + true + + ${project.basedir}/target/testhome + + + diff --git a/external/storm-elasticsearch/pom.xml b/external/storm-elasticsearch/pom.xml index 11a1e1f96bb..f46fda4d8c9 100644 --- a/external/storm-elasticsearch/pom.xml +++ b/external/storm-elasticsearch/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml diff --git a/external/storm-eventhubs/pom.xml b/external/storm-eventhubs/pom.xml index 99672b163a4..65f845ccbec 100755 --- a/external/storm-eventhubs/pom.xml +++ b/external/storm-eventhubs/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml @@ -38,9 +38,9 @@ package - + - + run diff --git a/external/storm-hbase/pom.xml b/external/storm-hbase/pom.xml index 4a93432e1ca..238f7854204 100644 --- a/external/storm-hbase/pom.xml +++ b/external/storm-hbase/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml diff --git a/external/storm-hdfs-blobstore/pom.xml 
b/external/storm-hdfs-blobstore/pom.xml index 586d194b40b..04d93923bfb 100644 --- a/external/storm-hdfs-blobstore/pom.xml +++ b/external/storm-hdfs-blobstore/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml diff --git a/external/storm-hdfs-oci/pom.xml b/external/storm-hdfs-oci/pom.xml index 08457669237..175119fc2f0 100644 --- a/external/storm-hdfs-oci/pom.xml +++ b/external/storm-hdfs-oci/pom.xml @@ -19,7 +19,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml 4.0.0 diff --git a/external/storm-hdfs/pom.xml b/external/storm-hdfs/pom.xml index 259ce073ae4..ef0b27b35bc 100644 --- a/external/storm-hdfs/pom.xml +++ b/external/storm-hdfs/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml diff --git a/external/storm-hive/pom.xml b/external/storm-hive/pom.xml index f10a40bd2f7..68bb07791c3 100644 --- a/external/storm-hive/pom.xml +++ b/external/storm-hive/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml @@ -104,6 +104,10 @@ hive-hcatalog-core ${hive.version} + + org.pentaho + pentaho-aggdesigner-algorithm + log4j log4j @@ -159,6 +163,10 @@ junit junit + + org.pentaho + pentaho-aggdesigner-algorithm + diff --git a/external/storm-jdbc/pom.xml b/external/storm-jdbc/pom.xml index 2672917ae33..fbfc9b31bce 100644 --- a/external/storm-jdbc/pom.xml +++ b/external/storm-jdbc/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml diff --git a/external/storm-jms/pom.xml b/external/storm-jms/pom.xml index da225bc3490..cad7441ddea 100644 --- a/external/storm-jms/pom.xml +++ b/external/storm-jms/pom.xml @@ -20,7 +20,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml diff --git a/external/storm-kafka-client/pom.xml b/external/storm-kafka-client/pom.xml index b7b868aea63..36a92c1fb27 100644 --- a/external/storm-kafka-client/pom.xml +++ b/external/storm-kafka-client/pom.xml @@ -22,7 +22,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml diff --git a/external/storm-kafka-migration/pom.xml b/external/storm-kafka-migration/pom.xml index 52a062bce6e..ad5bfb76a1f 100644 --- a/external/storm-kafka-migration/pom.xml +++ b/external/storm-kafka-migration/pom.xml @@ -22,7 +22,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml diff --git a/external/storm-kafka-monitor/pom.xml b/external/storm-kafka-monitor/pom.xml index 7e030e2ac21..426f7f8a9a2 100644 --- a/external/storm-kafka-monitor/pom.xml +++ b/external/storm-kafka-monitor/pom.xml @@ -20,7 +20,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml 4.0.0 diff --git a/external/storm-kinesis/pom.xml b/external/storm-kinesis/pom.xml index e7b6ca2011e..e05bc4a4e83 100644 --- a/external/storm-kinesis/pom.xml +++ b/external/storm-kinesis/pom.xml @@ -17,7 +17,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml 4.0.0 diff --git a/external/storm-metrics/pom.xml b/external/storm-metrics/pom.xml index 79c0d998139..e53a3e728ed 100644 --- a/external/storm-metrics/pom.xml +++ b/external/storm-metrics/pom.xml @@ -20,7 +20,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml @@ -73,7 +73,7 @@ prepare validate - + @@ -89,7 +89,7 @@ - + run diff --git a/external/storm-mongodb/pom.xml b/external/storm-mongodb/pom.xml index 093c0dbedc7..66bf9fb9ee6 100644 --- a/external/storm-mongodb/pom.xml +++ b/external/storm-mongodb/pom.xml @@ -21,7 +21,7 @@ 
storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml diff --git a/external/storm-mqtt/pom.xml b/external/storm-mqtt/pom.xml index ca08047252b..10a07bcaf70 100644 --- a/external/storm-mqtt/pom.xml +++ b/external/storm-mqtt/pom.xml @@ -25,7 +25,7 @@ org.apache.storm storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml @@ -47,6 +47,13 @@ org.apache.activemq activemq-broker test + + + + org.apache.activemq + activemq-protobuf + + org.apache.activemq diff --git a/external/storm-opentsdb/pom.xml b/external/storm-opentsdb/pom.xml index 3ac193964e0..dc38d94ee9c 100644 --- a/external/storm-opentsdb/pom.xml +++ b/external/storm-opentsdb/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml diff --git a/external/storm-pmml/pom.xml b/external/storm-pmml/pom.xml index 557ff914710..17cca059115 100644 --- a/external/storm-pmml/pom.xml +++ b/external/storm-pmml/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml 4.0.0 diff --git a/external/storm-redis/pom.xml b/external/storm-redis/pom.xml index 055a23642a7..1bf7240cf88 100644 --- a/external/storm-redis/pom.xml +++ b/external/storm-redis/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml diff --git a/external/storm-rocketmq/pom.xml b/external/storm-rocketmq/pom.xml index ee245d96a92..969ac1d30ad 100644 --- a/external/storm-rocketmq/pom.xml +++ b/external/storm-rocketmq/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml diff --git a/external/storm-solr/pom.xml b/external/storm-solr/pom.xml index d1ce9c80868..3fb67f6ee9d 100644 --- a/external/storm-solr/pom.xml +++ b/external/storm-solr/pom.xml @@ -20,7 +20,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml 4.0.0 diff --git a/flux/README.md b/flux/README.md index 47bc7d6df4d..eb10d5a9c2f 100644 --- a/flux/README.md +++ b/flux/README.md @@ -44,7 +44,7 @@ The easiest way to use Flux, is to add it as a Maven dependency in you project a If you would like to build Flux from source and run the unit/integration tests, you will need the following installed on your system: -* Python 2.6.x or later +* Python 3.0.x or later * Node.js 0.10.x or later #### Building with unit tests enabled: @@ -640,7 +640,7 @@ bolts: className: "org.apache.storm.flux.wrappers.bolts.FluxShellBolt" constructorArgs: # command line - - ["python", "splitsentence.py"] + - ["python3", "splitsentence.py"] # output fields - ["word"] parallelism: 1 @@ -787,7 +787,7 @@ bolts: className: "org.apache.storm.flux.wrappers.bolts.FluxShellBolt" constructorArgs: # command line - - ["python", "splitsentence.py"] + - ["python3", "splitsentence.py"] # output fields - ["word"] parallelism: 1 diff --git a/flux/flux-core/pom.xml b/flux/flux-core/pom.xml index bee08885695..b3210bfcd81 100644 --- a/flux/flux-core/pom.xml +++ b/flux/flux-core/pom.xml @@ -21,7 +21,7 @@ org.apache.storm flux - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../pom.xml diff --git a/flux/flux-core/src/test/resources/configs/bad_shell_test.yaml b/flux/flux-core/src/test/resources/configs/bad_shell_test.yaml index 0892ce71662..a7f03b41d4c 100644 --- a/flux/flux-core/src/test/resources/configs/bad_shell_test.yaml +++ b/flux/flux-core/src/test/resources/configs/bad_shell_test.yaml @@ -69,7 +69,7 @@ bolts: className: "org.apache.storm.flux.wrappers.bolts.FluxShellBolt" constructorArgs: # command line - - ["python", "splitsentence.py"] + - ["python3", "splitsentence.py"] # output fields 
- ["word"] configMethods: diff --git a/flux/flux-core/src/test/resources/configs/kafka_test.yaml b/flux/flux-core/src/test/resources/configs/kafka_test.yaml index 76d2ee8c2f1..a6f38bbb27a 100644 --- a/flux/flux-core/src/test/resources/configs/kafka_test.yaml +++ b/flux/flux-core/src/test/resources/configs/kafka_test.yaml @@ -71,7 +71,7 @@ bolts: className: "org.apache.storm.flux.wrappers.bolts.FluxShellBolt" constructorArgs: # command line - - ["python", "splitsentence.py"] + - ["python3", "splitsentence.py"] # output fields - ["word"] parallelism: 1 diff --git a/flux/flux-core/src/test/resources/configs/shell_test.yaml b/flux/flux-core/src/test/resources/configs/shell_test.yaml index dfab3976035..b18d846e9ca 100644 --- a/flux/flux-core/src/test/resources/configs/shell_test.yaml +++ b/flux/flux-core/src/test/resources/configs/shell_test.yaml @@ -69,7 +69,7 @@ bolts: className: "org.apache.storm.flux.wrappers.bolts.FluxShellBolt" constructorArgs: # command line - - ["python", "splitsentence.py"] + - ["python3", "splitsentence.py"] # output fields - ["word"] configMethods: diff --git a/flux/flux-core/src/test/resources/configs/substitution-test.yaml b/flux/flux-core/src/test/resources/configs/substitution-test.yaml index 67ac92aa393..759ee38d0fd 100644 --- a/flux/flux-core/src/test/resources/configs/substitution-test.yaml +++ b/flux/flux-core/src/test/resources/configs/substitution-test.yaml @@ -67,7 +67,7 @@ bolts: className: "org.apache.storm.flux.wrappers.bolts.FluxShellBolt" constructorArgs: # command line - - ["python", "splitsentence.py"] + - ["python3", "splitsentence.py"] # output fields - ["word"] parallelism: 1 diff --git a/flux/flux-examples/pom.xml b/flux/flux-examples/pom.xml index 0883eac4f2f..470af82ef2e 100644 --- a/flux/flux-examples/pom.xml +++ b/flux/flux-examples/pom.xml @@ -21,7 +21,7 @@ org.apache.storm flux - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../pom.xml diff --git a/flux/flux-examples/src/main/resources/kafka_spout.yaml b/flux/flux-examples/src/main/resources/kafka_spout.yaml index 37f14f1a28e..93896805100 100644 --- a/flux/flux-examples/src/main/resources/kafka_spout.yaml +++ b/flux/flux-examples/src/main/resources/kafka_spout.yaml @@ -71,7 +71,7 @@ bolts: className: "org.apache.storm.flux.wrappers.bolts.FluxShellBolt" constructorArgs: # command line - - ["python", "splitsentence.py"] + - ["python3", "splitsentence.py"] # output fields - ["word"] parallelism: 1 diff --git a/flux/flux-examples/src/main/resources/multilang.yaml b/flux/flux-examples/src/main/resources/multilang.yaml index aaab5d3b303..a0d9dfef403 100644 --- a/flux/flux-examples/src/main/resources/multilang.yaml +++ b/flux/flux-examples/src/main/resources/multilang.yaml @@ -47,7 +47,7 @@ bolts: className: "org.apache.storm.flux.wrappers.bolts.FluxShellBolt" constructorArgs: # command line - - ["python", "splitsentence.py"] + - ["python3", "splitsentence.py"] # output fields - ["word"] parallelism: 1 diff --git a/flux/flux-wrappers/pom.xml b/flux/flux-wrappers/pom.xml index f9f9c9e58d2..064e5860b54 100644 --- a/flux/flux-wrappers/pom.xml +++ b/flux/flux-wrappers/pom.xml @@ -21,7 +21,7 @@ org.apache.storm flux - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../pom.xml diff --git a/flux/flux-wrappers/src/main/java/org/apache/storm/flux/wrappers/bolts/FluxShellBolt.java b/flux/flux-wrappers/src/main/java/org/apache/storm/flux/wrappers/bolts/FluxShellBolt.java index 8f8916b7be0..3881e93be78 100644 --- a/flux/flux-wrappers/src/main/java/org/apache/storm/flux/wrappers/bolts/FluxShellBolt.java +++ 
b/flux/flux-wrappers/src/main/java/org/apache/storm/flux/wrappers/bolts/FluxShellBolt.java @@ -65,7 +65,7 @@ public FluxShellBolt(String[] command, String[] outputFields) { * className: "org.apache.storm.flux.wrappers.bolts.FluxShellBolt" * constructorArgs: * # command line - * - ["python", "splitsentence.py"] + * - ["python3", "splitsentence.py"] * # output fields * - ["word"] * configMethods: @@ -90,7 +90,7 @@ public void addComponentConfig(String key, Object value) { * className: "org.apache.storm.flux.wrappers.bolts.FluxShellBolt" * constructorArgs: * # command line - * - ["python", "splitsentence.py"] + * - ["python3", "splitsentence.py"] * # output fields * - ["word"] * configMethods: @@ -118,7 +118,7 @@ public void addComponentConfig(String key, List values) { * - className: org.apache.storm.flux.wrappers.bolts.FluxShellBolt * id: my_bolt * constructorArgs: - * - [python, my_bolt.py] + * - [python3, my_bolt.py] * configMethods: * - name: setDefaultStream * args: @@ -139,7 +139,7 @@ public void setDefaultStream(String[] outputFields) { * - className: org.apache.storm.flux.wrappers.bolts.FluxShellBolt * id: my_bolt * constructorArgs: - * - [python, my_bolt.py] + * - [python3, my_bolt.py] * configMethods: * - name: setNamedStream * args: diff --git a/flux/flux-wrappers/src/main/java/org/apache/storm/flux/wrappers/spouts/FluxShellSpout.java b/flux/flux-wrappers/src/main/java/org/apache/storm/flux/wrappers/spouts/FluxShellSpout.java index 89495fe32c3..caa920f5b06 100644 --- a/flux/flux-wrappers/src/main/java/org/apache/storm/flux/wrappers/spouts/FluxShellSpout.java +++ b/flux/flux-wrappers/src/main/java/org/apache/storm/flux/wrappers/spouts/FluxShellSpout.java @@ -67,7 +67,7 @@ public FluxShellSpout(String[] args, String[] outputFields) { * className: "org.apache.storm.flux.wrappers.bolts.FluxShellSpout" * constructorArgs: * # command line - * - ["python", "splitsentence.py"] + * - ["python3", "splitsentence.py"] * # output fields * - ["word"] * configMethods: @@ -92,7 +92,7 @@ public void addComponentConfig(String key, Object value) { * className: "org.apache.storm.flux.wrappers.bolts.FluxShellSpout" * constructorArgs: * # command line - * - ["python", "splitsentence.py"] + * - ["python3", "splitsentence.py"] * # output fields * - ["word"] * configMethods: @@ -120,7 +120,7 @@ public void addComponentConfig(String key, List values) { * - className: org.apache.storm.flux.wrappers.bolts.FluxShellSpout * id: my_spout * constructorArgs: - * - [python, my_spout.py] + * - [python3, my_spout.py] * configMethods: * - name: setDefaultStream * args: @@ -141,7 +141,7 @@ public void setDefaultStream(String[] outputFields) { * - className: org.apache.storm.flux.wrappers.bolts.FluxShellSpout * id: my_spout * constructorArgs: - * - [python, my_spout.py] + * - [python3, my_spout.py] * configMethods: * - name: setNamedStream * args: diff --git a/flux/pom.xml b/flux/pom.xml index f585ffd4055..bc06637697f 100644 --- a/flux/pom.xml +++ b/flux/pom.xml @@ -26,7 +26,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../pom.xml diff --git a/integration-test/pom.xml b/integration-test/pom.xml index e04aae34476..a9068925761 100644 --- a/integration-test/pom.xml +++ b/integration-test/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index d8cd7bf6e2f..2cbf9725dac 100644 --- a/pom.xml +++ b/pom.xml @@ -21,12 +21,12 @@ org.apache apache - 21 + 30 org.apache.storm storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT pom Storm Distributed and 
fault-tolerant realtime computation @@ -222,7 +222,7 @@ Ethanlm Ethan Li ethanli@apache.org - + Committer -6 @@ -267,10 +267,11 @@ - scm:git:https://gitbox.apache.org/repos/asf/storm.git + git@github.com:apache/storm.git scm:git:https://gitbox.apache.org/repos/asf/storm.git - https://gitbox.apache.org/repos/asf/storm - + https://github.com/apache/storm + v2.5.0 + jira @@ -278,6 +279,11 @@ + 11 + ${maven.compiler.target} + false + false + UTF-8 -Djava.net.preferIPv4Stack=true @@ -300,7 +306,7 @@ 1.1 9.4.45.v20220203 0.2.3 - 1.5.0 + 1.6.0 2.0 4.5.13 2.0.1 @@ -361,7 +367,8 @@ 1.2.1 2.3.0 1.1.1 - 6.27.3 + 8.1.1 + 2.3 provided @@ -701,14 +708,6 @@ - - - storm.maven.website - Storm Website - file:///tmp/site - - - @@ -906,7 +905,7 @@ ${clojure.tools.logging.version} - com.twitter + org.clojars.bipinprasad carbonite ${carbonite.version} @@ -1259,7 +1258,7 @@ maven-pmd-plugin 3.16.0 - 1.8 + ${maven.compiler.target} storm/pmd-ruleset.xml @@ -1432,13 +1431,14 @@ org.apache.maven.plugins maven-compiler-plugin - 1.8 - 1.8 + ${maven.compiler.source} + ${maven.compiler.target} org.apache.maven.plugins maven-release-plugin + 3.0.1 true v@{project.version} @@ -1452,15 +1452,6 @@ org.apache.maven.plugins maven-javadoc-plugin - - ch.raffael.doclets.pegdown.PegdownDoclet - - ch.raffael.pegdown-doclet - pegdown-doclet - 1.1 - - true - org.apache.maven.plugins diff --git a/sql/pom.xml b/sql/pom.xml index 34a40f63b45..a14665f4c2e 100644 --- a/sql/pom.xml +++ b/sql/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../pom.xml diff --git a/sql/storm-sql-core/pom.xml b/sql/storm-sql-core/pom.xml index 1eb1888ed39..8b55e3d38e5 100644 --- a/sql/storm-sql-core/pom.xml +++ b/sql/storm-sql-core/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml diff --git a/sql/storm-sql-external/storm-sql-hdfs/pom.xml b/sql/storm-sql-external/storm-sql-hdfs/pom.xml index c87f3830cd0..435b24788d9 100644 --- a/sql/storm-sql-external/storm-sql-hdfs/pom.xml +++ b/sql/storm-sql-external/storm-sql-hdfs/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../../pom.xml diff --git a/sql/storm-sql-external/storm-sql-kafka/pom.xml b/sql/storm-sql-external/storm-sql-kafka/pom.xml index c9716b56154..bef822f932b 100644 --- a/sql/storm-sql-external/storm-sql-kafka/pom.xml +++ b/sql/storm-sql-external/storm-sql-kafka/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../../pom.xml diff --git a/sql/storm-sql-external/storm-sql-mongodb/pom.xml b/sql/storm-sql-external/storm-sql-mongodb/pom.xml index d90383f91d4..40bb7238828 100644 --- a/sql/storm-sql-external/storm-sql-mongodb/pom.xml +++ b/sql/storm-sql-external/storm-sql-mongodb/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../../pom.xml diff --git a/sql/storm-sql-external/storm-sql-redis/pom.xml b/sql/storm-sql-external/storm-sql-redis/pom.xml index 23884d27114..07a3f814134 100644 --- a/sql/storm-sql-external/storm-sql-redis/pom.xml +++ b/sql/storm-sql-external/storm-sql-redis/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../../pom.xml diff --git a/sql/storm-sql-runtime/pom.xml b/sql/storm-sql-runtime/pom.xml index 4028ff8384c..101e562d607 100644 --- a/sql/storm-sql-runtime/pom.xml +++ b/sql/storm-sql-runtime/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml diff --git a/storm-buildtools/maven-shade-clojure-transformer/pom.xml 
b/storm-buildtools/maven-shade-clojure-transformer/pom.xml index c4eed5427aa..6236c84d53b 100644 --- a/storm-buildtools/maven-shade-clojure-transformer/pom.xml +++ b/storm-buildtools/maven-shade-clojure-transformer/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml diff --git a/storm-buildtools/storm-maven-plugins/pom.xml b/storm-buildtools/storm-maven-plugins/pom.xml index 00810359bd8..0a2e3e07198 100644 --- a/storm-buildtools/storm-maven-plugins/pom.xml +++ b/storm-buildtools/storm-maven-plugins/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml diff --git a/storm-checkstyle/pom.xml b/storm-checkstyle/pom.xml index 7059cfbc6ff..e6259e9fd17 100644 --- a/storm-checkstyle/pom.xml +++ b/storm-checkstyle/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../pom.xml 4.0.0 diff --git a/storm-client/pom.xml b/storm-client/pom.xml index d988280a2b7..77261b9566a 100644 --- a/storm-client/pom.xml +++ b/storm-client/pom.xml @@ -22,7 +22,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../pom.xml diff --git a/storm-client/src/jvm/org/apache/storm/Config.java b/storm-client/src/jvm/org/apache/storm/Config.java index 0720df29312..60135bc7c18 100644 --- a/storm-client/src/jvm/org/apache/storm/Config.java +++ b/storm-client/src/jvm/org/apache/storm/Config.java @@ -859,9 +859,17 @@ public class Config extends HashMap { @IsString public static final String STORM_DO_AS_USER = "storm.doAsUser"; /** - * The number of machines that should be used by this topology to isolate it from all others. Set storm.scheduler to - * org.apache.storm.scheduler.multitenant.MultitenantScheduler - */ + * The maximum number of machines that should be used by this topology. This configuration can + * be used to isolate topologies from each other. See {@link org.apache.storm.scheduler.multitenant.MultitenantScheduler}. + * The Round Robin strategy uses this value to avoid spreading a topology too + * thinly over a large number of machines, avoiding the extreme case where the topology would be spread over + * all workers and thus deny scheduling of other topologies. Round Robin scheduling will occupy all the workers on + * this limited number of machines, forcing other topologies to be scheduled on other machines and thus isolating the + * topology from them. + * Set storm.scheduler to {@link org.apache.storm.scheduler.multitenant.MultitenantScheduler}. + * Alternatively, set storm.scheduler to {@link org.apache.storm.scheduler.resource.ResourceAwareScheduler} + * with {@link #TOPOLOGY_SCHEDULER_STRATEGY} set to + * {@link org.apache.storm.scheduler.resource.strategies.scheduling.RoundRobinResourceAwareStrategy}. */ @IsInteger @IsPositiveNumber public static final String TOPOLOGY_ISOLATED_MACHINES = "topology.isolate.machines"; @@ -1443,6 +1451,7 @@ public class Config extends HashMap { public static final String STORM_MESSAGING_TRANSPORT = "storm.messaging.transport"; /** * Netty based messaging: Is authentication required for Netty messaging from client worker process to server worker process.
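As a usage sketch for the flag documented here (not part of the patch itself), a cluster or topology configuration would enable the check like this, assuming the workers are otherwise set up for authentication:

```java
import org.apache.storm.Config;

public class NettyAuthSketch {
    public static void main(String[] args) {
        Config conf = new Config();
        // Require authentication for worker-to-worker Netty messaging.
        conf.put(Config.STORM_MESSAGING_NETTY_AUTHENTICATION, true);
    }
}
```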
+ * See https://issues.apache.org/jira/browse/STORM-348 for more details */ @IsBoolean public static final String STORM_MESSAGING_NETTY_AUTHENTICATION = "storm.messaging.netty.authentication"; diff --git a/storm-client/src/jvm/org/apache/storm/metric/api/MultiCountMetric.java b/storm-client/src/jvm/org/apache/storm/metric/api/MultiCountMetric.java index d1336656a61..21d89402e13 100644 --- a/storm-client/src/jvm/org/apache/storm/metric/api/MultiCountMetric.java +++ b/storm-client/src/jvm/org/apache/storm/metric/api/MultiCountMetric.java @@ -16,24 +16,28 @@ import java.util.Map; public class MultiCountMetric implements IMetric { - Map value = new HashMap<>(); + final Map value = new HashMap<>(); public MultiCountMetric() { } public CountMetric scope(String key) { - CountMetric val = value.get(key); - if (val == null) { - value.put(key, val = new CountMetric()); + synchronized (value) { + CountMetric val = value.get(key); + if (val == null) { + value.put(key, val = new CountMetric()); + } + return val; } - return val; } @Override public Map getValueAndReset() { Map ret = new HashMap<>(); - for (Map.Entry e : value.entrySet()) { - ret.put(e.getKey(), e.getValue().getValueAndReset()); + synchronized (value) { + for (Map.Entry e : value.entrySet()) { + ret.put(e.getKey(), e.getValue().getValueAndReset()); + } } return ret; } diff --git a/storm-client/src/jvm/org/apache/storm/metric/api/MultiReducedMetric.java b/storm-client/src/jvm/org/apache/storm/metric/api/MultiReducedMetric.java index c9c8590d3aa..9b0efe0a2bf 100644 --- a/storm-client/src/jvm/org/apache/storm/metric/api/MultiReducedMetric.java +++ b/storm-client/src/jvm/org/apache/storm/metric/api/MultiReducedMetric.java @@ -16,28 +16,32 @@ import java.util.Map; public class MultiReducedMetric implements IMetric { - Map value = new HashMap<>(); - IReducer reducer; + final Map value = new HashMap<>(); + final IReducer reducer; public MultiReducedMetric(IReducer reducer) { this.reducer = reducer; } public ReducedMetric scope(String key) { - ReducedMetric val = value.get(key); - if (val == null) { - value.put(key, val = new ReducedMetric(reducer)); + synchronized (value) { + ReducedMetric val = value.get(key); + if (val == null) { + value.put(key, val = new ReducedMetric(reducer)); + } + return val; } - return val; } @Override public Map getValueAndReset() { Map ret = new HashMap<>(); - for (Map.Entry e : value.entrySet()) { - Object val = e.getValue().getValueAndReset(); - if (val != null) { - ret.put(e.getKey(), val); + synchronized (value) { + for (Map.Entry e : value.entrySet()) { + Object val = e.getValue().getValueAndReset(); + if (val != null) { + ret.put(e.getKey(), val); + } } } return ret; diff --git a/storm-client/src/jvm/org/apache/storm/task/ShellBolt.java b/storm-client/src/jvm/org/apache/storm/task/ShellBolt.java index fca718ee86c..6800e625acd 100644 --- a/storm-client/src/jvm/org/apache/storm/task/ShellBolt.java +++ b/storm-client/src/jvm/org/apache/storm/task/ShellBolt.java @@ -63,7 +63,7 @@ * topology that use other languages. For example: * * - *

- * ```java public class MyBolt extends ShellBolt implements IRichBolt { public MyBolt() { super("python", "mybolt.py"); }
+ * ```java public class MyBolt extends ShellBolt implements IRichBolt { public MyBolt() { super("python3", "mybolt.py"); }
*

public void declareOutputFields(OutputFieldsDeclarer declarer) { declarer.declare(new Fields("field1", "field2")); } } ``` */ diff --git a/storm-clojure-test/pom.xml b/storm-clojure-test/pom.xml index 14bd5942bdd..a5998bf5147 100644 --- a/storm-clojure-test/pom.xml +++ b/storm-clojure-test/pom.xml @@ -20,7 +20,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT storm-clojure-test diff --git a/storm-clojure/pom.xml b/storm-clojure/pom.xml index 8ba86d912e7..5ae31809912 100644 --- a/storm-clojure/pom.xml +++ b/storm-clojure/pom.xml @@ -20,7 +20,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT storm-clojure @@ -56,8 +56,9 @@ ${kryo.version} - com.twitter + org.clojars.bipinprasad carbonite + ${carbonite.version} compile diff --git a/storm-core/pom.xml b/storm-core/pom.xml index e0e8f3a98c6..4b85f06e64e 100644 --- a/storm-core/pom.xml +++ b/storm-core/pom.xml @@ -20,7 +20,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../pom.xml @@ -228,6 +228,21 @@ + + + net.minidev + json-smart + ${json-smart.version} + src/jvm @@ -444,9 +459,9 @@ pre-test-jacoco-clean process-test-classes - + - + run diff --git a/storm-core/test/clj/org/apache/storm/metrics_test.clj b/storm-core/test/clj/org/apache/storm/metrics_test.clj index 2689a7a31d5..26816a75261 100644 --- a/storm-core/test/clj/org/apache/storm/metrics_test.clj +++ b/storm-core/test/clj/org/apache/storm/metrics_test.clj @@ -194,7 +194,7 @@ {"2" (mk-shell-bolt-with-metrics-spec {(Utils/getGlobalStreamId "1" nil) (Thrift/prepareGlobalGrouping)} - "python" "tester_bolt_metrics.py")})] + "python3" "tester_bolt_metrics.py")})] (.submitTopology cluster "shell-metrics-tester" {} topology) (.feed feeder ["a"] 1) @@ -226,7 +226,7 @@ "storm.zookeeper.session.timeout" 60000 })))] (let [topology (Thrift/buildTopology - {"1" (mk-shell-spout-with-metrics-spec "python" "tester_spout_metrics.py")} + {"1" (mk-shell-spout-with-metrics-spec "python3" "tester_spout_metrics.py")} {"2" (Thrift/prepareBoltDetails {(Utils/getGlobalStreamId "1" nil) (Thrift/prepareAllGrouping)} diff --git a/storm-core/test/jvm/org/apache/storm/messaging/NettyIntegrationTest.java b/storm-core/test/jvm/org/apache/storm/messaging/NettyIntegrationTest.java index 867cc4e6003..d581a81ccef 100644 --- a/storm-core/test/jvm/org/apache/storm/messaging/NettyIntegrationTest.java +++ b/storm-core/test/jvm/org/apache/storm/messaging/NettyIntegrationTest.java @@ -91,8 +91,12 @@ public void testIntegration() throws Exception { completeTopologyParams.setMockedSources(mockedSources); Map> results = Testing.completeTopology(cluster, topology, completeTopologyParams); - - assertEquals(6 * 4, Testing.readTuples(results, "2").size()); + List> tuplesRead = Testing.readTuples(results, "2"); + String errMsg = "Tuples Read:\n\t" + + String.join("\n\t", tuplesRead.stream().map(Object::toString).collect(Collectors.toList())) + + "\nTuples Expected:\n\t" + + String.join("\n\t", testTuples.stream().map(FixedTuple::toString).collect(Collectors.toList())); + assertEquals(6 * 4, tuplesRead.size(), errMsg); } } } diff --git a/storm-dist/binary/final-package/pom.xml b/storm-dist/binary/final-package/pom.xml index 1ea8029f9fe..0c8a004a87a 100644 --- a/storm-dist/binary/final-package/pom.xml +++ b/storm-dist/binary/final-package/pom.xml @@ -21,7 +21,7 @@ apache-storm-bin org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../pom.xml diff --git a/storm-dist/binary/pom.xml b/storm-dist/binary/pom.xml index e9e6f41b200..e3456a2d83e 100644 --- a/storm-dist/binary/pom.xml +++ 
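Returning to the MultiCountMetric and MultiReducedMetric hunks above: the backing maps are now guarded by synchronized blocks because scope() is called from executor threads while getValueAndReset() runs on the metrics timer thread. A small illustrative sketch of that access pattern (the scope names are made up):

```java
import java.util.Map;
import org.apache.storm.metric.api.MultiCountMetric;

public class MultiCountMetricSketch {
    public static void main(String[] args) throws InterruptedException {
        MultiCountMetric counts = new MultiCountMetric();
        // Two "executor" threads increment different scopes concurrently;
        // scope() now synchronizes on the backing map, so no updates are lost.
        Thread t1 = new Thread(() -> counts.scope("acked").incr());
        Thread t2 = new Thread(() -> counts.scope("failed").incr());
        t1.start(); t2.start();
        t1.join(); t2.join();
        // The metrics consumer snapshots and resets every scope in one step.
        Map snapshot = counts.getValueAndReset();
        System.out.println(snapshot); // {acked=1, failed=1}
    }
}
```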
b/storm-dist/binary/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml diff --git a/storm-dist/binary/storm-autocreds-bin/pom.xml b/storm-dist/binary/storm-autocreds-bin/pom.xml index c8f51c7764f..07a276c8a1a 100644 --- a/storm-dist/binary/storm-autocreds-bin/pom.xml +++ b/storm-dist/binary/storm-autocreds-bin/pom.xml @@ -20,7 +20,7 @@ org.apache.storm apache-storm-bin - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT storm-autocreds-bin pom diff --git a/storm-dist/binary/storm-client-bin/pom.xml b/storm-dist/binary/storm-client-bin/pom.xml index 3b37794c32c..564e9ad134f 100644 --- a/storm-dist/binary/storm-client-bin/pom.xml +++ b/storm-dist/binary/storm-client-bin/pom.xml @@ -22,7 +22,7 @@ apache-storm-bin org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../pom.xml diff --git a/storm-dist/binary/storm-kafka-monitor-bin/pom.xml b/storm-dist/binary/storm-kafka-monitor-bin/pom.xml index f2b892bba2b..0a6b97c21b6 100644 --- a/storm-dist/binary/storm-kafka-monitor-bin/pom.xml +++ b/storm-dist/binary/storm-kafka-monitor-bin/pom.xml @@ -20,7 +20,7 @@ org.apache.storm apache-storm-bin - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT storm-kafka-monitor-bin pom diff --git a/storm-dist/binary/storm-sql-core-bin/pom.xml b/storm-dist/binary/storm-sql-core-bin/pom.xml index 29e37e04908..bbc51e67461 100644 --- a/storm-dist/binary/storm-sql-core-bin/pom.xml +++ b/storm-dist/binary/storm-sql-core-bin/pom.xml @@ -20,7 +20,7 @@ org.apache.storm apache-storm-bin - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT storm-sql-core-bin pom diff --git a/storm-dist/binary/storm-sql-runtime-bin/pom.xml b/storm-dist/binary/storm-sql-runtime-bin/pom.xml index ca795879404..f9420ac30ed 100644 --- a/storm-dist/binary/storm-sql-runtime-bin/pom.xml +++ b/storm-dist/binary/storm-sql-runtime-bin/pom.xml @@ -20,7 +20,7 @@ org.apache.storm apache-storm-bin - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT storm-sql-runtime-bin pom diff --git a/storm-dist/binary/storm-submit-tools-bin/pom.xml b/storm-dist/binary/storm-submit-tools-bin/pom.xml index 57117b20a64..91fea67c924 100644 --- a/storm-dist/binary/storm-submit-tools-bin/pom.xml +++ b/storm-dist/binary/storm-submit-tools-bin/pom.xml @@ -20,7 +20,7 @@ org.apache.storm apache-storm-bin - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT storm-submit-tools-bin pom diff --git a/storm-dist/binary/storm-webapp-bin/pom.xml b/storm-dist/binary/storm-webapp-bin/pom.xml index 91c847f9e64..b1a8398ee6e 100644 --- a/storm-dist/binary/storm-webapp-bin/pom.xml +++ b/storm-dist/binary/storm-webapp-bin/pom.xml @@ -21,7 +21,7 @@ apache-storm-bin org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../pom.xml diff --git a/storm-dist/source/pom.xml b/storm-dist/source/pom.xml index 4a59869fc53..e0a4bdc01f3 100644 --- a/storm-dist/source/pom.xml +++ b/storm-dist/source/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml diff --git a/storm-multilang/javascript/pom.xml b/storm-multilang/javascript/pom.xml index 3159017c69c..d074ce285b0 100644 --- a/storm-multilang/javascript/pom.xml +++ b/storm-multilang/javascript/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml diff --git a/storm-multilang/python/pom.xml b/storm-multilang/python/pom.xml index 5a745d1cfe0..9ab05aa02f5 100644 --- a/storm-multilang/python/pom.xml +++ b/storm-multilang/python/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml diff --git a/storm-multilang/ruby/pom.xml b/storm-multilang/ruby/pom.xml index d71804e14fd..e3fba6ced81 
100644 --- a/storm-multilang/ruby/pom.xml +++ b/storm-multilang/ruby/pom.xml @@ -21,7 +21,7 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../../pom.xml diff --git a/storm-server/pom.xml b/storm-server/pom.xml index 77adf6f90c2..3ec91738936 100644 --- a/storm-server/pom.xml +++ b/storm-server/pom.xml @@ -21,13 +21,17 @@ storm org.apache.storm - 2.5.0-SNAPSHOT + 2.6.0-SNAPSHOT ../pom.xml 4.0.0 storm-server + + --add-opens java.base/java.lang=ALL-UNNAMED + + org.apache.storm @@ -60,6 +64,7 @@ org.rocksdb rocksdbjni + ${rocksdb-version} @@ -168,6 +173,7 @@ maven-surefire-plugin 1 + ${storm-server.test.introspection.argLine} -Xmx3g -XX:+HeapDumpOnOutOfMemoryError diff --git a/storm-server/src/main/java/org/apache/storm/metricstore/MetricStoreConfig.java b/storm-server/src/main/java/org/apache/storm/metricstore/MetricStoreConfig.java index c375c065560..22cff7c72dd 100644 --- a/storm-server/src/main/java/org/apache/storm/metricstore/MetricStoreConfig.java +++ b/storm-server/src/main/java/org/apache/storm/metricstore/MetricStoreConfig.java @@ -27,13 +27,19 @@ public class MetricStoreConfig { */ public static MetricStore configure(Map conf, StormMetricsRegistry metricsRegistry) throws MetricException { + String storeClass = "None"; try { - String storeClass = (String) conf.get(DaemonConfig.STORM_METRIC_STORE_CLASS); + storeClass = (String) conf.get(DaemonConfig.STORM_METRIC_STORE_CLASS); MetricStore store = (MetricStore) (Class.forName(storeClass)).newInstance(); store.prepare(conf, metricsRegistry); return store; } catch (Exception e) { - throw new MetricException("Failed to create metric store", e); + String rocksdbSpecificMsg = ""; + if (storeClass.contains("rocksdb") + && System.getenv("ROCKSDB_SHAREDLIB_DIR") == null) { + rocksdbSpecificMsg = ", missing env var ROCKSDB_SHAREDLIB_DIR required to load JNI library in org.rocksdb.RocksDB class"; + } + throw new MetricException("Failed to create metric store using store class " + storeClass + rocksdbSpecificMsg, e); } } diff --git a/storm-server/src/main/java/org/apache/storm/metricstore/rocksdb/RocksDbStore.java b/storm-server/src/main/java/org/apache/storm/metricstore/rocksdb/RocksDbStore.java index 89e4cec6eca..59355c1ed8d 100644 --- a/storm-server/src/main/java/org/apache/storm/metricstore/rocksdb/RocksDbStore.java +++ b/storm-server/src/main/java/org/apache/storm/metricstore/rocksdb/RocksDbStore.java @@ -75,7 +75,7 @@ public void prepare(Map config, StormMetricsRegistry metricsRegi options.useCappedPrefixExtractor(RocksDbKey.KEY_SIZE); String path = getRocksDbAbsoluteDir(config); - LOG.info("Opening RocksDB from {}", path); + LOG.info("Opening RocksDB from {}, {}={}", path, DaemonConfig.STORM_ROCKSDB_CREATE_IF_MISSING, createIfMissing); db = RocksDB.open(options, path); } catch (RocksDBException e) { String message = "Error opening RockDB database"; diff --git a/storm-server/src/main/java/org/apache/storm/metricstore/rocksdb/StringMetadataCache.java b/storm-server/src/main/java/org/apache/storm/metricstore/rocksdb/StringMetadataCache.java index e8428c3310e..5ee50a750b4 100644 --- a/storm-server/src/main/java/org/apache/storm/metricstore/rocksdb/StringMetadataCache.java +++ b/storm-server/src/main/java/org/apache/storm/metricstore/rocksdb/StringMetadataCache.java @@ -51,17 +51,17 @@ private StringMetadataCache(RocksDbMetricsWriter dbWriter, int capacity) { } /** - * Initializes the cache instance. + * Initializes the cache instance. 
Should be called only once in the JVM; subsequent calls + will be ignored unless the cache has been cleaned up first. * * @param dbWriter the RocksDB writer instance to handle writing evicted cache data * @param capacity the number of StringMetadata instances to hold in memory - * @throws MetricException if creating multiple cache instances */ - static void init(RocksDbMetricsWriter dbWriter, int capacity) throws MetricException { + static void init(RocksDbMetricsWriter dbWriter, int capacity) { if (instance == null) { instance = new StringMetadataCache(dbWriter, capacity); } else { - throw new MetricException("StringMetadataCache already created"); + LOG.error("Ignoring call to init() since StringMetadataCache already created"); } } diff --git a/storm-server/src/main/java/org/apache/storm/scheduler/resource/strategies/scheduling/BaseResourceAwareStrategy.java b/storm-server/src/main/java/org/apache/storm/scheduler/resource/strategies/scheduling/BaseResourceAwareStrategy.java index bdd89ffea7c..94550a1fe40 100644 --- a/storm-server/src/main/java/org/apache/storm/scheduler/resource/strategies/scheduling/BaseResourceAwareStrategy.java +++ b/storm-server/src/main/java/org/apache/storm/scheduler/resource/strategies/scheduling/BaseResourceAwareStrategy.java @@ -164,6 +164,7 @@ public SchedulingResult schedule(Cluster cluster, TopologyDetails td) { //order executors to be scheduled List orderedExecutors = execSorter.sortExecutors(unassignedExecutors); + isolateAckersToEnd(orderedExecutors); Iterable sortedNodes = null; if (!this.sortNodesForEachExecutor) { nodeSorter.prepare(null); @@ -403,20 +404,26 @@ public RasNode idToNode(String id) { return ret; } + /** + * Modify the collection in place, moving the unassigned ackers to the end of the list. + * + * @param orderedExecutors List of executors that are presumed to be sorted. + */ + private void isolateAckersToEnd(List orderedExecutors) { + orderedExecutors.removeAll(searcherState.getUnassignedAckers()); + orderedExecutors.addAll(searcherState.getUnassignedAckers()); + LOG.debug("For topology: {}, we have sorted execs: {} and unassigned ackers: {}", + topoName, orderedExecutors, searcherState.getUnassignedAckers()); + } + /** * Try to schedule till successful or till limits (backtrack count or time) have been exceeded. * - * @param orderedExecutors Executors sorted in the preferred order cannot be null. + * @param orderedExecutors Executors sorted in the preferred order; cannot be null. Note that ackers are isolated at the end. * @param sortedNodesIter Node iterable which may be null. * @return SchedulingResult with success attribute set to true or false indicating whether ALL executors were assigned.
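The new isolateAckersToEnd() helper is a stable re-partition of the executor list: remove the ackers wherever they sit, then append them, preserving the relative order of both groups. The same idiom in isolation (plain strings stand in for ExecutorDetails):

```java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class IsolateToEndSketch {
    public static void main(String[] args) {
        List<String> ordered = new ArrayList<>(
                Arrays.asList("spout-e1", "acker-e2", "bolt-e3", "acker-e4"));
        List<String> ackers = Arrays.asList("acker-e2", "acker-e4");
        // removeAll then addAll keeps both sub-sequences in their original order.
        ordered.removeAll(ackers);
        ordered.addAll(ackers);
        System.out.println(ordered); // [spout-e1, bolt-e3, acker-e2, acker-e4]
    }
}
```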
*/ protected SchedulingResult scheduleExecutorsOnNodes(List orderedExecutors, Iterable sortedNodesIter) { - // isolate ackers and put it to the end of orderedExecutors - // the order of unassigned ackers in orderedExecutors and searcherState.getUnassignedAckers() are same - orderedExecutors.removeAll(searcherState.getUnassignedAckers()); - orderedExecutors.addAll(searcherState.getUnassignedAckers()); - LOG.debug("For topology: {}, we have sorted execs: {} and unassigned ackers: {}", - topoName, orderedExecutors, searcherState.getUnassignedAckers()); long startTimeMilli = Time.currentTimeMillis(); searcherState.setSortedExecs(orderedExecutors); @@ -487,7 +494,7 @@ protected SchedulingResult scheduleExecutorsOnNodes(List ordere if (!isExecAssignmentToWorkerValid(exec, workerSlot)) { // exec can't fit in this workerSlot, try next workerSlot - LOG.debug("Failed to assign exec={}, comp={}, topo={} to worker={} on node=({}, availCpu={}, availMem={}).", + LOG.trace("Failed to assign exec={}, comp={}, topo={} to worker={} on node=({}, availCpu={}, availMem={}).", exec, comp, topoName, workerSlot, node.getId(), node.getAvailableCpuResources(), node.getAvailableMemoryResources()); continue; @@ -504,7 +511,7 @@ protected SchedulingResult scheduleExecutorsOnNodes(List ordere if (searcherState.areAllExecsScheduled()) { //Everything is scheduled correctly, so no need to search any more. LOG.info("scheduleExecutorsOnNodes: Done at loopCnt={} in {}ms, state.elapsedtime={}, backtrackCnt={}, topo={}", - loopCnt, System.currentTimeMillis() - startTimeMilli, + loopCnt, Time.currentTimeMillis() - startTimeMilli, Time.currentTimeMillis() - searcherState.startTimeMillis, searcherState.getNumBacktrack(), topoName); @@ -527,13 +534,13 @@ protected SchedulingResult scheduleExecutorsOnNodes(List ordere if (execIndex == 0) { break; } else { - searcherState.backtrack(execToComp, nodeForExec[execIndex - 1], workerSlotForExec[execIndex - 1]); + searcherState.backtrack(execToComp, nodeForExec, workerSlotForExec); progressIdxForExec[execIndex] = -1; } } boolean success = searcherState.areAllExecsScheduled(); LOG.info("scheduleExecutorsOnNodes: Scheduled={} in {} milliseconds, state.elapsedtime={}, backtrackCnt={}, topo={}", - success, System.currentTimeMillis() - startTimeMilli, Time.currentTimeMillis() - searcherState.startTimeMillis, + success, Time.currentTimeMillis() - startTimeMilli, Time.currentTimeMillis() - searcherState.startTimeMillis, searcherState.getNumBacktrack(), topoName); return searcherState.createSchedulingResult(success, this.getClass().getSimpleName()); @@ -556,7 +563,7 @@ protected SchedulingResult scheduleExecutorsOnNodes(List ordere * @param workerSlot WorkerSlot on which to schedule. * @return Number of ackers assigned. 
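The swap from System.currentTimeMillis() to Time.currentTimeMillis() in the log lines above is not cosmetic: under Storm's simulated time the two clocks diverge. A sketch of why, assuming the Time.SimulatedTime and advanceTime test helpers behave as they do in Storm's other scheduler tests:

```java
import org.apache.storm.utils.Time;

public class SimulatedTimeSketch {
    public static void main(String[] args) throws Exception {
        try (Time.SimulatedTime ignored = new Time.SimulatedTime()) {
            long start = Time.currentTimeMillis();
            Time.advanceTime(5_000); // the simulated clock moves; the wall clock does not
            // Elapsed time computed against System.currentTimeMillis() here would be ~0.
            System.out.println("elapsed=" + (Time.currentTimeMillis() - start)); // 5000
        }
    }
}
```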
*/ - private int assignBoundAckersForNewWorkerSlot(ExecutorDetails exec, RasNode node, WorkerSlot workerSlot) { + protected int assignBoundAckersForNewWorkerSlot(ExecutorDetails exec, RasNode node, WorkerSlot workerSlot) { int numOfAckersToBind = searcherState.getNumOfAckersToBind(exec, workerSlot); if (numOfAckersToBind > 0) { for (int i = 0; i < numOfAckersToBind; i++) { diff --git a/storm-server/src/main/java/org/apache/storm/scheduler/resource/strategies/scheduling/ConstraintSolverConfig.java b/storm-server/src/main/java/org/apache/storm/scheduler/resource/strategies/scheduling/ConstraintSolverConfig.java index 41eed696d58..84dca6bc5a3 100644 --- a/storm-server/src/main/java/org/apache/storm/scheduler/resource/strategies/scheduling/ConstraintSolverConfig.java +++ b/storm-server/src/main/java/org/apache/storm/scheduler/resource/strategies/scheduling/ConstraintSolverConfig.java @@ -21,7 +21,6 @@ import java.util.Set; import org.apache.storm.Config; -import org.apache.storm.scheduler.ExecutorDetails; import org.apache.storm.scheduler.TopologyDetails; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/storm-server/src/main/java/org/apache/storm/scheduler/resource/strategies/scheduling/RoundRobinResourceAwareStrategy.java b/storm-server/src/main/java/org/apache/storm/scheduler/resource/strategies/scheduling/RoundRobinResourceAwareStrategy.java new file mode 100644 index 00000000000..c0bbcc3fcec --- /dev/null +++ b/storm-server/src/main/java/org/apache/storm/scheduler/resource/strategies/scheduling/RoundRobinResourceAwareStrategy.java @@ -0,0 +1,187 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.storm.scheduler.resource.strategies.scheduling; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import org.apache.storm.Config; +import org.apache.storm.scheduler.ExecutorDetails; +import org.apache.storm.scheduler.WorkerSlot; +import org.apache.storm.scheduler.resource.RasNode; +import org.apache.storm.scheduler.resource.SchedulingResult; +import org.apache.storm.utils.Time; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class RoundRobinResourceAwareStrategy extends BaseResourceAwareStrategy { + private static final Logger LOG = LoggerFactory.getLogger(RoundRobinResourceAwareStrategy.class); + + public RoundRobinResourceAwareStrategy() { + super(false, NodeSortType.COMMON); + } + + /** + * Maximum number of isolated nodes being requested based on the topology configuration + * {@link Config#TOPOLOGY_ISOLATED_MACHINES}. 
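Tying the new strategy back to the TOPOLOGY_ISOLATED_MACHINES javadoc earlier in this patch, a topology submitted to a cluster running the ResourceAwareScheduler would combine the two roughly as follows; the cap of 4 machines is illustrative:

```java
import org.apache.storm.Config;

public class RoundRobinIsolationSketch {
    public static void main(String[] args) {
        Config conf = new Config();
        // Cap the topology at 4 machines; round-robin scheduling then packs all
        // workers onto those machines, isolating the topology from the rest.
        conf.put(Config.TOPOLOGY_ISOLATED_MACHINES, 4);
        // String form keeps this compilable against storm-client alone.
        conf.put(Config.TOPOLOGY_SCHEDULER_STRATEGY,
                "org.apache.storm.scheduler.resource.strategies.scheduling.RoundRobinResourceAwareStrategy");
    }
}
```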
+ */ + private int getMaxNumberOfNodesRequested() { + Map conf = topologyDetails.getConf(); + if (conf.get(Config.TOPOLOGY_ISOLATED_MACHINES) == null) { + return Integer.MAX_VALUE; + } else { + return ((Number) topologyDetails.getConf().get(Config.TOPOLOGY_ISOLATED_MACHINES)).intValue(); + } + } + + /** + * If the number of machines is limited, truncate the node list to at most that many nodes, + * keeping only nodes with no other topologies running on them. A node running only the current + * topology remains eligible for selection; a node running any other topology is skipped. + * + * @param sortedNodesIterable Iterable of nodes + * @return an ArrayList of nodes + */ + private ArrayList getTruncatedNodeList(Iterable sortedNodesIterable) { + final int maxNodes = getMaxNumberOfNodesRequested(); + final ArrayList ret = new ArrayList<>(); + sortedNodesIterable.forEach(node -> { + if (ret.size() < maxNodes) { + RasNode rasNode = nodes.getNodeById(node); + Collection runningTopos = rasNode.getRunningTopologies(); + if (runningTopos.isEmpty() || runningTopos.size() == 1 && runningTopos.contains(topologyDetails.getId())) { + ret.add(node); + } + } + }); + return ret; + } + + /** + * For each component try to schedule executors in sequence on the nodes. + * + * @param orderedExecutors Executors sorted in the preferred order; cannot be null + * @param sortedNodesIterable Node iterable which cannot be null, relies on behavior when {@link #sortNodesForEachExecutor} is false + * @return SchedulingResult with success attribute set to true or false indicating whether ALL executors were assigned. + */ + @Override + protected SchedulingResult scheduleExecutorsOnNodes(List orderedExecutors, Iterable sortedNodesIterable) { + long startTimeMilli = Time.currentTimeMillis(); + int maxExecCnt = searcherState.getExecSize(); + int nodeSortCnt = 1; + Iterator sortedNodesIter = null; + ArrayList sortedNodes = getTruncatedNodeList(sortedNodesIterable); + + LOG.debug("scheduleExecutorsOnNodes: will assign {} executors for topo {}", maxExecCnt, topoName); + + searcherState.setSortedExecs(orderedExecutors); + + OUTERMOST_LOOP: + for (int loopCnt = 0 ; true ; loopCnt++) { + LOG.debug("scheduleExecutorsOnNodes: loopCnt={}, execIndex={}, topo={}, nodeSortCnt={}", + loopCnt, searcherState.getExecIndex(), topoName, nodeSortCnt); + if (searcherState.areSearchLimitsExceeded()) { + LOG.warn("Limits exceeded, loopCnt={}, topo={}, nodeSortCnt={}", loopCnt, topoName, nodeSortCnt); + return searcherState.createSchedulingResult(false, this.getClass().getSimpleName()); + } + + if (Thread.currentThread().isInterrupted()) { + return searcherState.createSchedulingResult(false, this.getClass().getSimpleName()); + } + + int execIndex = searcherState.getExecIndex(); + ExecutorDetails exec = searcherState.currentExec(); + + // If the current exec is found in searcherState's assigned ackers, + // it means it has been assigned as a bound acker already. + // So we skip to the next. + if (searcherState.getBoundAckers().contains(exec)) { + if (searcherState.areAllExecsScheduled()) { + //Everything is scheduled correctly, so no need to search any more.
+ LOG.info("scheduleExecutorsOnNodes: Done at loopCnt={} in {}ms, state.elapsedtime={}, topo={}, nodeSortCnt={}", + loopCnt, Time.currentTimeMillis() - startTimeMilli, + Time.currentTimeMillis() - searcherState.getStartTimeMillis(), + topoName, nodeSortCnt); + return searcherState.createSchedulingResult(true, this.getClass().getSimpleName()); + } + searcherState = searcherState.nextExecutor(); + continue OUTERMOST_LOOP; + } + + String comp = execToComp.get(exec); + // start at the beginning of node list when component changes or when at end of nodes + if (sortedNodesIter == null || searcherState.isExecCompDifferentFromPrior() || !sortedNodesIter.hasNext()) { + sortedNodesIter = sortedNodes.iterator(); + nodeSortCnt++; + } + + while (sortedNodesIter.hasNext()) { + String nodeId = sortedNodesIter.next(); + RasNode node = nodes.getNodeById(nodeId); + if (!node.couldEverFit(exec, topologyDetails)) { + continue; + } + for (WorkerSlot workerSlot : node.getSlotsAvailableToScheduleOn()) { + if (!isExecAssignmentToWorkerValid(exec, workerSlot)) { + // exec can't fit in this workerSlot, try next workerSlot + LOG.trace("Failed to assign exec={}, comp={}, topo={} to worker={} on node=({}, availCpu={}, availMem={}).", + exec, comp, topoName, workerSlot, + node.getId(), node.getAvailableCpuResources(), node.getAvailableMemoryResources()); + continue; + } + + searcherState.incStatesSearched(); + searcherState.assignCurrentExecutor(execToComp, node, workerSlot); + int numBoundAckerAssigned = assignBoundAckersForNewWorkerSlot(exec, node, workerSlot); + if (numBoundAckerAssigned > 0) { + // This exec with some of its bounded ackers have all been successfully assigned + searcherState.getExecsWithBoundAckers().add(exec); + } + + if (searcherState.areAllExecsScheduled()) { + //Everything is scheduled correctly, so no need to search any more. 
+ LOG.info("scheduleExecutorsOnNodes: Done at loopCnt={} in {}ms, state.elapsedtime={}, topo={}, nodeSortCnt={}", + loopCnt, Time.currentTimeMillis() - startTimeMilli, + Time.currentTimeMillis() - searcherState.getStartTimeMillis(), + topoName, nodeSortCnt); + return searcherState.createSchedulingResult(true, this.getClass().getSimpleName()); + } + searcherState = searcherState.nextExecutor(); + LOG.debug("scheduleExecutorsOnNodes: Assigned execId={}, comp={} to node={}/cpu={}/mem={}, " + + "worker-port={} at loopCnt={}, topo={}, nodeSortCnt={}", + execIndex, comp, nodeId, node.getAvailableCpuResources(), node.getAvailableMemoryResources(), + workerSlot.getPort(), loopCnt, topoName, nodeSortCnt); + continue OUTERMOST_LOOP; + } + } + // if here, then the executor was not assigned, scheduling failed + LOG.debug("scheduleExecutorsOnNodes: Failed to schedule execId={}, comp={} at loopCnt={}, topo={}, nodeSortCnt={}", + execIndex, comp, loopCnt, topoName, nodeSortCnt); + break; + } + boolean success = searcherState.areAllExecsScheduled(); + LOG.info("scheduleExecutorsOnNodes: Scheduled={} in {} milliseconds, state.elapsedtime={}, topo={}, nodeSortCnt={}", + success, Time.currentTimeMillis() - startTimeMilli, Time.currentTimeMillis() - searcherState.getStartTimeMillis(), + topoName, nodeSortCnt); + return searcherState.createSchedulingResult(success, this.getClass().getSimpleName()); + } +} diff --git a/storm-server/src/main/java/org/apache/storm/scheduler/resource/strategies/scheduling/SchedulingSearcherState.java b/storm-server/src/main/java/org/apache/storm/scheduler/resource/strategies/scheduling/SchedulingSearcherState.java index be59c1ec5d2..13b11fcaf57 100644 --- a/storm-server/src/main/java/org/apache/storm/scheduler/resource/strategies/scheduling/SchedulingSearcherState.java +++ b/storm-server/src/main/java/org/apache/storm/scheduler/resource/strategies/scheduling/SchedulingSearcherState.java @@ -162,10 +162,6 @@ public int getExecIndex() { return execIndex; } - public int getAckersPerWorker() { - return ackersPerWorker; - } - public LinkedList getUnassignedAckers() { return unassignedAckers; } @@ -182,10 +178,6 @@ public Set getExecsWithBoundAckers() { return execsWithBoundAckers; } - public Map> getWorkerSlotToBoundAckers() { - return workerSlotToBoundAckers; - } - public boolean areSearchLimitsExceeded() { return statesSearched > maxStatesSearched || Time.currentTimeMillis() > maxEndTimeMs; } @@ -282,9 +274,34 @@ public int getNumOfAckersToBind(ExecutorDetails exec, WorkerSlot workerSlot) { return 0; } - public void backtrack(Map execToComp, RasNode node, WorkerSlot workerSlot) { + /** + * Backtrack to prior executor that was directly assigned. This excludes bound-ackers. + * + * @param execToComp map from executor to component. + * @param nodesForExec array of nodes for all execIndex - has null values for bound-acker indices. + * @param workerSlotForExec array of workerSlots for all execIndex - has null values for bound-acker indices. + */ + public void backtrack(Map execToComp, RasNode[] nodesForExec, WorkerSlot[] workerSlotForExec) { execIndex--; - // when backtrack, we need to skip over the bound ackers + /* + After decrementing execIndex, it is expected to point to the target executor to backtrack to. + However, due to the way assignment occurs, this is not the case. 
Executors are ordered in the following + sequence + - Non-Acker-Executor + - Bound-Ackers + - Unbound-Ackers + However, Assignment is in the following order: + - repeated sequence of: + - Non-Acker-Executor + - Its-Bound-Ackers + - Unbound-Ackers (if any left over) + Additionally, execIndex is only updated when Non-Acker-Executor and Unbound-Executors are assigned. + To ensure that the counting is correct, the execIndex is incremented when Bound-Ackers are encountered. + However, nodesForExec and workerSlotForExec are not set for bound-ackers, and the execIndex value gets in sync + after all the bound-ackers are skipped. + */ + + // back over any executors that were not assigned directly - i.e. bound-ackers while (execIndex >= 0 && boundAckers.contains(execs.get(execIndex))) { execIndex--; } @@ -294,6 +311,8 @@ public void backtrack(Map execToComp, RasNode node, Wor numBacktrack++; ExecutorDetails exec = currentExec(); String comp = execToComp.get(exec); + RasNode node = nodesForExec[execIndex]; + WorkerSlot workerSlot = workerSlotForExec[execIndex]; LOG.trace("Topology {} Backtracking {} {} from {}", topoName, exec, comp, workerSlot); if (okToRemoveFromWorker[execIndex]) { Map compToAssignmentCount = workerCompAssignmentCnts.get(workerSlot); @@ -316,6 +335,7 @@ public void backtrack(Map execToComp, RasNode node, Wor // If this exec has bound ackers, we need to backtrack them as well if (execsWithBoundAckers.remove(exec)) { if (workerSlotToBoundAckers.containsKey(workerSlot)) { + // Note that bound-ackers for this (and only this) executor are on the workerSlot freeWorkerSlotWithBoundAckers(node, workerSlot); } } @@ -351,10 +371,11 @@ public void assignSingleBoundAcker(RasNode node, WorkerSlot workerSlot) { } /** - * Free a given workerSlot and all the assigned bound ackers already there. + * Free the bound-ackers for the given node and workerSlot. + * All the bound-ackers for an executor (and only that executor) are on the same workerSlot. * - * @param node RasNode which to be freed. - * @param workerSlot WorkerSlot on which to schedule. + * @param node RasNode to be freed. + * @param workerSlot WorkerSlot to be freed. 
*/ public void freeWorkerSlotWithBoundAckers(RasNode node, WorkerSlot workerSlot) { List ackers = workerSlotToBoundAckers.get(workerSlot); @@ -378,6 +399,7 @@ public void freeWorkerSlotWithBoundAckers(RasNode node, WorkerSlot workerSlot) { nodeToAssignmentCount.remove(ackerCompId); } } + // Note: all the bound-ackers for the same executor (and no other) are on one workerSlot workerSlotToBoundAckers.remove(workerSlot); node.free(workerSlot); } diff --git a/storm-server/src/test/java/org/apache/storm/TestRebalance.java b/storm-server/src/test/java/org/apache/storm/TestRebalance.java index 53bd0719abc..3800809d13d 100644 --- a/storm-server/src/test/java/org/apache/storm/TestRebalance.java +++ b/storm-server/src/test/java/org/apache/storm/TestRebalance.java @@ -14,7 +14,6 @@ import java.util.HashMap; import java.util.Map; -import org.apache.storm.generated.AuthorizationException; import org.apache.storm.generated.ClusterSummary; import org.apache.storm.generated.RebalanceOptions; import org.apache.storm.generated.NotAliveException; @@ -24,6 +23,10 @@ import org.apache.storm.scheduler.resource.TestUtilsForResourceAwareScheduler; import org.apache.storm.scheduler.resource.strategies.priority.DefaultSchedulingPriorityStrategy; import org.apache.storm.scheduler.resource.strategies.scheduling.DefaultResourceAwareStrategy; +import org.apache.storm.scheduler.resource.strategies.scheduling.DefaultResourceAwareStrategyOld; +import org.apache.storm.scheduler.resource.strategies.scheduling.GenericResourceAwareStrategy; +import org.apache.storm.scheduler.resource.strategies.scheduling.GenericResourceAwareStrategyOld; +import org.apache.storm.scheduler.resource.strategies.scheduling.RoundRobinResourceAwareStrategy; import org.apache.storm.thrift.TException; import org.apache.storm.topology.BoltDeclarer; import org.apache.storm.topology.SpoutDeclarer; @@ -39,6 +42,13 @@ import static org.junit.jupiter.api.Assertions.assertTrue; public class TestRebalance { + private static final Class[] strategyClasses = { + DefaultResourceAwareStrategy.class, + DefaultResourceAwareStrategyOld.class, + RoundRobinResourceAwareStrategy.class, + GenericResourceAwareStrategy.class, + GenericResourceAwareStrategyOld.class, + }; static final int SLEEP_TIME_BETWEEN_RETRY = 1000; @@ -54,93 +64,90 @@ public static String topoNameToId(String topoName, ILocalCluster cluster) throws return null; } - protected Class getDefaultResourceAwareStrategyClass() { - return DefaultResourceAwareStrategy.class; - } - @Test public void testRebalanceTopologyResourcesAndConfigs() throws Exception { + for (Class strategyClass : strategyClasses) { + LOG.info("Starting local cluster... using {}", strategyClass.getName()); - LOG.info("Starting local cluster..."); - - Config conf = new Config(); - conf.put(DaemonConfig.STORM_SCHEDULER, ResourceAwareScheduler.class.getName()); - conf.put(DaemonConfig.RESOURCE_AWARE_SCHEDULER_PRIORITY_STRATEGY, DefaultSchedulingPriorityStrategy.class.getName()); - conf.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, getDefaultResourceAwareStrategyClass().getName()); - conf.put(Config.TOPOLOGY_COMPONENT_CPU_PCORE_PERCENT, 10.0); - conf.put(Config.TOPOLOGY_COMPONENT_RESOURCES_OFFHEAP_MEMORY_MB, 10.0); - conf.put(Config.TOPOLOGY_COMPONENT_RESOURCES_ONHEAP_MEMORY_MB, 100.0); - conf.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, Double.MAX_VALUE); - Map resourcesMap = new HashMap(); - resourcesMap.put("gpu.count", 5.0); - conf.put(Config.TOPOLOGY_COMPONENT_RESOURCES_MAP, resourcesMap); + Config conf = new Config(); +
conf.put(DaemonConfig.STORM_SCHEDULER, ResourceAwareScheduler.class.getName()); + conf.put(DaemonConfig.RESOURCE_AWARE_SCHEDULER_PRIORITY_STRATEGY, DefaultSchedulingPriorityStrategy.class.getName()); + conf.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, strategyClass.getName()); + conf.put(Config.TOPOLOGY_COMPONENT_CPU_PCORE_PERCENT, 10.0); + conf.put(Config.TOPOLOGY_COMPONENT_RESOURCES_OFFHEAP_MEMORY_MB, 10.0); + conf.put(Config.TOPOLOGY_COMPONENT_RESOURCES_ONHEAP_MEMORY_MB, 100.0); + conf.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, Double.MAX_VALUE); + Map resourcesMap = new HashMap(); + resourcesMap.put("gpu.count", 5.0); + conf.put(Config.TOPOLOGY_COMPONENT_RESOURCES_MAP, resourcesMap); - try (ILocalCluster cluster = new LocalCluster.Builder().withDaemonConf(conf).build()) { + try (ILocalCluster cluster = new LocalCluster.Builder().withDaemonConf(conf).build()) { - TopologyBuilder builder = new TopologyBuilder(); - SpoutDeclarer s1 = builder.setSpout("spout-1", new TestUtilsForResourceAwareScheduler.TestSpout(), - 2); - BoltDeclarer b1 = builder.setBolt("bolt-1", new TestUtilsForResourceAwareScheduler.TestBolt(), - 2).shuffleGrouping("spout-1"); - BoltDeclarer b2 = builder.setBolt("bolt-2", new TestUtilsForResourceAwareScheduler.TestBolt(), - 2).shuffleGrouping("bolt-1"); + TopologyBuilder builder = new TopologyBuilder(); + SpoutDeclarer s1 = builder.setSpout("spout-1", new TestUtilsForResourceAwareScheduler.TestSpout(), + 2); + BoltDeclarer b1 = builder.setBolt("bolt-1", new TestUtilsForResourceAwareScheduler.TestBolt(), + 2).shuffleGrouping("spout-1"); + BoltDeclarer b2 = builder.setBolt("bolt-2", new TestUtilsForResourceAwareScheduler.TestBolt(), + 2).shuffleGrouping("bolt-1"); - StormTopology stormTopology = builder.createTopology(); + StormTopology stormTopology = builder.createTopology(); - LOG.info("submitting topologies...."); - String topoName = "topo1"; - cluster.submitTopology(topoName, new HashMap<>(), stormTopology); + LOG.info("submitting topologies...."); + String topoName = "topo1"; + cluster.submitTopology(topoName, new HashMap<>(), stormTopology); - waitTopologyScheduled(topoName, cluster, 20); + waitTopologyScheduled(topoName, cluster, 20); - RebalanceOptions opts = new RebalanceOptions(); + RebalanceOptions opts = new RebalanceOptions(); - Map> resources = new HashMap>(); - resources.put("spout-1", new HashMap()); - resources.get("spout-1").put(Config.TOPOLOGY_COMPONENT_RESOURCES_ONHEAP_MEMORY_MB, 120.0); - resources.get("spout-1").put(Config.TOPOLOGY_COMPONENT_CPU_PCORE_PERCENT, 25.0); - resources.get("spout-1").put("gpu.count", 5.0); + Map> resources = new HashMap>(); + resources.put("spout-1", new HashMap()); + resources.get("spout-1").put(Config.TOPOLOGY_COMPONENT_RESOURCES_ONHEAP_MEMORY_MB, 120.0); + resources.get("spout-1").put(Config.TOPOLOGY_COMPONENT_CPU_PCORE_PERCENT, 25.0); + resources.get("spout-1").put("gpu.count", 5.0); - opts.set_topology_resources_overrides(resources); - opts.set_wait_secs(0); + opts.set_topology_resources_overrides(resources); + opts.set_wait_secs(0); - JSONObject jsonObject = new JSONObject(); - jsonObject.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, 768.0); + JSONObject jsonObject = new JSONObject(); + jsonObject.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, 768.0); - opts.set_topology_conf_overrides(jsonObject.toJSONString()); + opts.set_topology_conf_overrides(jsonObject.toJSONString()); - LOG.info("rebalancing...."); - cluster.rebalance("topo1", opts); + LOG.info("rebalancing...."); + cluster.rebalance("topo1", opts); - 
waitTopologyScheduled(topoName, cluster, 10); + waitTopologyScheduled(topoName, cluster, 10); - boolean topologyUpdated = false; - JSONParser parser = new JSONParser(); + boolean topologyUpdated = false; + JSONParser parser = new JSONParser(); - for (int i = 0; i < 5; i++) { - Utils.sleep(SLEEP_TIME_BETWEEN_RETRY); + for (int i = 0; i < 5; i++) { + Utils.sleep(SLEEP_TIME_BETWEEN_RETRY); - String confRaw = cluster.getTopologyConf(topoNameToId(topoName, cluster)); + String confRaw = cluster.getTopologyConf(topoNameToId(topoName, cluster)); - JSONObject readConf = (JSONObject) parser.parse(confRaw); - if (768.0 == (double) readConf.get(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB)) { - topologyUpdated = true; - break; + JSONObject readConf = (JSONObject) parser.parse(confRaw); + if (768.0 == (double) readConf.get(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB)) { + topologyUpdated = true; + break; + } } - } - StormTopology readStormTopology = cluster.getTopology(topoNameToId(topoName, cluster)); - String componentConfRaw = readStormTopology.get_spouts().get("spout-1").get_common().get_json_conf(); + StormTopology readStormTopology = cluster.getTopology(topoNameToId(topoName, cluster)); + String componentConfRaw = readStormTopology.get_spouts().get("spout-1").get_common().get_json_conf(); - JSONObject readTopologyConf = (JSONObject) parser.parse(componentConfRaw); + JSONObject readTopologyConf = (JSONObject) parser.parse(componentConfRaw); - Map componentResources = (Map) readTopologyConf.get(Config.TOPOLOGY_COMPONENT_RESOURCES_MAP); - assertTrue(topologyUpdated, "Topology has been updated"); - assertEquals(25.0, componentResources.get(Constants.COMMON_CPU_RESOURCE_NAME), 0.001, "Updated CPU correct"); - assertEquals(120.0, componentResources.get(Constants.COMMON_ONHEAP_MEMORY_RESOURCE_NAME), 0.001, "Updated Memory correct"); - assertEquals(5.0, componentResources.get("gpu.count"), 0.001, "Updated Generic resource correct"); + Map componentResources = (Map) readTopologyConf.get(Config.TOPOLOGY_COMPONENT_RESOURCES_MAP); + assertTrue(topologyUpdated, "Topology has been updated"); + assertEquals(25.0, componentResources.get(Constants.COMMON_CPU_RESOURCE_NAME), 0.001, "Updated CPU correct"); + assertEquals(120.0, componentResources.get(Constants.COMMON_ONHEAP_MEMORY_RESOURCE_NAME), 0.001, "Updated Memory correct"); + assertEquals(5.0, componentResources.get("gpu.count"), 0.001, "Updated Generic resource correct"); + } } } diff --git a/storm-server/src/test/java/org/apache/storm/daemon/nimbus/NimbusTest.java b/storm-server/src/test/java/org/apache/storm/daemon/nimbus/NimbusTest.java index 187ad0d52c3..002827ed42d 100644 --- a/storm-server/src/test/java/org/apache/storm/daemon/nimbus/NimbusTest.java +++ b/storm-server/src/test/java/org/apache/storm/daemon/nimbus/NimbusTest.java @@ -29,6 +29,8 @@ import org.apache.storm.generated.StormTopology; import org.apache.storm.scheduler.resource.strategies.priority.DefaultSchedulingPriorityStrategy; import org.apache.storm.scheduler.resource.strategies.scheduling.DefaultResourceAwareStrategy; +import org.apache.storm.scheduler.resource.strategies.scheduling.GenericResourceAwareStrategyOld; +import org.apache.storm.scheduler.resource.strategies.scheduling.RoundRobinResourceAwareStrategy; import org.apache.storm.testing.TestWordSpout; import org.apache.storm.topology.TopologyBuilder; import org.apache.storm.utils.ServerUtils; @@ -41,10 +43,6 @@ import static org.junit.jupiter.api.Assertions.assertNull; public class NimbusTest { - protected Class 
getDefaultResourceAwareStrategyClass() { - return DefaultResourceAwareStrategy.class; - } - @Test public void testMemoryLoadLargerThanMaxHeapSize() { // Topology will not be able to be successfully scheduled: Config TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB=128.0 < 129.0, @@ -56,18 +54,25 @@ public void testMemoryLoadLargerThanMaxHeapSize() { config1.put(Config.STORM_NETWORK_TOPOGRAPHY_PLUGIN, "org.apache.storm.networktopography.DefaultRackDNSToSwitchMapping"); config1.put(DaemonConfig.RESOURCE_AWARE_SCHEDULER_PRIORITY_STRATEGY, DefaultSchedulingPriorityStrategy.class.getName()); - config1.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, getDefaultResourceAwareStrategyClass().getName()); config1.put(Config.TOPOLOGY_COMPONENT_CPU_PCORE_PERCENT, 10.0); config1.put(Config.TOPOLOGY_COMPONENT_RESOURCES_OFFHEAP_MEMORY_MB, 0.0); config1.put(Config.TOPOLOGY_PRIORITY, 0); config1.put(Config.TOPOLOGY_SUBMITTER_USER, "zhuo"); config1.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, 128.0); config1.put(Config.TOPOLOGY_COMPONENT_RESOURCES_ONHEAP_MEMORY_MB, 129.0); - try { - ServerUtils.validateTopologyWorkerMaxHeapSizeConfigs(config1, stormTopology1, 768.0); - fail("Expected exception not thrown"); - } catch (InvalidTopologyException e) { - //Expected... + Class[] strategyClasses = { + DefaultResourceAwareStrategy.class, + RoundRobinResourceAwareStrategy.class, + GenericResourceAwareStrategyOld.class}; + for (Class strategyClass: strategyClasses) { + String strategyClassName = strategyClass.getName(); + config1.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, strategyClassName); + try { + ServerUtils.validateTopologyWorkerMaxHeapSizeConfigs(config1, stormTopology1, 768.0); + fail("Expected exception not thrown when using Strategy " + strategyClassName); + } catch (InvalidTopologyException e) { + //Expected... 
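The try/fail/catch shape repeated per strategy here could equally be written with JUnit 5's assertThrows; a sketch of that alternative (config1, stormTopology1, and strategyClassName come from the surrounding test), not what the patch itself does:

```java
// import static org.junit.jupiter.api.Assertions.assertThrows;
// Inside the loop, after putting strategyClassName into config1:
assertThrows(InvalidTopologyException.class,
        () -> ServerUtils.validateTopologyWorkerMaxHeapSizeConfigs(config1, stormTopology1, 768.0),
        "Expected InvalidTopologyException when using strategy " + strategyClassName);
```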
+ } } } diff --git a/storm-server/src/test/java/org/apache/storm/scheduler/blacklist/TestBlacklistScheduler.java b/storm-server/src/test/java/org/apache/storm/scheduler/blacklist/TestBlacklistScheduler.java index 8353c7d68c6..bed28ce218e 100644 --- a/storm-server/src/test/java/org/apache/storm/scheduler/blacklist/TestBlacklistScheduler.java +++ b/storm-server/src/test/java/org/apache/storm/scheduler/blacklist/TestBlacklistScheduler.java @@ -32,6 +32,10 @@ import org.apache.storm.scheduler.TopologyDetails; import org.apache.storm.scheduler.resource.ResourceAwareScheduler; import org.apache.storm.scheduler.resource.strategies.scheduling.DefaultResourceAwareStrategy; +import org.apache.storm.scheduler.resource.strategies.scheduling.DefaultResourceAwareStrategyOld; +import org.apache.storm.scheduler.resource.strategies.scheduling.GenericResourceAwareStrategy; +import org.apache.storm.scheduler.resource.strategies.scheduling.GenericResourceAwareStrategyOld; +import org.apache.storm.scheduler.resource.strategies.scheduling.RoundRobinResourceAwareStrategy; import org.apache.storm.utils.Utils; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Test; @@ -51,6 +55,7 @@ import org.apache.storm.scheduler.resource.normalization.ResourceMetrics; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; public class TestBlacklistScheduler { @@ -60,10 +65,6 @@ public class TestBlacklistScheduler { private int currentTime = 1468216504; private IScheduler scheduler = null; - protected Class getDefaultResourceAwareStrategyClass() { - return DefaultResourceAwareStrategy.class; - } - @AfterEach public void cleanup() { if (scheduler != null) { @@ -244,39 +245,56 @@ public void TestGreylist() { config.put(Config.TOPOLOGY_COMPONENT_CPU_PCORE_PERCENT, 0.0); config.put(Config.TOPOLOGY_COMPONENT_RESOURCES_ONHEAP_MEMORY_MB, 0); config.put(Config.TOPOLOGY_COMPONENT_RESOURCES_OFFHEAP_MEMORY_MB, 0); - config.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, getDefaultResourceAwareStrategyClass().getName()); config.put(Config.TOPOLOGY_RAS_ONE_EXECUTOR_PER_WORKER, true); - Map topoMap = new HashMap<>(); - - TopologyDetails topo1 = TestUtilsForBlacklistScheduler.getTopology("topo-1", config, 1, 1, 1, 1, currentTime - 2, true); - TopologyDetails topo2 = TestUtilsForBlacklistScheduler.getTopology("topo-2", config, 1, 1, 1, 1, currentTime - 8, true); - Topologies topologies = new Topologies(topoMap); - - StormMetricsRegistry metricsRegistry = new StormMetricsRegistry(); - ResourceMetrics resourceMetrics = new ResourceMetrics(metricsRegistry); - Cluster cluster = new Cluster(iNimbus, resourceMetrics, supMap, new HashMap(), topologies, config); - scheduler = new BlacklistScheduler(new ResourceAwareScheduler()); - - scheduler.prepare(config, metricsRegistry); - scheduler.schedule(topologies, cluster); - cluster = new Cluster(iNimbus, resourceMetrics, TestUtilsForBlacklistScheduler.removeSupervisorFromSupervisors(supMap, "sup-0"), TestUtilsForBlacklistScheduler.assignmentMapToImpl(cluster.getAssignments()), topologies, config); - scheduler.schedule(topologies, cluster); - cluster = new Cluster(iNimbus, resourceMetrics, TestUtilsForBlacklistScheduler.removeSupervisorFromSupervisors(supMap, "sup-0"), TestUtilsForBlacklistScheduler.assignmentMapToImpl(cluster.getAssignments()), topologies, config); - scheduler.schedule(topologies, cluster); - cluster = new Cluster(iNimbus, resourceMetrics, supMap, 
TestUtilsForBlacklistScheduler.assignmentMapToImpl(cluster.getAssignments()), topologies, config); - scheduler.schedule(topologies, cluster); - assertEquals(Collections.singleton("host-0"), cluster.getBlacklistedHosts(), "blacklist"); - - topoMap.put(topo1.getId(), topo1); - topoMap.put(topo2.getId(), topo2); - topologies = new Topologies(topoMap); - cluster = new Cluster(iNimbus, resourceMetrics, supMap, TestUtilsForBlacklistScheduler.assignmentMapToImpl(cluster.getAssignments()), topologies, config); - scheduler.schedule(topologies, cluster); - assertEquals(Collections.emptySet(), cluster.getBlacklistedHosts(), "blacklist"); - assertEquals(Collections.singletonList("sup-0"), cluster.getGreyListedSupervisors(), "greylist"); - LOG.debug("Now only these slots remain available: {}", cluster.getAvailableSlots()); - assertTrue(cluster.getAvailableSlots(supMap.get("sup-0")).containsAll(cluster.getAvailableSlots())); + Class[] strategyClasses = { + DefaultResourceAwareStrategy.class, + DefaultResourceAwareStrategyOld.class, + RoundRobinResourceAwareStrategy.class, + GenericResourceAwareStrategy.class, + GenericResourceAwareStrategyOld.class, + }; + for (Class strategyClass: strategyClasses) { + String strategyClassName = strategyClass.getName(); + config.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, strategyClassName); + { + Map topoMap = new HashMap<>(); + + TopologyDetails topo1 = TestUtilsForBlacklistScheduler.getTopology("topo-1", config, 1, 1, 1, 1, currentTime - 2, true); + TopologyDetails topo2 = TestUtilsForBlacklistScheduler.getTopology("topo-2", config, 1, 1, 1, 1, currentTime - 8, true); + Topologies topologies = new Topologies(topoMap); + + StormMetricsRegistry metricsRegistry = new StormMetricsRegistry(); + ResourceMetrics resourceMetrics = new ResourceMetrics(metricsRegistry); + Cluster cluster = new Cluster(iNimbus, resourceMetrics, supMap, new HashMap(), topologies, config); + scheduler = new BlacklistScheduler(new ResourceAwareScheduler()); + + scheduler.prepare(config, metricsRegistry); + scheduler.schedule(topologies, cluster); + cluster = new Cluster(iNimbus, resourceMetrics, TestUtilsForBlacklistScheduler.removeSupervisorFromSupervisors(supMap, "sup-0"), TestUtilsForBlacklistScheduler.assignmentMapToImpl(cluster.getAssignments()), topologies, config); + scheduler.schedule(topologies, cluster); + cluster = new Cluster(iNimbus, resourceMetrics, TestUtilsForBlacklistScheduler.removeSupervisorFromSupervisors(supMap, "sup-0"), TestUtilsForBlacklistScheduler.assignmentMapToImpl(cluster.getAssignments()), topologies, config); + scheduler.schedule(topologies, cluster); + cluster = new Cluster(iNimbus, resourceMetrics, supMap, TestUtilsForBlacklistScheduler.assignmentMapToImpl(cluster.getAssignments()), topologies, config); + scheduler.schedule(topologies, cluster); + assertEquals(Collections.singleton("host-0"), cluster.getBlacklistedHosts(), "blacklist"); + + topoMap.put(topo1.getId(), topo1); + topoMap.put(topo2.getId(), topo2); + topologies = new Topologies(topoMap); + cluster = new Cluster(iNimbus, resourceMetrics, supMap, TestUtilsForBlacklistScheduler.assignmentMapToImpl(cluster.getAssignments()), topologies, config); + scheduler.schedule(topologies, cluster); + assertEquals(Collections.emptySet(), cluster.getBlacklistedHosts(), "blacklist using " + strategyClassName); + assertEquals(Collections.singletonList("sup-0"), cluster.getGreyListedSupervisors(), "greylist using" + strategyClassName); + LOG.debug("{}: Now only these slots remain available: {}", strategyClassName, 
cluster.getAvailableSlots()); + if (strategyClass == RoundRobinResourceAwareStrategy.class) { + // available slots will be across supervisors + assertFalse(cluster.getAvailableSlots(supMap.get("sup-0")).containsAll(cluster.getAvailableSlots()), "using " + strategyClassName); + } else { + assertTrue(cluster.getAvailableSlots(supMap.get("sup-0")).containsAll(cluster.getAvailableSlots()), "using " + strategyClassName); + } + } + } } @Test diff --git a/storm-server/src/test/java/org/apache/storm/scheduler/resource/TestResourceAwareScheduler.java b/storm-server/src/test/java/org/apache/storm/scheduler/resource/TestResourceAwareScheduler.java index 5355a4e8e16..0a69f66e146 100644 --- a/storm-server/src/test/java/org/apache/storm/scheduler/resource/TestResourceAwareScheduler.java +++ b/storm-server/src/test/java/org/apache/storm/scheduler/resource/TestResourceAwareScheduler.java @@ -12,6 +12,7 @@ package org.apache.storm.scheduler.resource; +import java.time.Duration; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -30,6 +31,7 @@ import org.apache.storm.DaemonConfig; import org.apache.storm.generated.StormTopology; import org.apache.storm.generated.WorkerResources; +import org.apache.storm.metric.StormMetricsRegistry; import org.apache.storm.scheduler.Cluster; import org.apache.storm.scheduler.ExecutorDetails; import org.apache.storm.scheduler.INimbus; @@ -46,6 +48,9 @@ import org.apache.storm.scheduler.resource.strategies.scheduling.DefaultResourceAwareStrategy; import org.apache.storm.scheduler.resource.strategies.scheduling.DefaultResourceAwareStrategyOld; import org.apache.storm.scheduler.resource.strategies.scheduling.GenericResourceAwareStrategy; +import org.apache.storm.scheduler.resource.strategies.scheduling.GenericResourceAwareStrategyOld; +import org.apache.storm.scheduler.resource.strategies.scheduling.RoundRobinResourceAwareStrategy; +import org.apache.storm.scheduler.resource.normalization.ResourceMetrics; import org.apache.storm.testing.PerformanceTest; import org.apache.storm.testing.TestWordCounter; import org.apache.storm.testing.TestWordSpout; @@ -57,6 +62,8 @@ import org.apache.storm.utils.Utils; import org.apache.storm.validation.ConfigValidation; import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -65,44 +72,29 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; -import java.time.Duration; -import org.apache.storm.metric.StormMetricsRegistry; -import org.apache.storm.scheduler.resource.normalization.ResourceMetrics; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; public class TestResourceAwareScheduler { private static final Logger LOG = LoggerFactory.getLogger(TestResourceAwareScheduler.class); + private static final Class[] strategyClasses = { + DefaultResourceAwareStrategy.class, + RoundRobinResourceAwareStrategy.class, + GenericResourceAwareStrategy.class, + }; private final Config defaultTopologyConf; private int currentTime = 1450418597; private IScheduler scheduler = null; public TestResourceAwareScheduler() { - defaultTopologyConf = createClusterConfig(10, 128, 0, null); + defaultTopologyConf = createClusterConfig(DefaultResourceAwareStrategy.class, 10, 128, 0, null); defaultTopologyConf.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, 8192.0); defaultTopologyConf.put(Config.TOPOLOGY_PRIORITY, 0); } - protected 
Class getDefaultResourceAwareStrategyClass() { - return DefaultResourceAwareStrategy.class; - } - - protected Class getGenericResourceAwareStrategyClass() { - return GenericResourceAwareStrategy.class; - } - - private Config createGrasClusterConfig(double compPcore, double compOnHeap, double compOffHeap, - Map> pools, Map genericResourceMap) { - Config config = TestUtilsForResourceAwareScheduler.createGrasClusterConfig(compPcore, compOnHeap, compOffHeap, pools, genericResourceMap); - config.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, getGenericResourceAwareStrategyClass().getName()); - return config; - } - - private Config createClusterConfig(double compPcore, double compOnHeap, double compOffHeap, + private Config createClusterConfig(Class strategyClass, double compPcore, double compOnHeap, double compOffHeap, Map> pools) { Config config = TestUtilsForResourceAwareScheduler.createClusterConfig(compPcore, compOnHeap, compOffHeap, pools); - config.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, getDefaultResourceAwareStrategyClass().getName()); + config.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, strategyClass.getName()); return config; } @@ -712,14 +704,14 @@ public void testHeterogeneousCluster(Config topologyConf, String strategyName) { @Test public void testHeterogeneousClusterwithDefaultRas() { - testHeterogeneousCluster(defaultTopologyConf, getDefaultResourceAwareStrategyClass().getSimpleName()); + testHeterogeneousCluster(defaultTopologyConf, DefaultResourceAwareStrategy.class.getSimpleName()); } @Test public void testHeterogeneousClusterwithGras() { Config grasClusterConfig = (Config) defaultTopologyConf.clone(); - grasClusterConfig.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, getGenericResourceAwareStrategyClass().getName()); - testHeterogeneousCluster(grasClusterConfig, getGenericResourceAwareStrategyClass().getSimpleName()); + grasClusterConfig.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, GenericResourceAwareStrategy.class.getName()); + testHeterogeneousCluster(grasClusterConfig, GenericResourceAwareStrategy.class.getSimpleName()); } @Test @@ -791,79 +783,87 @@ public void testSubmitUsersWithNoGuarantees() { Map> resourceUserPool = userResourcePool( userRes("jerry", 200, 2000)); - Config config = createClusterConfig(100, 500, 500, resourceUserPool); + for (Class strategyClass: strategyClasses) { + Config config = createClusterConfig(strategyClass, 100, 500, 500, resourceUserPool); - Topologies topologies = new Topologies( - genTopology("topo-1", config, 1, 0, 1, 0, currentTime - 2, 10, "jerry"), - genTopology("topo-2", config, 1, 0, 1, 0, currentTime - 2, 20, "jerry"), - genTopology("topo-3", config, 1, 0, 1, 0, currentTime - 2, 20, "jerry"), - genTopology("topo-4", config, 1, 0, 1, 0, currentTime - 2, 10, "bobby"), - genTopology("topo-5", config, 1, 0, 1, 0, currentTime - 2, 20, "bobby")); - Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config); + Topologies topologies = new Topologies( + genTopology("topo-1", config, 1, 0, 1, 0, currentTime - 2, 10, "jerry"), + genTopology("topo-2", config, 1, 0, 1, 0, currentTime - 2, 20, "jerry"), + genTopology("topo-3", config, 1, 0, 1, 0, currentTime - 2, 20, "jerry"), + genTopology("topo-4", config, 1, 0, 1, 0, currentTime - 2, 10, "bobby"), + genTopology("topo-5", config, 1, 0, 1, 0, currentTime - 2, 20, "bobby")); + Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config); - scheduler = new 
ResourceAwareScheduler(); - scheduler.prepare(config, new StormMetricsRegistry()); - scheduler.schedule(topologies, cluster); - assertTopologiesFullyScheduled(cluster, "topo-1", "topo-2", "topo-3", "topo-4"); - assertTopologiesNotScheduled(cluster, "topo-5"); + scheduler = new ResourceAwareScheduler(); + scheduler.prepare(config, new StormMetricsRegistry()); + scheduler.schedule(topologies, cluster); + assertTopologiesFullyScheduled(cluster, strategyClass, "topo-1", "topo-2", "topo-3", "topo-4"); + assertTopologiesNotScheduled(cluster, strategyClass, "topo-5"); + } } @Test public void testMultipleUsers() { - INimbus iNimbus = new INimbusTest(); - Map supMap = genSupervisors(20, 4, 1000, 1024 * 10); - Map> resourceUserPool = userResourcePool( - userRes("jerry", 1_000, 8_192), - userRes("bobby", 10_000, 32_768), - userRes("derek", 5_000, 16_384)); - Config config = createClusterConfig(10, 128, 0, resourceUserPool); - - Topologies topologies = new Topologies( - genTopology("topo-1", config, 5, 15, 1, 1, currentTime - 2, 20, "jerry"), - genTopology("topo-2", config, 5, 15, 1, 1, currentTime - 8, 29, "jerry"), - genTopology("topo-3", config, 5, 15, 1, 1, currentTime - 16, 29, "jerry"), - genTopology("topo-4", config, 5, 15, 1, 1, currentTime - 16, 20, "jerry"), - genTopology("topo-5", config, 5, 15, 1, 1, currentTime - 24, 29, "jerry"), - genTopology("topo-6", config, 5, 15, 1, 1, currentTime - 2, 20, "bobby"), - genTopology("topo-7", config, 5, 15, 1, 1, currentTime - 8, 29, "bobby"), - genTopology("topo-8", config, 5, 15, 1, 1, currentTime - 16, 29, "bobby"), - genTopology("topo-9", config, 5, 15, 1, 1, currentTime - 16, 20, "bobby"), - genTopology("topo-10", config, 5, 15, 1, 1, currentTime - 24, 29, "bobby"), - genTopology("topo-11", config, 5, 15, 1, 1, currentTime - 2, 20, "derek"), - genTopology("topo-12", config, 5, 15, 1, 1, currentTime - 8, 29, "derek"), - genTopology("topo-13", config, 5, 15, 1, 1, currentTime - 16, 29, "derek"), - genTopology("topo-14", config, 5, 15, 1, 1, currentTime - 16, 20, "derek"), - genTopology("topo-15", config, 5, 15, 1, 1, currentTime - 24, 29, "derek")); - Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config); + for (Class strategyClass: strategyClasses) { + if (strategyClass.getName().equals(RoundRobinResourceAwareStrategy.class.getName())) { + continue; // exclude RoundRobin from this test + } + INimbus iNimbus = new INimbusTest(); + Map supMap = genSupervisors(20, 4, 1000, 1024 * 10); + Map> resourceUserPool = userResourcePool( + userRes("jerry", 1_000, 8_192), + userRes("bobby", 10_000, 32_768), + userRes("derek", 5_000, 16_384)); + Config config = createClusterConfig(strategyClass, 10, 128, 0, resourceUserPool); + TopologyDetails[] topos = { + genTopology("topo-1", config, 5, 15, 1, 1, currentTime - 2, 20, "jerry"), + genTopology("topo-2", config, 5, 15, 1, 1, currentTime - 8, 29, "jerry"), + genTopology("topo-3", config, 5, 15, 1, 1, currentTime - 16, 29, "jerry"), + genTopology("topo-4", config, 5, 15, 1, 1, currentTime - 16, 20, "jerry"), + genTopology("topo-5", config, 5, 15, 1, 1, currentTime - 24, 29, "jerry"), + genTopology("topo-6", config, 5, 15, 1, 1, currentTime - 2, 20, "bobby"), + genTopology("topo-7", config, 5, 15, 1, 1, currentTime - 8, 29, "bobby"), + genTopology("topo-8", config, 5, 15, 1, 1, currentTime - 16, 29, "bobby"), + genTopology("topo-9", config, 5, 15, 1, 1, currentTime - 16, 20, "bobby"), + genTopology("topo-10", config, 5, 15, 1, 1, 
currentTime - 24, 29, "bobby"), + genTopology("topo-11", config, 5, 15, 1, 1, currentTime - 2, 20, "derek"), + genTopology("topo-12", config, 5, 15, 1, 1, currentTime - 8, 29, "derek"), + genTopology("topo-13", config, 5, 15, 1, 1, currentTime - 16, 29, "derek"), + genTopology("topo-14", config, 5, 15, 1, 1, currentTime - 16, 20, "derek"), + genTopology("topo-15", config, 5, 15, 1, 1, currentTime - 24, 29, "derek"), + }; + Topologies topologies = new Topologies(topos); + Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config); - scheduler = new ResourceAwareScheduler(); - scheduler.prepare(config, new StormMetricsRegistry()); - scheduler.schedule(topologies, cluster); - for (TopologyDetails td : topologies) { - assertTopologiesFullyScheduled(cluster, td.getName()); + scheduler = new ResourceAwareScheduler(); + scheduler.prepare(config, new StormMetricsRegistry()); + scheduler.schedule(topologies, cluster); + assertTopologiesFullyScheduled(cluster, strategyClass, topos.length); } } @Test public void testHandlingClusterSubscription() { - INimbus iNimbus = new INimbusTest(); - Map supMap = genSupervisors(1, 4, 200, 1024 * 10); - Map> resourceUserPool = userResourcePool( - userRes("jerry", 1_000, 8_192), - userRes("bobby", 10_000, 32_768), - userRes("derek", 5_000, 16_384)); - Config config = createClusterConfig(10, 128, 0, resourceUserPool); - - Topologies topologies = new Topologies( - genTopology("topo-1", config, 5, 15, 1, 1, currentTime - 2, 20, "jerry"), - genTopology("topo-2", config, 5, 15, 1, 1, currentTime - 8, 29, "jerry")); - Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config); - scheduler = new ResourceAwareScheduler(); - scheduler.prepare(config, new StormMetricsRegistry()); - scheduler.schedule(topologies, cluster); + for (Class strategyClass: strategyClasses) { + INimbus iNimbus = new INimbusTest(); + Map supMap = genSupervisors(1, 4, 200, 1024 * 10); + Map> resourceUserPool = userResourcePool( + userRes("jerry", 1_000, 8_192), + userRes("bobby", 10_000, 32_768), + userRes("derek", 5_000, 16_384)); + Config config = createClusterConfig(strategyClass, 10, 128, 0, resourceUserPool); + + Topologies topologies = new Topologies( + genTopology("topo-1", config, 5, 15, 1, 1, currentTime - 2, 20, "jerry"), + genTopology("topo-2", config, 5, 15, 1, 1, currentTime - 8, 29, "jerry")); + Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config); + scheduler = new ResourceAwareScheduler(); + scheduler.prepare(config, new StormMetricsRegistry()); + scheduler.schedule(topologies, cluster); - assertTopologiesFullyScheduled(cluster, "topo-1"); - assertTopologiesNotScheduled(cluster, "topo-2"); + assertTopologiesFullyScheduled(cluster, strategyClass, "topo-1"); + assertTopologiesNotScheduled(cluster, strategyClass, "topo-2"); + } } /** @@ -872,55 +872,57 @@ public void testHandlingClusterSubscription() { */ @Test public void testFaultTolerance() { - INimbus iNimbus = new INimbusTest(); - Map supMap = genSupervisors(6, 4, 100, 1000); - Map> resourceUserPool = userResourcePool( - userRes("jerry", 50, 500), - userRes("bobby", 200, 2_000), - userRes("derek", 100, 1_000)); - Config config = createClusterConfig(100, 500, 500, resourceUserPool); - - Topologies topologies = new Topologies( - genTopology("topo-1", config, 1, 0, 1, 0, currentTime - 2, 21, "jerry"), - 
genTopology("topo-2", config, 1, 0, 1, 0, currentTime - 2, 20, "jerry"), - genTopology("topo-3", config, 1, 0, 1, 0, currentTime - 2, 10, "bobby"), - genTopology("topo-4", config, 1, 0, 1, 0, currentTime - 2, 10, "bobby"), - genTopology("topo-5", config, 1, 0, 1, 0, currentTime - 2, 29, "derek"), - genTopology("topo-6", config, 1, 0, 1, 0, currentTime - 2, 10, "derek")); - Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config); - scheduler = new ResourceAwareScheduler(); + for (Class strategyClass: strategyClasses) { + INimbus iNimbus = new INimbusTest(); + Map supMap = genSupervisors(6, 4, 100, 1000); + Map> resourceUserPool = userResourcePool( + userRes("jerry", 50, 500), + userRes("bobby", 200, 2_000), + userRes("derek", 100, 1_000)); + Config config = createClusterConfig(strategyClass, 100, 500, 500, resourceUserPool); + + Topologies topologies = new Topologies( + genTopology("topo-1", config, 1, 0, 1, 0, currentTime - 2, 21, "jerry"), + genTopology("topo-2", config, 1, 0, 1, 0, currentTime - 2, 20, "jerry"), + genTopology("topo-3", config, 1, 0, 1, 0, currentTime - 2, 10, "bobby"), + genTopology("topo-4", config, 1, 0, 1, 0, currentTime - 2, 10, "bobby"), + genTopology("topo-5", config, 1, 0, 1, 0, currentTime - 2, 29, "derek"), + genTopology("topo-6", config, 1, 0, 1, 0, currentTime - 2, 10, "derek")); + Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config); + scheduler = new ResourceAwareScheduler(); - scheduler.prepare(config, new StormMetricsRegistry()); - scheduler.schedule(topologies, cluster); + scheduler.prepare(config, new StormMetricsRegistry()); + scheduler.schedule(topologies, cluster); - assertTopologiesFullyScheduled(cluster, "topo-1", "topo-2", "topo-3", "topo-4", "topo-5", "topo-6"); - - //fail supervisor - SupervisorDetails supFailed = cluster.getSupervisors().values().iterator().next(); - LOG.info("/***** failing supervisor: {} ****/", supFailed.getHost()); - supMap.remove(supFailed.getId()); - Map newAssignments = new HashMap<>(); - for (Map.Entry topoToAssignment : cluster.getAssignments().entrySet()) { - String topoId = topoToAssignment.getKey(); - SchedulerAssignment assignment = topoToAssignment.getValue(); - Map executorToSlots = new HashMap<>(); - for (Map.Entry execToWorker : assignment.getExecutorToSlot().entrySet()) { - ExecutorDetails exec = execToWorker.getKey(); - WorkerSlot ws = execToWorker.getValue(); - if (!ws.getNodeId().equals(supFailed.getId())) { - executorToSlots.put(exec, ws); + assertTopologiesFullyScheduled(cluster, strategyClass, "topo-1", "topo-2", "topo-3", "topo-4", "topo-5", "topo-6"); + + //fail supervisor + SupervisorDetails supFailed = cluster.getSupervisors().values().iterator().next(); + LOG.info("/***** failing supervisor: {} ****/", supFailed.getHost()); + supMap.remove(supFailed.getId()); + Map newAssignments = new HashMap<>(); + for (Map.Entry topoToAssignment : cluster.getAssignments().entrySet()) { + String topoId = topoToAssignment.getKey(); + SchedulerAssignment assignment = topoToAssignment.getValue(); + Map executorToSlots = new HashMap<>(); + for (Map.Entry execToWorker : assignment.getExecutorToSlot().entrySet()) { + ExecutorDetails exec = execToWorker.getKey(); + WorkerSlot ws = execToWorker.getValue(); + if (!ws.getNodeId().equals(supFailed.getId())) { + executorToSlots.put(exec, ws); + } } + newAssignments.put(topoId, new SchedulerAssignmentImpl(topoId, 
executorToSlots, null, null)); } - newAssignments.put(topoId, new SchedulerAssignmentImpl(topoId, executorToSlots, null, null)); - } - Map statusMap = cluster.getStatusMap(); - LOG.warn("Rescheduling with removed Supervisor...."); - cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, newAssignments, topologies, config); - cluster.setStatusMap(statusMap); - scheduler.schedule(topologies, cluster); + Map statusMap = cluster.getStatusMap(); + LOG.warn("Rescheduling with removed Supervisor...."); + cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, newAssignments, topologies, config); + cluster.setStatusMap(statusMap); + scheduler.schedule(topologies, cluster); - assertTopologiesFullyScheduled(cluster, "topo-2", "topo-3", "topo-4", "topo-5", "topo-6"); - assertTopologiesNotScheduled(cluster, "topo-1"); + assertTopologiesFullyScheduled(cluster, strategyClass, "topo-2", "topo-3", "topo-4", "topo-5", "topo-6"); + assertTopologiesNotScheduled(cluster, strategyClass, "topo-1"); + } } /** @@ -928,37 +930,39 @@ public void testFaultTolerance() { */ @Test public void testNodeFreeSlot() { - INimbus iNimbus = new INimbusTest(); - Map supMap = genSupervisors(4, 4, 100, 1000); - Config config = createClusterConfig(100, 500, 500, null); - - Topologies topologies = new Topologies( - genTopology("topo-1", config, 1, 0, 2, 0, currentTime - 2, 29, "user"), - genTopology("topo-2", config, 1, 0, 2, 0, currentTime - 2, 10, "user")); - Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config); - scheduler = new ResourceAwareScheduler(); - - scheduler.prepare(config, new StormMetricsRegistry()); - scheduler.schedule(topologies, cluster); + for (Class strategyClass: strategyClasses) { + INimbus iNimbus = new INimbusTest(); + Map supMap = genSupervisors(4, 4, 100, 1000); + Config config = createClusterConfig(strategyClass, 100, 500, 500, null); + + Topologies topologies = new Topologies( + genTopology("topo-1", config, 1, 0, 2, 0, currentTime - 2, 29, "user"), + genTopology("topo-2", config, 1, 0, 2, 0, currentTime - 2, 10, "user")); + Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config); + scheduler = new ResourceAwareScheduler(); - Map nodes = RasNodes.getAllNodesFrom(cluster); + scheduler.prepare(config, new StormMetricsRegistry()); + scheduler.schedule(topologies, cluster); - for (SchedulerAssignment assignment : cluster.getAssignments().values()) { - for (Entry entry : new HashMap<>(assignment.getScheduledResources()).entrySet()) { - WorkerSlot ws = entry.getKey(); - WorkerResources wr = entry.getValue(); - double memoryBefore = nodes.get(ws.getNodeId()).getAvailableMemoryResources(); - double cpuBefore = nodes.get(ws.getNodeId()).getAvailableCpuResources(); - double memoryUsedByWorker = wr.get_mem_on_heap() + wr.get_mem_off_heap(); - assertEquals(1000.0, memoryUsedByWorker, 0.001, "Check if memory used by worker is calculated correctly"); - double cpuUsedByWorker = wr.get_cpu(); - assertEquals(100.0, cpuUsedByWorker, 0.001, "Check if CPU used by worker is calculated correctly"); - nodes.get(ws.getNodeId()).free(ws); - double memoryAfter = nodes.get(ws.getNodeId()).getAvailableMemoryResources(); - double cpuAfter = nodes.get(ws.getNodeId()).getAvailableCpuResources(); - assertEquals(memoryBefore + memoryUsedByWorker, memoryAfter, 0.001, "Check if free correctly frees amount of memory"); - 
assertEquals(cpuBefore + cpuUsedByWorker, cpuAfter, 0.001, "Check if free correctly frees amount of memory"); - assertFalse(assignment.getSlotToExecutors().containsKey(ws), "Check if worker was removed from assignments"); + Map nodes = RasNodes.getAllNodesFrom(cluster); + + for (SchedulerAssignment assignment : cluster.getAssignments().values()) { + for (Entry entry : new HashMap<>(assignment.getScheduledResources()).entrySet()) { + WorkerSlot ws = entry.getKey(); + WorkerResources wr = entry.getValue(); + double memoryBefore = nodes.get(ws.getNodeId()).getAvailableMemoryResources(); + double cpuBefore = nodes.get(ws.getNodeId()).getAvailableCpuResources(); + double memoryUsedByWorker = wr.get_mem_on_heap() + wr.get_mem_off_heap(); + assertEquals(1000.0, memoryUsedByWorker, 0.001, "Check if memory used by worker is calculated correctly"); + double cpuUsedByWorker = wr.get_cpu(); + assertEquals(100.0, cpuUsedByWorker, 0.001, "Check if CPU used by worker is calculated correctly"); + nodes.get(ws.getNodeId()).free(ws); + double memoryAfter = nodes.get(ws.getNodeId()).getAvailableMemoryResources(); + double cpuAfter = nodes.get(ws.getNodeId()).getAvailableCpuResources(); + assertEquals(memoryBefore + memoryUsedByWorker, memoryAfter, 0.001, "Check if free correctly frees amount of memory"); + assertEquals(cpuBefore + cpuUsedByWorker, cpuAfter, 0.001, "Check if free correctly frees amount of cpu"); + assertFalse(assignment.getSlotToExecutors().containsKey(ws), "Check if worker was removed from assignments"); + } } } } @@ -968,26 +972,28 @@ public void testNodeFreeSlot() { */ @Test public void testSchedulingAfterFailedScheduling() { - INimbus iNimbus = new INimbusTest(); - Map supMap = genSupervisors(8, 4, 100, 1000); - Config config = createClusterConfig(100, 500, 500, null); + for (Class strategyClass: strategyClasses) { + INimbus iNimbus = new INimbusTest(); + Map supMap = genSupervisors(8, 4, 100, 1000); + Config config = createClusterConfig(strategyClass, 100, 500, 500, null); - TopologyDetails topo1 = genTopology("topo-1", config, 8, 0, 2, 0, currentTime - 2, 10, "jerry"); - TopologyDetails topo2 = genTopology("topo-2", config, 2, 0, 2, 0, currentTime - 2, 20, "jerry"); - TopologyDetails topo3 = genTopology("topo-3", config, 1, 2, 1, 1, currentTime - 2, 20, "jerry"); + TopologyDetails topo1 = genTopology("topo-1", config, 8, 0, 2, 0, currentTime - 2, 10, "jerry"); + TopologyDetails topo2 = genTopology("topo-2", config, 2, 0, 2, 0, currentTime - 2, 20, "jerry"); + TopologyDetails topo3 = genTopology("topo-3", config, 1, 2, 1, 1, currentTime - 2, 20, "jerry"); - Topologies topologies = new Topologies(topo1, topo2, topo3); - Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap(), topologies, config); - scheduler = new ResourceAwareScheduler(); + Topologies topologies = new Topologies(topo1, topo2, topo3); + Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap(), topologies, config); + scheduler = new ResourceAwareScheduler(); - scheduler.prepare(config, new StormMetricsRegistry()); - scheduler.schedule(topologies, cluster); + scheduler.prepare(config, new StormMetricsRegistry()); + scheduler.schedule(topologies, cluster); - assertFalse(cluster.getAssignmentById(topo1.getId()) != null, "Topo-1 unscheduled?"); - assertTrue(cluster.getAssignmentById(topo2.getId()) != null, "Topo-2 scheduled?"); - assertEquals(4, cluster.getAssignmentById(topo2.getId()).getExecutorToSlot().size(), 
"Topo-2 all executors scheduled?"); - assertTrue(cluster.getAssignmentById(topo3.getId()) != null, "Topo-3 scheduled?"); - assertEquals(3, cluster.getAssignmentById(topo3.getId()).getExecutorToSlot().size(), "Topo-3 all executors scheduled?"); + assertFalse(cluster.getAssignmentById(topo1.getId()) != null, "Topo-1 unscheduled?"); + assertTrue(cluster.getAssignmentById(topo2.getId()) != null, "Topo-2 scheduled?"); + assertEquals(4, cluster.getAssignmentById(topo2.getId()).getExecutorToSlot().size(), "Topo-2 all executors scheduled?"); + assertTrue(cluster.getAssignmentById(topo3.getId()) != null, "Topo-3 scheduled?"); + assertEquals(3, cluster.getAssignmentById(topo3.getId()).getExecutorToSlot().size(), "Topo-3 all executors scheduled?"); + } } /** @@ -996,18 +1002,20 @@ public void testSchedulingAfterFailedScheduling() { */ @Test public void minCpuWorkerJustFits() { - INimbus iNimbus = new INimbusTest(); - Map supMap = genSupervisors(1, 4, 100, 60000); - Config config = createClusterConfig(10, 500, 500, null); - config.put(DaemonConfig.STORM_WORKER_MIN_CPU_PCORE_PERCENT, 50.0); - TopologyDetails topo1 = genTopology("topo-1", config, 10, 0, 1, 1, currentTime - 2, 20, "jerry"); - Topologies topologies = new Topologies(topo1); - Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap(), topologies, config); - scheduler = new ResourceAwareScheduler(); - scheduler.prepare(config, new StormMetricsRegistry()); - scheduler.schedule(topologies, cluster); - assertFalse(cluster.needsSchedulingRas(topo1)); - assertTrue(cluster.getAssignmentById(topo1.getId()) != null, "Topo-1 scheduled?"); + for (Class strategyClass: strategyClasses) { + INimbus iNimbus = new INimbusTest(); + Map supMap = genSupervisors(1, 4, 100, 60000); + Config config = createClusterConfig(strategyClass, 10, 500, 500, null); + config.put(DaemonConfig.STORM_WORKER_MIN_CPU_PCORE_PERCENT, 50.0); + TopologyDetails topo1 = genTopology("topo-1", config, 10, 0, 1, 1, currentTime - 2, 20, "jerry"); + Topologies topologies = new Topologies(topo1); + Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap(), topologies, config); + scheduler = new ResourceAwareScheduler(); + scheduler.prepare(config, new StormMetricsRegistry()); + scheduler.schedule(topologies, cluster); + assertFalse(cluster.needsSchedulingRas(topo1)); + assertTrue(cluster.getAssignmentById(topo1.getId()) != null, "Topo-1 scheduled?"); + } } /** @@ -1016,89 +1024,76 @@ public void minCpuWorkerJustFits() { */ @Test public void minCpuPreventsThirdTopo() { - INimbus iNimbus = new INimbusTest(); - Map supMap = genSupervisors(1, 4, 100, 60000); - Config config = createClusterConfig(10, 500, 500, null); - config.put(DaemonConfig.STORM_WORKER_MIN_CPU_PCORE_PERCENT, 40.0); - TopologyDetails topo1 = genTopology("topo-1", config, 2, 0, 1, 1, currentTime - 2, 20, "jerry"); - TopologyDetails topo2 = genTopology("topo-2", config, 2, 0, 1, 1, currentTime - 2, 20, "jerry"); - TopologyDetails topo3 = genTopology("topo-3", config, 2, 0, 1, 1, currentTime - 2, 20, "jerry"); - Topologies topologies = new Topologies(topo1, topo2, topo3); - Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap(), topologies, config); - scheduler = new ResourceAwareScheduler(); - scheduler.prepare(config, new StormMetricsRegistry()); - scheduler.schedule(topologies, cluster); - assertFalse(cluster.needsSchedulingRas(topo1)); - 
assertFalse(cluster.needsSchedulingRas(topo2)); - assertTrue(cluster.needsSchedulingRas(topo3)); - assertTrue(cluster.getAssignmentById(topo1.getId()) != null, "topo-1 scheduled?"); - assertTrue(cluster.getAssignmentById(topo2.getId()) != null, "topo-2 scheduled?"); - assertFalse(cluster.getAssignmentById(topo3.getId()) != null, "topo-3 unscheduled?"); - - SchedulerAssignment assignment1 = cluster.getAssignmentById(topo1.getId()); - assertEquals(1, assignment1.getSlots().size()); - Map assignedSlots1 = assignment1.getScheduledResources(); - double assignedCpu = 0.0; - for (Entry entry : assignedSlots1.entrySet()) { - WorkerResources wr = entry.getValue(); - assignedCpu += wr.get_cpu(); - } - assertEquals(40.0, assignedCpu, 0.001); + for (Class strategyClass: strategyClasses) { + if (strategyClass.getName().equals(RoundRobinResourceAwareStrategy.class.getName())) { + continue; // exclude RoundRobin from this test + } + INimbus iNimbus = new INimbusTest(); + Map supMap = genSupervisors(1, 4, 100, 60000); + Config config = createClusterConfig(strategyClass, 10, 500, 500, null); + config.put(DaemonConfig.STORM_WORKER_MIN_CPU_PCORE_PERCENT, 40.0); + TopologyDetails topo1 = genTopology("topo-1", config, 2, 0, 1, 1, currentTime - 2, 20, "jerry"); + TopologyDetails topo2 = genTopology("topo-2", config, 2, 0, 1, 1, currentTime - 2, 20, "jerry"); + TopologyDetails topo3 = genTopology("topo-3", config, 2, 0, 1, 1, currentTime - 2, 20, "jerry"); + Topologies topologies = new Topologies(topo1, topo2, topo3); + Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap(), topologies, config); + scheduler = new ResourceAwareScheduler(); + scheduler.prepare(config, new StormMetricsRegistry()); + scheduler.schedule(topologies, cluster); + assertFalse(cluster.needsSchedulingRas(topo1), "using " + strategyClass); + assertFalse(cluster.needsSchedulingRas(topo2), "using " + strategyClass); + assertTrue(cluster.needsSchedulingRas(topo3), "using " + strategyClass); + assertTrue(cluster.getAssignmentById(topo1.getId()) != null, "topo-1 scheduled? using " + strategyClass); + assertTrue(cluster.getAssignmentById(topo2.getId()) != null, "topo-2 scheduled? using " + strategyClass); + assertFalse(cluster.getAssignmentById(topo3.getId()) != null, "topo-3 unscheduled? 
using " + strategyClass); + + SchedulerAssignment assignment1 = cluster.getAssignmentById(topo1.getId()); + assertEquals(1, assignment1.getSlots().size()); + Map assignedSlots1 = assignment1.getScheduledResources(); + double assignedCpu = 0.0; + for (Entry entry : assignedSlots1.entrySet()) { + WorkerResources wr = entry.getValue(); + assignedCpu += wr.get_cpu(); + } + assertEquals(40.0, assignedCpu, 0.001); - SchedulerAssignment assignment2 = cluster.getAssignmentById(topo2.getId()); - assertEquals(1, assignment2.getSlots().size()); - Map assignedSlots2 = assignment2.getScheduledResources(); - assignedCpu = 0.0; - for (Entry entry : assignedSlots2.entrySet()) { - WorkerResources wr = entry.getValue(); - assignedCpu += wr.get_cpu(); + SchedulerAssignment assignment2 = cluster.getAssignmentById(topo2.getId()); + assertEquals(1, assignment2.getSlots().size()); + Map assignedSlots2 = assignment2.getScheduledResources(); + assignedCpu = 0.0; + for (Entry entry : assignedSlots2.entrySet()) { + WorkerResources wr = entry.getValue(); + assignedCpu += wr.get_cpu(); + } + assertEquals(40.0, assignedCpu, 0.001); } - assertEquals(40.0, assignedCpu, 0.001); } @Test public void testMinCpuMaxMultipleSupervisors() { - INimbus iNimbus = new INimbusTest(); - Map supMap = genSupervisors(3, 4, 300, 60000); - Config config = createClusterConfig(5, 50, 50, null); - config.put(DaemonConfig.STORM_WORKER_MIN_CPU_PCORE_PERCENT, 100.0); - TopologyDetails topo0 = genTopology("topo-0", config, 4, 5, 1, 1, currentTime - 2, 20, "jerry"); - TopologyDetails topo1 = genTopology("topo-1", config, 4, 5, 1, 1, currentTime - 2, 20, "jerry"); - TopologyDetails topo2 = genTopology("topo-2", config, 4, 5, 1, 1, currentTime - 2, 20, "jerry"); - TopologyDetails topo3 = genTopology("topo-3", config, 4, 5, 1, 1, currentTime - 2, 20, "jerry"); - TopologyDetails topo4 = genTopology("topo-4", config, 4, 5, 1, 1, currentTime - 2, 20, "jerry"); - TopologyDetails topo5 = genTopology("topo-5", config, 4, 5, 1, 1, currentTime - 2, 20, "jerry"); - TopologyDetails topo6 = genTopology("topo-6", config, 4, 5, 1, 1, currentTime - 2, 20, "jerry"); - TopologyDetails topo7 = genTopology("topo-7", config, 4, 5, 1, 1, currentTime - 2, 20, "jerry"); - TopologyDetails topo8 = genTopology("topo-8", config, 4, 5, 1, 1, currentTime - 2, 20, "jerry"); - TopologyDetails topo9 = genTopology("topo-9", config, 4, 5, 1, 1, currentTime - 2, 20, "jerry"); - Topologies topologies = new Topologies(topo0, topo1, topo2, topo3, topo4, topo5, topo6, topo7, topo8, topo9); - Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap(), topologies, config); - scheduler = new ResourceAwareScheduler(); - scheduler.prepare(config, new StormMetricsRegistry()); - scheduler.schedule(topologies, cluster); + int topoCnt = 10; + for (Class strategyClass: strategyClasses) { + if (strategyClass.getName().equals(RoundRobinResourceAwareStrategy.class.getName())) { + continue; // exclude RoundRbin from this test + } + INimbus iNimbus = new INimbusTest(); + Map supMap = genSupervisors(3, 4, 300, 60000); + Config config = createClusterConfig(strategyClass, 5, 50, 50, null); + config.put(DaemonConfig.STORM_WORKER_MIN_CPU_PCORE_PERCENT, 100.0); + TopologyDetails[] topos = new TopologyDetails[topoCnt]; + for (int i = 0 ; i < topoCnt ; i++) { + String topoName = "topo-" + i; + topos[i] = genTopology(topoName, config, 4, 5, 1, 1, currentTime - 2, 20, "jerry"); + } + Topologies topologies = new Topologies(topos); + Cluster cluster = new 
Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap(), topologies, config); + scheduler = new ResourceAwareScheduler(); + scheduler.prepare(config, new StormMetricsRegistry()); + scheduler.schedule(topologies, cluster); - assertFalse(cluster.needsSchedulingRas(topo0)); - assertFalse(cluster.needsSchedulingRas(topo1)); - assertFalse(cluster.needsSchedulingRas(topo2)); - assertFalse(cluster.needsSchedulingRas(topo3)); - assertFalse(cluster.needsSchedulingRas(topo4)); - assertFalse(cluster.needsSchedulingRas(topo5)); - assertFalse(cluster.needsSchedulingRas(topo6)); - assertFalse(cluster.needsSchedulingRas(topo7)); - assertFalse(cluster.needsSchedulingRas(topo8)); - assertTrue(cluster.needsSchedulingRas(topo9)); - - assertTrue(cluster.getAssignmentById(topo0.getId()) != null,"topo-0 scheduled?"); - assertTrue(cluster.getAssignmentById(topo1.getId()) != null, "topo-1 scheduled?"); - assertTrue(cluster.getAssignmentById(topo2.getId()) != null, "topo-2 scheduled?"); - assertTrue(cluster.getAssignmentById(topo3.getId()) != null, "topo-3 scheduled?"); - assertTrue(cluster.getAssignmentById(topo4.getId()) != null, "topo-4 scheduled?"); - assertTrue(cluster.getAssignmentById(topo5.getId()) != null, "topo-5 scheduled?"); - assertTrue(cluster.getAssignmentById(topo6.getId()) != null, "topo-6 scheduled?"); - assertTrue(cluster.getAssignmentById(topo7.getId()) != null, "topo-7 scheduled?"); - assertTrue(cluster.getAssignmentById(topo8.getId()) != null, "topo-8 scheduled?"); - assertFalse(cluster.getAssignmentById(topo9.getId()) != null, "topo-9 unscheduled?"); + // topo-9 will not be scheduled + assertTopologiesFullyScheduled(cluster, strategyClass, topoCnt - 1); + } } /** @@ -1107,19 +1102,21 @@ public void testMinCpuMaxMultipleSupervisors() { */ @Test public void minCpuWorkerSplitFails() { - INimbus iNimbus = new INimbusTest(); - Map supMap = genSupervisors(1, 4, 100, 60000); - Config config = createClusterConfig(10, 500, 500, null); - config.put(DaemonConfig.STORM_WORKER_MIN_CPU_PCORE_PERCENT, 50.0); - TopologyDetails topo1 = genTopology("topo-1", config, 10, 0, 1, 1, currentTime - 2, 20, - "jerry", 2000.0); - Topologies topologies = new Topologies(topo1); - Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap(), topologies, config); - scheduler = new ResourceAwareScheduler(); - scheduler.prepare(config, new StormMetricsRegistry()); - scheduler.schedule(topologies, cluster); - assertTrue(cluster.needsSchedulingRas(topo1)); - assertFalse(cluster.getAssignmentById(topo1.getId()) != null, "Topo-1 unscheduled?"); + for (Class strategyClass: strategyClasses) { + INimbus iNimbus = new INimbusTest(); + Map supMap = genSupervisors(1, 4, 100, 60000); + Config config = createClusterConfig(strategyClass, 10, 500, 500, null); + config.put(DaemonConfig.STORM_WORKER_MIN_CPU_PCORE_PERCENT, 50.0); + TopologyDetails topo1 = genTopology("topo-1", config, 10, 0, 1, 1, currentTime - 2, 20, + "jerry", 2000.0); + Topologies topologies = new Topologies(topo1); + Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap(), topologies, config); + scheduler = new ResourceAwareScheduler(); + scheduler.prepare(config, new StormMetricsRegistry()); + scheduler.schedule(topologies, cluster); + assertTrue(cluster.needsSchedulingRas(topo1)); + assertFalse(cluster.getAssignmentById(topo1.getId()) != null, "Topo-1 unscheduled?"); + } } protected static class TimeBlockResult { @@ -1199,23 +1196,23 
@@ public void TestLargeFragmentedClusterScheduling() { GenericResourceAwareStrategy 7.78 ConstraintSolverStrategy 7.75 */ - final int numNodes = 500; final int numRuns = 5; Map strategyToConfigs = new HashMap<>(); - strategyToConfigs.put(getDefaultResourceAwareStrategyClass().getName(), createClusterConfig(10, 10, 0, null)); - strategyToConfigs.put(getGenericResourceAwareStrategyClass().getName(), createGrasClusterConfig(10, 10, 0, null, null)); - strategyToConfigs.put(ConstraintSolverStrategy.class.getName(), createCSSClusterConfig(10, 10, 0, null)); - - Map strategyToTimeBlockResults = new HashMap<>(); - // AcceptedBlockTimeRatios obtained by empirical testing (see comment block above) Map strategyToAcceptedBlockTimeRatios = new HashMap<>(); - strategyToAcceptedBlockTimeRatios.put(getDefaultResourceAwareStrategyClass().getName(), 6.96); - strategyToAcceptedBlockTimeRatios.put(getGenericResourceAwareStrategyClass().getName(), 7.78); + for (Class strategyClass: strategyClasses) { + strategyToConfigs.put(strategyClass.getName(), createClusterConfig(strategyClass, 10, 10, 0, null)); + strategyToAcceptedBlockTimeRatios.put(strategyClass.getName(), 6.96); + } + strategyToAcceptedBlockTimeRatios.put(DefaultResourceAwareStrategy.class.getName(), 6.96); + strategyToAcceptedBlockTimeRatios.put(GenericResourceAwareStrategy.class.getName(), 7.78); + strategyToConfigs.put(ConstraintSolverStrategy.class.getName(), createCSSClusterConfig(10, 10, 0, null)); strategyToAcceptedBlockTimeRatios.put(ConstraintSolverStrategy.class.getName(), 7.75); + Map strategyToTimeBlockResults = new HashMap<>(); + // Get first and last block times for multiple runs and strategies long startTime = Time.currentTimeMillis(); for (Entry strategyConfig : strategyToConfigs.entrySet()) { @@ -1339,73 +1336,87 @@ private TimeBlockResult testLargeClusterSchedulingTiming(int numNodes, Config co */ @Test public void testMultipleSpoutsAndCyclicTopologies() { + for (Class strategyClass: strategyClasses) { + String strategyClassName = strategyClass.getName(); + + TopologyBuilder builder = new TopologyBuilder(); + + builder.setSpout("spout-1", new TestSpout(), + 5); + builder.setSpout("spout-2", new TestSpout(), + 5); + builder.setBolt("bolt-1", new TestBolt(), + 5).shuffleGrouping("spout-1").shuffleGrouping("bolt-3"); + builder.setBolt("bolt-2", new TestBolt(), + 5).shuffleGrouping("bolt-1"); + builder.setBolt("bolt-3", new TestBolt(), + 5).shuffleGrouping("bolt-2").shuffleGrouping("spout-2"); + + INimbus iNimbus = new INimbusTest(); + Map supMap = genSupervisors(25, 1, 100, 1000); + Config config = createClusterConfig(strategyClass, 100, 500, 500, null); + + StormTopology stormTopology = builder.createTopology(); + config.put(Config.TOPOLOGY_SUBMITTER_USER, "jerry"); + TopologyDetails topo = new TopologyDetails("topo-1", config, stormTopology, + 0, genExecsAndComps(stormTopology), 0, "jerry"); + + Topologies topologies = new Topologies(topo); + Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap(), topologies, config); + scheduler = new ResourceAwareScheduler(); + scheduler.prepare(config, new StormMetricsRegistry()); + scheduler.schedule(topologies, cluster); - TopologyBuilder builder = new TopologyBuilder(); - - builder.setSpout("spout-1", new TestSpout(), - 5); - builder.setSpout("spout-2", new TestSpout(), - 5); - builder.setBolt("bolt-1", new TestBolt(), - 5).shuffleGrouping("spout-1").shuffleGrouping("bolt-3"); - builder.setBolt("bolt-2", new TestBolt(), - 
5).shuffleGrouping("bolt-1"); - builder.setBolt("bolt-3", new TestBolt(), - 5).shuffleGrouping("bolt-2").shuffleGrouping("spout-2"); - - INimbus iNimbus = new INimbusTest(); - Map supMap = genSupervisors(25, 1, 100, 1000); - Config config = createClusterConfig(100, 500, 500, null); - - StormTopology stormTopology = builder.createTopology(); - config.put(Config.TOPOLOGY_SUBMITTER_USER, "jerry"); - TopologyDetails topo = new TopologyDetails("topo-1", config, stormTopology, - 0, genExecsAndComps(stormTopology), 0, "jerry"); - - Topologies topologies = new Topologies(topo); - Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap(), topologies, config); - scheduler = new ResourceAwareScheduler(); - scheduler.prepare(config, new StormMetricsRegistry()); - scheduler.schedule(topologies, cluster); - - assertTrue(cluster.getAssignmentById(topo.getId()) != null, "Topo scheduled?"); - assertEquals(25, cluster.getAssignmentById(topo.getId()).getExecutorToSlot().size(), "Topo all executors scheduled?"); + assertTrue(cluster.getAssignmentById(topo.getId()) != null, "Topo scheduled?"); + assertEquals(25, cluster.getAssignmentById(topo.getId()).getExecutorToSlot().size(), "Topo all executors scheduled?"); + } } @Test public void testSchedulerStrategyWhitelist() { Map config = ConfigUtils.readStormConfig(); - String allowed = getDefaultResourceAwareStrategyClass().getName(); - config.put(Config.NIMBUS_SCHEDULER_STRATEGY_CLASS_WHITELIST, Arrays.asList(allowed)); + for (Class strategyClass: strategyClasses) { + String strategyClassName = strategyClass.getName(); + String allowed = strategyClassName; + config.put(Config.NIMBUS_SCHEDULER_STRATEGY_CLASS_WHITELIST, Arrays.asList(allowed)); - Object sched = ReflectionUtils.newSchedulerStrategyInstance(allowed, config); - assertEquals(sched.getClass().getName(), allowed); + Object sched = ReflectionUtils.newSchedulerStrategyInstance(allowed, config); + assertEquals(sched.getClass().getName(), allowed); + } } @Test public void testSchedulerStrategyWhitelistException() { Map config = ConfigUtils.readStormConfig(); String allowed = "org.apache.storm.scheduler.resource.strategies.scheduling.SomeNonExistantStrategy"; - String notAllowed = getDefaultResourceAwareStrategyClass().getName(); - config.put(Config.NIMBUS_SCHEDULER_STRATEGY_CLASS_WHITELIST, Arrays.asList(allowed)); + for (Class strategyClass: strategyClasses) { + String strategyClassName = strategyClass.getName(); + String notAllowed = strategyClassName; + config.put(Config.NIMBUS_SCHEDULER_STRATEGY_CLASS_WHITELIST, Arrays.asList(allowed)); - Assertions.assertThrows(DisallowedStrategyException.class, () -> ReflectionUtils.newSchedulerStrategyInstance(notAllowed, config)); + Assertions.assertThrows(DisallowedStrategyException.class, () -> ReflectionUtils.newSchedulerStrategyInstance(notAllowed, config)); + } } @Test public void testSchedulerStrategyEmptyWhitelist() { Map config = ConfigUtils.readStormConfig(); - String allowed = getDefaultResourceAwareStrategyClass().getName(); - - Object sched = ReflectionUtils.newSchedulerStrategyInstance(allowed, config); - assertEquals(sched.getClass().getName(), allowed); + for (Class strategyClass: strategyClasses) { + String strategyClassName = strategyClass.getName(); + String allowed = strategyClassName; + Object sched = ReflectionUtils.newSchedulerStrategyInstance(allowed, config); + assertEquals(sched.getClass().getName(), allowed); + } } @PerformanceTest @Test public void testLargeTopologiesOnLargeClusters() { - 
Assertions.assertTimeoutPreemptively(Duration.ofSeconds(30), - () -> testLargeTopologiesCommon(getDefaultResourceAwareStrategyClass().getName(), false, 1)); + for (Class strategyClass: strategyClasses) { + String strategyClassName = strategyClass.getName(); + Assertions.assertTimeoutPreemptively(Duration.ofSeconds(30), + () -> testLargeTopologiesCommon(strategyClassName, false, 1)); + } } @@ -1413,7 +1424,14 @@ public void testLargeTopologiesOnLargeClusters() { @Test public void testLargeTopologiesOnLargeClustersGras() { Assertions.assertTimeoutPreemptively(Duration.ofSeconds(75), - () -> testLargeTopologiesCommon(getGenericResourceAwareStrategyClass().getName(), true, 1)); + () -> testLargeTopologiesCommon(GenericResourceAwareStrategy.class.getName(), true, 1)); + } + + @PerformanceTest + @Test + public void testLargeTopologiesOnLargeClustersRoundRobin() { + Assertions.assertTimeoutPreemptively(Duration.ofSeconds(30), + () -> testLargeTopologiesCommon(RoundRobinResourceAwareStrategy.class.getName(), true, 1)); } public static class NeverEndingSchedulingStrategy extends BaseResourceAwareStrategy { @@ -1435,96 +1453,103 @@ public SchedulingResult schedule(Cluster schedulingState, TopologyDetails td) { @Test public void testStrategyTakingTooLong() { - INimbus iNimbus = new INimbusTest(); - Map supMap = genSupervisors(8, 4, 100, 1000); - Config config = createClusterConfig(100, 500, 500, null); - List allowedSchedulerStrategies = new ArrayList<>(); - allowedSchedulerStrategies.add(getDefaultResourceAwareStrategyClass().getName()); - allowedSchedulerStrategies.add(DefaultResourceAwareStrategyOld.class.getName()); - allowedSchedulerStrategies.add(NeverEndingSchedulingStrategy.class.getName()); - config.put(Config.NIMBUS_SCHEDULER_STRATEGY_CLASS_WHITELIST, allowedSchedulerStrategies); - config.put(DaemonConfig.SCHEDULING_TIMEOUT_SECONDS_PER_TOPOLOGY, 30); - - TopologyDetails topo1 = genTopology("topo-1", config, 1, 0, 2, 0, currentTime - 2, 10, "jerry"); - TopologyDetails topo3 = genTopology("topo-3", config, 1, 2, 1, 1, currentTime - 2, 20, "jerry"); - - config.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, NeverEndingSchedulingStrategy.class.getName()); - TopologyDetails topo2 = genTopology("topo-2", config, 2, 0, 2, 0, currentTime - 2, 20, "jerry"); - - Topologies topologies = new Topologies(topo1, topo2, topo3); - Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config); - scheduler = new ResourceAwareScheduler(); + for (Class strategyClass: strategyClasses) { + INimbus iNimbus = new INimbusTest(); + Map supMap = genSupervisors(8, 4, 100, 1000); + Config config = createClusterConfig(strategyClass, 100, 500, 500, null); + List allowedSchedulerStrategies = new ArrayList<>(); + allowedSchedulerStrategies.add(DefaultResourceAwareStrategy.class.getName()); + allowedSchedulerStrategies.add(DefaultResourceAwareStrategyOld.class.getName()); + allowedSchedulerStrategies.add(GenericResourceAwareStrategy.class.getName()); + allowedSchedulerStrategies.add(GenericResourceAwareStrategyOld.class.getName()); + allowedSchedulerStrategies.add(RoundRobinResourceAwareStrategy.class.getName()); + allowedSchedulerStrategies.add(NeverEndingSchedulingStrategy.class.getName()); + config.put(Config.NIMBUS_SCHEDULER_STRATEGY_CLASS_WHITELIST, allowedSchedulerStrategies); + config.put(DaemonConfig.SCHEDULING_TIMEOUT_SECONDS_PER_TOPOLOGY, 30); + + TopologyDetails topo1 = genTopology("topo-1", config, 1, 0, 2, 0, currentTime - 2, 10, "jerry"); + 
TopologyDetails topo3 = genTopology("topo-3", config, 1, 2, 1, 1, currentTime - 2, 20, "jerry"); + + config.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, NeverEndingSchedulingStrategy.class.getName()); + TopologyDetails topo2 = genTopology("topo-2", config, 2, 0, 2, 0, currentTime - 2, 20, "jerry"); + + Topologies topologies = new Topologies(topo1, topo2, topo3); + Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config); + scheduler = new ResourceAwareScheduler(); - scheduler.prepare(config, new StormMetricsRegistry()); - scheduler.schedule(topologies, cluster); + scheduler.prepare(config, new StormMetricsRegistry()); + scheduler.schedule(topologies, cluster); - assertFalse(cluster.needsSchedulingRas(topo1)); - assertTrue(cluster.needsSchedulingRas(topo2)); - assertFalse(cluster.needsSchedulingRas(topo3)); - - assertTrue(cluster.getAssignmentById(topo1.getId()) != null, "Topo-1 scheduled?"); - assertEquals(2, cluster.getAssignmentById(topo1.getId()).getExecutorToSlot().size(), "Topo-1 all executors scheduled?"); - assertTrue(cluster.getAssignmentById(topo2.getId()) == null, "Topo-2 not scheduled"); - assertEquals("Scheduling took too long for " + topo2.getId() + " using strategy " - + NeverEndingSchedulingStrategy.class.getName() - + " timeout after 30 seconds using config scheduling.timeout.seconds.per.topology.", cluster.getStatusMap().get(topo2.getId())); - assertTrue(cluster.getAssignmentById(topo3.getId()) != null, "Topo-3 scheduled?"); - assertEquals(3, cluster.getAssignmentById(topo3.getId()).getExecutorToSlot().size(), "Topo-3 all executors scheduled?"); + assertFalse(cluster.needsSchedulingRas(topo1)); + assertTrue(cluster.needsSchedulingRas(topo2)); + assertFalse(cluster.needsSchedulingRas(topo3)); + + assertTrue(cluster.getAssignmentById(topo1.getId()) != null, "Topo-1 scheduled?"); + assertEquals(2, cluster.getAssignmentById(topo1.getId()).getExecutorToSlot().size(), "Topo-1 all executors scheduled?"); + assertTrue(cluster.getAssignmentById(topo2.getId()) == null, "Topo-2 not scheduled"); + assertEquals("Scheduling took too long for " + topo2.getId() + " using strategy " + + NeverEndingSchedulingStrategy.class.getName() + + " timeout after 30 seconds using config scheduling.timeout.seconds.per.topology.", cluster.getStatusMap().get(topo2.getId())); + assertTrue(cluster.getAssignmentById(topo3.getId()) != null, "Topo-3 scheduled?"); + assertEquals(3, cluster.getAssignmentById(topo3.getId()).getExecutorToSlot().size(), "Topo-3 all executors scheduled?"); + } } public void testLargeTopologiesCommon(final String strategy, final boolean includeGpu, final int multiplier) { - INimbus iNimbus = new INimbusTest(); - Map supMap = genSupervisorsWithRacks(25 * multiplier, 40, 66, 3 * multiplier, 0, 4700, 226200, new HashMap<>()); - if (includeGpu) { - HashMap extraResources = new HashMap<>(); - extraResources.put("my.gpu", 1.0); - supMap.putAll(genSupervisorsWithRacks(3 * multiplier, 40, 66, 0, 0, 4700, 226200, extraResources)); - } + for (Class strategyClass: strategyClasses) { + INimbus iNimbus = new INimbusTest(); + Map supMap = genSupervisorsWithRacks(25 * multiplier, 40, 66, 3 * multiplier, 0, 4700, 226200, new HashMap<>()); + if (includeGpu) { + HashMap extraResources = new HashMap<>(); + extraResources.put("my.gpu", 1.0); + supMap.putAll(genSupervisorsWithRacks(3 * multiplier, 40, 66, 0, 0, 4700, 226200, extraResources)); + } - Config config = new Config(); - config.putAll(createClusterConfig(88, 775, 25, null)); 
- config.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, strategy); + Config config = new Config(); + config.putAll(createClusterConfig(strategyClass, 88, 775, 25, null)); + config.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, strategy); - scheduler = new ResourceAwareScheduler(); + scheduler = new ResourceAwareScheduler(); - Map topologyDetailsMap = new HashMap<>(); - for (int i = 0; i < 11 * multiplier; i++) { - TopologyDetails td = genTopology(String.format("topology-%05d", i), config, 5, - 40, 30, 114, 0, 0, "user", 8192); - topologyDetailsMap.put(td.getId(), td); - } - if (includeGpu) { - for (int i = 0; i < multiplier; i++) { - TopologyBuilder builder = topologyBuilder(5, 40, 30, 114); - builder.setBolt("gpu-bolt", new TestBolt(), 40) - .addResource("my.gpu", 1.0) - .shuffleGrouping("spout-0"); - TopologyDetails td = topoToTopologyDetails(String.format("topology-gpu-%05d", i), config, builder.createTopology(), 0, 0, - "user", 8192); + Map topologyDetailsMap = new HashMap<>(); + for (int i = 0; i < 11 * multiplier; i++) { + TopologyDetails td = genTopology(String.format("topology-%05d", i), config, 5, + 40, 30, 114, 0, 0, "user", 8192); topologyDetailsMap.put(td.getId(), td); } - } - Topologies topologies = new Topologies(topologyDetailsMap); - Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config); + if (includeGpu) { + for (int i = 0; i < multiplier; i++) { + TopologyBuilder builder = topologyBuilder(5, 40, 30, 114); + builder.setBolt("gpu-bolt", new TestBolt(), 40) + .addResource("my.gpu", 1.0) + .shuffleGrouping("spout-0"); + TopologyDetails td = topoToTopologyDetails(String.format("topology-gpu-%05d", i), config, builder.createTopology(), 0, 0, + "user", 8192); + topologyDetailsMap.put(td.getId(), td); + } + } + Topologies topologies = new Topologies(topologyDetailsMap); + Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config); - long startTime = Time.currentTimeMillis(); - scheduler.prepare(config, new StormMetricsRegistry()); - scheduler.schedule(topologies, cluster); - long schedulingDuration = Time.currentTimeMillis() - startTime; - LOG.info("Scheduling took " + schedulingDuration + " ms"); - LOG.info("HAS {} SLOTS USED", cluster.getUsedSlots().size()); - - Map assignments = new TreeMap<>(cluster.getAssignments()); - - for (Entry entry: assignments.entrySet()) { - SchedulerAssignment sa = entry.getValue(); - Map slotsPerRack = new TreeMap<>(); - for (WorkerSlot slot : sa.getSlots()) { - String nodeId = slot.getNodeId(); - String rack = supervisorIdToRackName(nodeId); - slotsPerRack.computeIfAbsent(rack, (r) -> new AtomicLong(0)).incrementAndGet(); + long startTime = Time.currentTimeMillis(); + scheduler.prepare(config, new StormMetricsRegistry()); + scheduler.schedule(topologies, cluster); + long schedulingDuration = Time.currentTimeMillis() - startTime; + LOG.info("Scheduling took " + schedulingDuration + " ms"); + LOG.info("HAS {} SLOTS USED", cluster.getUsedSlots().size()); + + Map assignments = new TreeMap<>(cluster.getAssignments()); + + for (Entry entry : assignments.entrySet()) { + SchedulerAssignment sa = entry.getValue(); + Map slotsPerRack = new TreeMap<>(); + for (WorkerSlot slot : sa.getSlots()) { + String nodeId = slot.getNodeId(); + String rack = supervisorIdToRackName(nodeId); + slotsPerRack.computeIfAbsent(rack, (r) -> new AtomicLong(0)).incrementAndGet(); + } + LOG.info("{} => {}", entry.getKey(), slotsPerRack); } - 
LOG.info("{} => {}", entry.getKey(), slotsPerRack); } } diff --git a/storm-server/src/test/java/org/apache/storm/scheduler/resource/TestUser.java b/storm-server/src/test/java/org/apache/storm/scheduler/resource/TestUser.java index 6071e4c6db5..d3f989dd00a 100644 --- a/storm-server/src/test/java/org/apache/storm/scheduler/resource/TestUser.java +++ b/storm-server/src/test/java/org/apache/storm/scheduler/resource/TestUser.java @@ -15,14 +15,20 @@ import java.util.HashMap; import java.util.Map; import org.apache.storm.Config; +import org.apache.storm.metric.StormMetricsRegistry; import org.apache.storm.scheduler.Cluster; import org.apache.storm.scheduler.INimbus; import org.apache.storm.scheduler.SupervisorDetails; import org.apache.storm.scheduler.Topologies; import org.apache.storm.scheduler.TopologyDetails; import org.apache.storm.scheduler.WorkerSlot; +import org.apache.storm.scheduler.resource.normalization.ResourceMetrics; import org.apache.storm.scheduler.resource.TestUtilsForResourceAwareScheduler.INimbusTest; import org.apache.storm.scheduler.resource.strategies.scheduling.DefaultResourceAwareStrategy; +import org.apache.storm.scheduler.resource.strategies.scheduling.DefaultResourceAwareStrategyOld; +import org.apache.storm.scheduler.resource.strategies.scheduling.GenericResourceAwareStrategy; +import org.apache.storm.scheduler.resource.strategies.scheduling.GenericResourceAwareStrategyOld; +import org.apache.storm.scheduler.resource.strategies.scheduling.RoundRobinResourceAwareStrategy; import org.apache.storm.utils.Time; import org.junit.jupiter.api.Test; @@ -33,45 +39,48 @@ import static org.apache.storm.scheduler.resource.TestUtilsForResourceAwareScheduler.userResourcePool; import static org.junit.jupiter.api.Assertions.assertEquals; -import org.apache.storm.metric.StormMetricsRegistry; -import org.apache.storm.scheduler.resource.normalization.ResourceMetrics; public class TestUser { + private static final Class[] strategyClasses = { + DefaultResourceAwareStrategy.class, + DefaultResourceAwareStrategyOld.class, + RoundRobinResourceAwareStrategy.class, + GenericResourceAwareStrategy.class, + GenericResourceAwareStrategyOld.class, + }; - protected Class getDefaultResourceAwareStrategyClass() { - return DefaultResourceAwareStrategy.class; - } - - private Config createClusterConfig(double compPcore, double compOnHeap, double compOffHeap, + private Config createClusterConfig(Class strategyClass, double compPcore, double compOnHeap, double compOffHeap, Map> pools) { Config config = TestUtilsForResourceAwareScheduler.createClusterConfig(compPcore, compOnHeap, compOffHeap, pools); - config.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, getDefaultResourceAwareStrategyClass().getName()); + config.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, strategyClass.getName()); return config; } @Test public void testResourcePoolUtilization() { - INimbus iNimbus = new INimbusTest(); - Map supMap = genSupervisors(4, 4, 100, 1000); - double cpuGuarantee = 400.0; - double memoryGuarantee = 1000.0; - Map> resourceUserPool = userResourcePool( - userRes("user1", cpuGuarantee, memoryGuarantee)); - Config config = createClusterConfig(100, 200, 200, resourceUserPool); - TopologyDetails topo1 = genTopology("topo-1", config, 1, 1, 2, 1, Time.currentTimeSecs() - 24, 9, "user1"); - Topologies topologies = new Topologies(topo1); + for (Class strategyClass: strategyClasses) { + INimbus iNimbus = new INimbusTest(); + Map supMap = genSupervisors(4, 4, 100, 1000); + double cpuGuarantee = 400.0; + double memoryGuarantee = 1000.0; + 
+            Map<String, Map<String, Number>> resourceUserPool = userResourcePool(
+                userRes("user1", cpuGuarantee, memoryGuarantee));
+            Config config = createClusterConfig(strategyClass, 100, 200, 200, resourceUserPool);
+            TopologyDetails topo1 = genTopology("topo-1", config, 1, 1, 2, 1, Time.currentTimeSecs() - 24, 9, "user1");
+            Topologies topologies = new Topologies(topo1);

-        Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config);
-        User user1 = new User("user1", toDouble(resourceUserPool.get("user1")));
-        WorkerSlot slot = cluster.getAvailableSlots().get(0);
-        cluster.assign(slot, topo1.getId(), topo1.getExecutors());
+            Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config);
+            User user1 = new User("user1", toDouble(resourceUserPool.get("user1")));
+            WorkerSlot slot = cluster.getAvailableSlots().get(0);
+            cluster.assign(slot, topo1.getId(), topo1.getExecutors());

-        assertEquals(cpuGuarantee, user1.getCpuResourceGuaranteed(), 0.001, "check cpu resource guarantee");
-        assertEquals(memoryGuarantee, user1.getMemoryResourceGuaranteed(), 0.001, "check memory resource guarantee");
+            assertEquals(cpuGuarantee, user1.getCpuResourceGuaranteed(), 0.001, "check cpu resource guarantee");
+            assertEquals(memoryGuarantee, user1.getMemoryResourceGuaranteed(), 0.001, "check memory resource guarantee");

-        assertEquals(((100.0 * 3.0) / cpuGuarantee), user1.getCpuResourcePoolUtilization(cluster), 0.001,
-            "check cpu resource pool utilization");
-        assertEquals(((200.0 + 200.0) * 3.0) / memoryGuarantee, user1.getMemoryResourcePoolUtilization(cluster), 0.001,
-            "check memory resource pool utilization");
+            assertEquals(((100.0 * 3.0) / cpuGuarantee), user1.getCpuResourcePoolUtilization(cluster), 0.001,
+                "check cpu resource pool utilization");
+            assertEquals(((200.0 + 200.0) * 3.0) / memoryGuarantee, user1.getMemoryResourcePoolUtilization(cluster), 0.001,
+                "check memory resource pool utilization");
+        }
    }
}
diff --git a/storm-server/src/test/java/org/apache/storm/scheduler/resource/TestUtilsForResourceAwareScheduler.java b/storm-server/src/test/java/org/apache/storm/scheduler/resource/TestUtilsForResourceAwareScheduler.java
index dcffa99c900..68e48655753 100644
--- a/storm-server/src/test/java/org/apache/storm/scheduler/resource/TestUtilsForResourceAwareScheduler.java
+++ b/storm-server/src/test/java/org/apache/storm/scheduler/resource/TestUtilsForResourceAwareScheduler.java
@@ -34,6 +34,7 @@
 import org.apache.storm.scheduler.resource.strategies.scheduling.ConstraintSolverStrategy;
 import org.apache.storm.scheduler.resource.strategies.scheduling.DefaultResourceAwareStrategy;
 import org.apache.storm.scheduler.resource.strategies.scheduling.GenericResourceAwareStrategy;
+import org.apache.storm.scheduler.resource.strategies.scheduling.RoundRobinResourceAwareStrategy;
 import org.apache.storm.spout.SpoutOutputCollector;
 import org.apache.storm.task.OutputCollector;
 import org.apache.storm.task.TopologyContext;
@@ -63,6 +64,8 @@
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

+import static org.junit.jupiter.api.Assertions.assertEquals;
+
 public class TestUtilsForResourceAwareScheduler {
     private static final Logger LOG = LoggerFactory.getLogger(TestUtilsForResourceAwareScheduler.class);
@@ -107,6 +110,19 @@
     public static Config createCSSClusterConfig(double compPcore, double compOnHeap, double compOffHeap,
                                                 Map<String, Map<String, Number>> pools) {
         Config config = createClusterConfig(compPcore, compOnHeap, compOffHeap, pools);
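+        // added below: a constraint for ConstraintSolverStrategy limiting testSpout to one executor per node (maxNodeCoLocationCnt = 1)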
         config.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, ConstraintSolverStrategy.class.getName());
+        Map<String, Map<String, Object>> modifiedConstraints = new HashMap<>();
+        Map<String, Object> constraints = new HashMap<>();
+        constraints.put("maxNodeCoLocationCnt", 1);
+        modifiedConstraints.put("testSpout", constraints);
+        config.put(Config.TOPOLOGY_RAS_CONSTRAINTS, modifiedConstraints);
+        return config;
+    }
+
+    public static Config createRoundRobinClusterConfig(double compPcore, double compOnHeap, double compOffHeap,
+                                                       Map<String, Map<String, Number>> pools, Map<String, Double> genericResourceMap) {
+        Config config = createClusterConfig(compPcore, compOnHeap, compOffHeap, pools);
+        config.put(Config.TOPOLOGY_COMPONENT_RESOURCES_MAP, genericResourceMap);
+        config.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, RoundRobinResourceAwareStrategy.class.getName());
         return config;
     }
@@ -461,53 +477,73 @@
     private static boolean isContain(String source, String subItem) {
         return m.find();
     }

-    public static void assertTopologiesNotScheduled(Cluster cluster, String... topoNames) {
+    public static void assertTopologiesNotScheduled(Cluster cluster, Class strategyClass, String... topoNames) {
         Topologies topologies = cluster.getTopologies();
         for (String topoName : topoNames) {
             TopologyDetails td = topologies.getByName(topoName);
-            assert (td != null) : topoName;
+            String errMsg = "topology " + topoName + " using " + strategyClass.getName();
+            assert (td != null) : errMsg;
             String topoId = td.getId();
             String status = cluster.getStatus(topoId);
-            assert (status != null) : topoName;
-            assert (!isStatusSuccess(status)) : topoName;
-            assert (cluster.getAssignmentById(topoId) == null) : topoName;
-            assert (cluster.needsSchedulingRas(td)) : topoName;
+            assert (status != null) : errMsg;
+            assert (!isStatusSuccess(status)) : errMsg;
+            assert (cluster.getAssignmentById(topoId) == null) : errMsg;
+            assert (cluster.needsSchedulingRas(td)) : errMsg;
         }
     }

-    public static void assertTopologiesFullyScheduled(Cluster cluster, String... topoNames) {
+    public static void assertTopologiesFullyScheduled(Cluster cluster, Class strategyClass, String... topoNames) {
         Topologies topologies = cluster.getTopologies();
         for (String topoName : topoNames) {
             TopologyDetails td = topologies.getByName(topoName);
-            assert (td != null) : topoName;
+            String errMsg = "topology " + topoName + " using " + strategyClass.getName();
+            assert (td != null) : errMsg;
             String topoId = td.getId();
             assertStatusSuccess(cluster, topoId);
-            assert (cluster.getAssignmentById(topoId) != null) : topoName;
-            assert (cluster.needsSchedulingRas(td) == false) : topoName;
+            assert (cluster.getAssignmentById(topoId) != null) : errMsg;
+            assert (cluster.needsSchedulingRas(td) == false) : errMsg;
+        }
+    }
+
+    public static void assertTopologiesFullyScheduled(Cluster cluster, Class strategyClass, int expectedScheduledCnt) {
+        List<String> toposScheduled = new ArrayList<>();
+        for (TopologyDetails td: cluster.getTopologies()) {
+            String topoId = td.getId();
+            if (!isStatusSuccess(cluster.getStatus(topoId))
+                    || cluster.getAssignmentById(topoId) == null
+                    || cluster.needsSchedulingRas(td)) {
+                continue;
+            }
+            toposScheduled.add(td.getName());
         }
+        String errMsg = String.format("Only the following topologies are scheduled: %s using %s",
+                String.join(",", toposScheduled), strategyClass.getName());
+        assertEquals(expectedScheduledCnt, toposScheduled.size(), errMsg);
     }

-    public static void assertTopologiesBeenEvicted(Cluster cluster, Set<String> evictedTopologies, String... topoNames) {
+    public static void assertTopologiesBeenEvicted(Cluster cluster, Class strategyClass, Set<String> evictedTopologies, String... topoNames) {
         Topologies topologies = cluster.getTopologies();
         LOG.info("Evicted topos: {}", evictedTopologies);
         assert (evictedTopologies != null);
         for (String topoName : topoNames) {
+            String errMsg = "topology " + topoName + " using " + strategyClass.getName();
             TopologyDetails td = topologies.getByName(topoName);
-            assert (td != null) : topoName;
+            assert (td != null) : errMsg;
             String topoId = td.getId();
-            assert (evictedTopologies.contains(topoId)) : topoName;
+            assert (evictedTopologies.contains(topoId)) : errMsg;
         }
     }

-    public static void assertTopologiesNotBeenEvicted(Cluster cluster, Set<String> evictedTopologies, String... topoNames) {
+    public static void assertTopologiesNotBeenEvicted(Cluster cluster, Class strategyClass, Set<String> evictedTopologies, String... topoNames) {
         Topologies topologies = cluster.getTopologies();
         LOG.info("Evicted topos: {}", evictedTopologies);
         assert (evictedTopologies != null);
         for (String topoName : topoNames) {
+            String errMsg = "topology " + topoName + " using " + strategyClass.getName();
             TopologyDetails td = topologies.getByName(topoName);
-            assert (td != null) : topoName;
+            assert (td != null) : errMsg;
             String topoId = td.getId();
-            assert (!evictedTopologies.contains(topoId)) : topoName;
+            assert (!evictedTopologies.contains(topoId)) : errMsg;
         }
     }
diff --git a/storm-server/src/test/java/org/apache/storm/scheduler/resource/strategies/eviction/TestDefaultEvictionStrategy.java b/storm-server/src/test/java/org/apache/storm/scheduler/resource/strategies/eviction/TestDefaultEvictionStrategy.java
index 304043b50e6..1f0302d6475 100644
--- a/storm-server/src/test/java/org/apache/storm/scheduler/resource/strategies/eviction/TestDefaultEvictionStrategy.java
+++ b/storm-server/src/test/java/org/apache/storm/scheduler/resource/strategies/eviction/TestDefaultEvictionStrategy.java
@@ -27,6 +27,10 @@
 import org.apache.storm.scheduler.resource.ResourceAwareScheduler;
 import org.apache.storm.scheduler.resource.TestUtilsForResourceAwareScheduler;
 import org.apache.storm.scheduler.resource.strategies.scheduling.DefaultResourceAwareStrategy;
+import org.apache.storm.scheduler.resource.strategies.scheduling.DefaultResourceAwareStrategyOld;
+import org.apache.storm.scheduler.resource.strategies.scheduling.GenericResourceAwareStrategy;
+import org.apache.storm.scheduler.resource.strategies.scheduling.GenericResourceAwareStrategyOld;
+import org.apache.storm.scheduler.resource.strategies.scheduling.RoundRobinResourceAwareStrategy;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
@@ -42,17 +46,20 @@
 public class TestDefaultEvictionStrategy {
     private static final Logger LOG = LoggerFactory.getLogger(TestDefaultEvictionStrategy.class);
+    private static final Class[] strategyClasses = {
+        DefaultResourceAwareStrategy.class,
+        DefaultResourceAwareStrategyOld.class,
+        RoundRobinResourceAwareStrategy.class,
+        GenericResourceAwareStrategy.class,
+        GenericResourceAwareStrategyOld.class,
+    };
     private int currentTime = 1450418597;
     private IScheduler scheduler = null;

-    protected Class getDefaultResourceAwareStrategyClass() {
-        return DefaultResourceAwareStrategy.class;
-    }
-
-    private Config createClusterConfig(double compPcore, double compOnHeap, double compOffHeap,
+    private Config createClusterConfig(Class strategyClass, double compPcore, double compOnHeap, double compOffHeap,
                                        Map<String, Map<String, Number>> pools) {
         Config config = TestUtilsForResourceAwareScheduler.createClusterConfig(compPcore, compOnHeap, compOffHeap, pools);
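+        // the strategy class is now injected here, so each eviction scenario below runs once per entry in strategyClasses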
-        config.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, getDefaultResourceAwareStrategyClass().getName());
+        config.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, strategyClass.getName());
         return config;
     }
@@ -71,120 +78,126 @@
      */
     @Test
     public void testEviction() {
-        INimbus iNimbus = new INimbusTest();
-        Map<String, SupervisorDetails> supMap = genSupervisors(4, 4, 100, 1000);
-        Map<String, Map<String, Number>> resourceUserPool = userResourcePool(
-            userRes("jerry", 200, 2000),
-            userRes("bobby", 100, 1000),
-            userRes("derek", 200, 2000));
-        Config config = createClusterConfig(100, 500, 500, resourceUserPool);
-        Topologies topologies = new Topologies(
-            genTopology("topo-1", config, 1, 0, 1, 0, currentTime - 2, 10, "jerry"),
-            genTopology("topo-2", config, 1, 0, 1, 0, currentTime - 2, 10, "bobby"),
-            genTopology("topo-3", config, 1, 0, 1, 0, currentTime - 2, 20, "bobby"),
-            genTopology("topo-4", config, 1, 0, 1, 0, currentTime - 2, 29, "derek"));
-
-        Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config);
-        scheduler = new ResourceAwareScheduler();
-        scheduler.prepare(config, new StormMetricsRegistry());
-        scheduler.schedule(topologies, cluster);
-
-        assertTopologiesFullyScheduled(cluster, "topo-1", "topo-2", "topo-3", "topo-4");
-
-        //user jerry submits another topology
-        topologies = addTopologies(topologies,
-            genTopology("topo-6", config, 1, 0, 1, 0, currentTime - 2, 20, "jerry"));
-        cluster = new Cluster(cluster, topologies);
-        scheduler.schedule(topologies, cluster);
-
-        //topo-3 evicted (lowest priority)
-        assertTopologiesFullyScheduled(cluster, "topo-1", "topo-2", "topo-4", "topo-6");
-        assertTopologiesNotScheduled(cluster, "topo-3");
+        for (Class strategyClass: strategyClasses) {
+            INimbus iNimbus = new INimbusTest();
+            Map<String, SupervisorDetails> supMap = genSupervisors(4, 4, 100, 1000);
+            Map<String, Map<String, Number>> resourceUserPool = userResourcePool(
+                userRes("jerry", 200, 2000),
+                userRes("bobby", 100, 1000),
+                userRes("derek", 200, 2000));
+            Config config = createClusterConfig(strategyClass, 100, 500, 500, resourceUserPool);
+            Topologies topologies = new Topologies(
+                genTopology("topo-1", config, 1, 0, 1, 0, currentTime - 2, 10, "jerry"),
+                genTopology("topo-2", config, 1, 0, 1, 0, currentTime - 2, 10, "bobby"),
+                genTopology("topo-3", config, 1, 0, 1, 0, currentTime - 2, 20, "bobby"),
+                genTopology("topo-4", config, 1, 0, 1, 0, currentTime - 2, 29, "derek"));
+
+            Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config);
+            scheduler = new ResourceAwareScheduler();
+            scheduler.prepare(config, new StormMetricsRegistry());
+            scheduler.schedule(topologies, cluster);
+
+            assertTopologiesFullyScheduled(cluster, strategyClass, "topo-1", "topo-2", "topo-3", "topo-4");
+
+            //user jerry submits another topology
+            topologies = addTopologies(topologies,
+                genTopology("topo-6", config, 1, 0, 1, 0, currentTime - 2, 20, "jerry"));
+            cluster = new Cluster(cluster, topologies);
+            scheduler.schedule(topologies, cluster);
+
+            //topo-3 evicted (lowest priority)
+            assertTopologiesFullyScheduled(cluster, strategyClass, "topo-1", "topo-2", "topo-4", "topo-6");
+            assertTopologiesNotScheduled(cluster, strategyClass, "topo-3");
+        }
     }

     @Test
     public void testEvictMultipleTopologies() {
-        INimbus iNimbus = new INimbusTest();
-        Map<String, SupervisorDetails> supMap = genSupervisors(4, 4, 100, 1000);
-        Map<String, Map<String, Number>> resourceUserPool = userResourcePool(
-            userRes("jerry", 200, 2000),
-            userRes("derek", 100, 1000));
-        Config config = createClusterConfig(100, 500, 500, resourceUserPool);
-
-        Topologies topologies = new Topologies(
-            genTopology("topo-2", config, 1, 0, 1, 0, currentTime - 2, 10, "bobby"),
-            genTopology("topo-3", config, 1, 0, 1, 0, currentTime - 2, 20, "bobby"),
-            genTopology("topo-4", config, 1, 0, 1, 0, currentTime - 2, 29, "derek"),
-            genTopology("topo-5", config, 1, 0, 1, 0, currentTime - 2, 29, "derek"));
-        Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config);
-        scheduler = new ResourceAwareScheduler();
-        scheduler.prepare(config, new StormMetricsRegistry());
-        LOG.info("\n\n\t\tScheduling topos 2 to 5...");
-        scheduler.schedule(topologies, cluster);
-        LOG.info("\n\n\t\tDone scheduling...");
-        assertTopologiesFullyScheduled(cluster, "topo-2", "topo-3", "topo-4", "topo-5");
-
-        //user jerry submits another topology
-        topologies = addTopologies(topologies,
-            genTopology("topo-1", config, 2, 0, 1, 0, currentTime - 2, 10, "jerry"));
-        cluster = new Cluster(cluster, topologies);
-        LOG.info("\n\n\t\tScheduling topos 1 to 5");
-        scheduler.schedule(topologies, cluster);
-        LOG.info("\n\n\t\tDone scheduling...");
-        //bobby has no guarantee so topo-2 and topo-3 evicted
-        assertTopologiesFullyScheduled(cluster, "topo-1", "topo-4", "topo-5");
-        assertTopologiesNotScheduled(cluster, "topo-2", "topo-3");
+        for (Class strategyClass: strategyClasses) {
+            INimbus iNimbus = new INimbusTest();
+            Map<String, SupervisorDetails> supMap = genSupervisors(4, 4, 100, 1000);
+            Map<String, Map<String, Number>> resourceUserPool = userResourcePool(
+                userRes("jerry", 200, 2000),
+                userRes("derek", 100, 1000));
+            Config config = createClusterConfig(strategyClass, 100, 500, 500, resourceUserPool);
+
+            Topologies topologies = new Topologies(
+                genTopology("topo-2", config, 1, 0, 1, 0, currentTime - 2, 10, "bobby"),
+                genTopology("topo-3", config, 1, 0, 1, 0, currentTime - 2, 20, "bobby"),
+                genTopology("topo-4", config, 1, 0, 1, 0, currentTime - 2, 29, "derek"),
+                genTopology("topo-5", config, 1, 0, 1, 0, currentTime - 2, 29, "derek"));
+            Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config);
+            scheduler = new ResourceAwareScheduler();
+            scheduler.prepare(config, new StormMetricsRegistry());
+            LOG.info("\n\n\t\tScheduling topos 2 to 5...");
+            scheduler.schedule(topologies, cluster);
+            LOG.info("\n\n\t\tDone scheduling...");
+            assertTopologiesFullyScheduled(cluster, strategyClass, "topo-2", "topo-3", "topo-4", "topo-5");
+
+            //user jerry submits another topology
+            topologies = addTopologies(topologies,
+                genTopology("topo-1", config, 2, 0, 1, 0, currentTime - 2, 10, "jerry"));
+            cluster = new Cluster(cluster, topologies);
+            LOG.info("\n\n\t\tScheduling topos 1 to 5");
+            scheduler.schedule(topologies, cluster);
+            LOG.info("\n\n\t\tDone scheduling...");
+            //bobby has no guarantee so topo-2 and topo-3 evicted
+            assertTopologiesFullyScheduled(cluster, strategyClass, "topo-1", "topo-4", "topo-5");
+            assertTopologiesNotScheduled(cluster, strategyClass, "topo-2", "topo-3");
+        }
     }

     @Test
     public void testEvictMultipleTopologiesFromMultipleUsersInCorrectOrder() {
-        INimbus iNimbus = new INimbusTest();
-        Map<String, SupervisorDetails> supMap = genSupervisors(4, 4, 100, 1000);
-        Map<String, Map<String, Number>> resourceUserPool = userResourcePool(
-            userRes("jerry", 300, 3000),
-            userRes("derek", 100, 1000));
-        Config config = createClusterConfig(100, 500, 500, resourceUserPool);
-
-        Topologies topologies = new Topologies(
-            genTopology("topo-2", config, 1, 0, 1, 0, currentTime - 2, 10, "bobby"),
-            genTopology("topo-3", config, 1, 0, 1, 0, currentTime - 2, 20, "bobby"),
-            genTopology("topo-4", config, 1, 0, 1, 0, currentTime - 2, 29, "derek"),
-            genTopology("topo-5", config, 1, 0, 1, 0, currentTime - 15, 29, "derek"));
-        Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config);
-        scheduler = new ResourceAwareScheduler();
-        scheduler.prepare(config, new StormMetricsRegistry());
-        scheduler.schedule(topologies, cluster);
-
-        assertTopologiesFullyScheduled(cluster, "topo-2", "topo-3", "topo-4", "topo-5");
-
-        //user jerry submits another topology
-        topologies = addTopologies(topologies,
-            genTopology("topo-1", config, 1, 0, 1, 0, currentTime - 2, 10, "jerry"));
-        cluster = new Cluster(cluster, topologies);
-        scheduler.schedule(topologies, cluster);
-
-        //topo-3 evicted since user bobby don't have any resource guarantees and topo-3 is the lowest priority for user bobby
-        assertTopologiesFullyScheduled(cluster, "topo-1", "topo-2", "topo-4", "topo-5");
-        assertTopologiesNotScheduled(cluster, "topo-3");
-
-        topologies = addTopologies(topologies,
-            genTopology("topo-6", config, 1, 0, 1, 0, currentTime - 2, 10, "jerry"));
-        cluster = new Cluster(cluster, topologies);
-        scheduler.schedule(topologies, cluster);
-
-        //topo-2 evicted since user bobby don't have any resource guarantees and topo-2 is the next lowest priority for user bobby
-        assertTopologiesFullyScheduled(cluster, "topo-1", "topo-4", "topo-5");
-        assertTopologiesNotScheduled(cluster, "topo-2", "topo-3");
-
-        topologies = addTopologies(topologies,
-            genTopology("topo-7", config, 1, 0, 1, 0, currentTime - 2, 10, "jerry"));
-        cluster = new Cluster(cluster, topologies);
-        scheduler.schedule(topologies, cluster);
-
-        // since user derek has exceeded his resource guarantee while user jerry has not topo-5 or topo-4 could be evicted because they have the same priority
-        // but topo-4 was submitted earlier thus we choose that one to evict (somewhat arbitrary)
-        assertTopologiesFullyScheduled(cluster, "topo-1", "topo-5", "topo-7");
-        assertTopologiesNotScheduled(cluster, "topo-2", "topo-3", "topo-4");
+        for (Class strategyClass: strategyClasses) {
+            INimbus iNimbus = new INimbusTest();
+            Map<String, SupervisorDetails> supMap = genSupervisors(4, 4, 100, 1000);
+            Map<String, Map<String, Number>> resourceUserPool = userResourcePool(
+                userRes("jerry", 300, 3000),
+                userRes("derek", 100, 1000));
+            Config config = createClusterConfig(strategyClass, 100, 500, 500, resourceUserPool);
+
+            Topologies topologies = new Topologies(
+                genTopology("topo-2", config, 1, 0, 1, 0, currentTime - 2, 10, "bobby"),
+                genTopology("topo-3", config, 1, 0, 1, 0, currentTime - 2, 20, "bobby"),
+                genTopology("topo-4", config, 1, 0, 1, 0, currentTime - 2, 29, "derek"),
+                genTopology("topo-5", config, 1, 0, 1, 0, currentTime - 15, 29, "derek"));
+            Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config);
+            scheduler = new ResourceAwareScheduler();
+            scheduler.prepare(config, new StormMetricsRegistry());
+            scheduler.schedule(topologies, cluster);
+
+            assertTopologiesFullyScheduled(cluster, strategyClass, "topo-2", "topo-3", "topo-4", "topo-5");
+
+            //user jerry submits another topology
+            topologies = addTopologies(topologies,
+                genTopology("topo-1", config, 1, 0, 1, 0, currentTime - 2, 10, "jerry"));
+            cluster = new Cluster(cluster, topologies);
+            scheduler.schedule(topologies, cluster);
+
+            //topo-3 evicted since user bobby doesn't have any resource guarantees and topo-3 is the lowest priority for user bobby
assertTopologiesFullyScheduled(cluster, strategyClass, "topo-1", "topo-2", "topo-4", "topo-5"); + assertTopologiesNotScheduled(cluster, strategyClass,"topo-3"); + + topologies = addTopologies(topologies, + genTopology("topo-6", config, 1, 0, 1, 0, currentTime - 2, 10, "jerry")); + cluster = new Cluster(cluster, topologies); + scheduler.schedule(topologies, cluster); + + //topo-2 evicted since user bobby don't have any resource guarantees and topo-2 is the next lowest priority for user bobby + assertTopologiesFullyScheduled(cluster, strategyClass, "topo-1", "topo-4", "topo-5"); + assertTopologiesNotScheduled(cluster, strategyClass, "topo-2", "topo-3"); + + topologies = addTopologies(topologies, + genTopology("topo-7", config, 1, 0, 1, 0, currentTime - 2, 10, "jerry")); + cluster = new Cluster(cluster, topologies); + scheduler.schedule(topologies, cluster); + + // since user derek has exceeded his resource guarantee while user jerry has not topo-5 or topo-4 could be evicted because they have the same priority + // but topo-4 was submitted earlier thus we choose that one to evict (somewhat arbitrary) + assertTopologiesFullyScheduled(cluster, strategyClass,"topo-1", "topo-5", "topo-7"); + assertTopologiesNotScheduled(cluster, strategyClass,"topo-2", "topo-3", "topo-4"); + } } /** @@ -193,51 +206,53 @@ public void testEvictMultipleTopologiesFromMultipleUsersInCorrectOrder() { */ @Test public void testEvictTopologyFromItself() { - INimbus iNimbus = new INimbusTest(); - Map supMap = genSupervisors(4, 4, 100, 1000); - Map> resourceUserPool = userResourcePool( - userRes("jerry", 200, 2000), - userRes("bobby", 100, 1000), - userRes("derek", 100, 1000)); - Config config = createClusterConfig(100, 500, 500, resourceUserPool); - - Topologies topologies = new Topologies( - genTopology("topo-1", config, 1, 0, 1, 0, currentTime - 2, 20, "jerry"), - genTopology("topo-2", config, 1, 0, 1, 0, currentTime - 2, 20, "jerry"), - genTopology("topo-5", config, 1, 0, 1, 0, currentTime - 2, 10, "bobby"), - genTopology("topo-6", config, 1, 0, 1, 0, currentTime - 2, 29, "derek")); - Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config); - scheduler = new ResourceAwareScheduler(); - scheduler.prepare(config, new StormMetricsRegistry()); - LOG.info("\n\n\t\tScheduling topos 1,2,5,6"); - scheduler.schedule(topologies, cluster); - LOG.info("\n\n\t\tDone Scheduling..."); - - assertTopologiesFullyScheduled(cluster, "topo-1", "topo-2", "topo-5", "topo-6"); - - //user jerry submits another topology into a full cluster - // topo3 should not be able to scheduled - topologies = addTopologies(topologies, - genTopology("topo-3", config, 1, 0, 1, 0, currentTime - 2, 29, "jerry")); - cluster = new Cluster(cluster, topologies); - LOG.info("\n\n\t\tScheduling topos 1,2,3,5,6"); - scheduler.schedule(topologies, cluster); - LOG.info("\n\n\t\tDone Scheduling..."); - - assertTopologiesFullyScheduled(cluster, "topo-1", "topo-2", "topo-5", "topo-6"); - assertTopologiesNotScheduled(cluster, "topo-3"); - - //user jerry submits another topology but this one should be scheduled since it has higher priority than the - //rest of jerry's running topologies - topologies = addTopologies(topologies, - genTopology("topo-4", config, 1, 0, 1, 0, currentTime - 2, 10, "jerry")); - cluster = new Cluster(cluster, topologies); - LOG.info("\n\n\t\tScheduling topos 1-6"); - scheduler.schedule(topologies, cluster); - LOG.info("\n\n\t\tDone Scheduling..."); - - 
assertTopologiesFullyScheduled(cluster, "topo-1", "topo-4", "topo-5", "topo-6"); - assertTopologiesNotScheduled(cluster, "topo-2", "topo-3"); + for (Class strategyClass: strategyClasses) { + INimbus iNimbus = new INimbusTest(); + Map supMap = genSupervisors(4, 4, 100, 1000); + Map> resourceUserPool = userResourcePool( + userRes("jerry", 200, 2000), + userRes("bobby", 100, 1000), + userRes("derek", 100, 1000)); + Config config = createClusterConfig(strategyClass, 100, 500, 500, resourceUserPool); + + Topologies topologies = new Topologies( + genTopology("topo-1", config, 1, 0, 1, 0, currentTime - 2, 20, "jerry"), + genTopology("topo-2", config, 1, 0, 1, 0, currentTime - 2, 20, "jerry"), + genTopology("topo-5", config, 1, 0, 1, 0, currentTime - 2, 10, "bobby"), + genTopology("topo-6", config, 1, 0, 1, 0, currentTime - 2, 29, "derek")); + Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config); + scheduler = new ResourceAwareScheduler(); + scheduler.prepare(config, new StormMetricsRegistry()); + LOG.info("\n\n\t\tScheduling topos 1,2,5,6"); + scheduler.schedule(topologies, cluster); + LOG.info("\n\n\t\tDone Scheduling..."); + + assertTopologiesFullyScheduled(cluster, strategyClass, "topo-1", "topo-2", "topo-5", "topo-6"); + + //user jerry submits another topology into a full cluster + // topo3 should not be able to scheduled + topologies = addTopologies(topologies, + genTopology("topo-3", config, 1, 0, 1, 0, currentTime - 2, 29, "jerry")); + cluster = new Cluster(cluster, topologies); + LOG.info("\n\n\t\tScheduling topos 1,2,3,5,6"); + scheduler.schedule(topologies, cluster); + LOG.info("\n\n\t\tDone Scheduling..."); + + assertTopologiesFullyScheduled(cluster, strategyClass, "topo-1", "topo-2", "topo-5", "topo-6"); + assertTopologiesNotScheduled(cluster, strategyClass, "topo-3"); + + //user jerry submits another topology but this one should be scheduled since it has higher priority than the + //rest of jerry's running topologies + topologies = addTopologies(topologies, + genTopology("topo-4", config, 1, 0, 1, 0, currentTime - 2, 10, "jerry")); + cluster = new Cluster(cluster, topologies); + LOG.info("\n\n\t\tScheduling topos 1-6"); + scheduler.schedule(topologies, cluster); + LOG.info("\n\n\t\tDone Scheduling..."); + + assertTopologiesFullyScheduled(cluster, strategyClass, "topo-1", "topo-4", "topo-5", "topo-6"); + assertTopologiesNotScheduled(cluster, strategyClass, "topo-2", "topo-3"); + } } /** @@ -245,50 +260,52 @@ public void testEvictTopologyFromItself() { */ @Test public void testOverGuaranteeEviction() { - INimbus iNimbus = new INimbusTest(); - Map supMap = genSupervisors(4, 4, 100, 1000); - Map> resourceUserPool = userResourcePool( - userRes("jerry", 70, 700), - userRes("bobby", 100, 1000), - userRes("derek", 25, 250)); - Config config = createClusterConfig(100, 500, 500, resourceUserPool); - - Topologies topologies = new Topologies( - genTopology("topo-1", config, 1, 0, 1, 0, currentTime - 2, 20, "jerry"), - genTopology("topo-3", config, 1, 0, 1, 0, currentTime - 2, 10, "bobby"), - genTopology("topo-4", config, 1, 0, 1, 0, currentTime - 2, 10, "bobby"), - genTopology("topo-5", config, 1, 0, 1, 0, currentTime - 2, 29, "derek")); - Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config); - scheduler = new ResourceAwareScheduler(); - scheduler.prepare(config, new StormMetricsRegistry()); - LOG.info("\n\n\t\tScheduling topos 1,3,4,5"); 
-        scheduler.schedule(topologies, cluster);
-        LOG.info("\n\n\t\tDone scheduling...");
-
-        assertTopologiesFullyScheduled(cluster, "topo-1", "topo-3", "topo-4", "topo-5");
-
-        //user derek submits another topology into a full cluster
-        //topo6 should not be able to scheduled initially, but since topo6 has higher priority than topo5
-        //topo5 will be evicted so that topo6 can be scheduled
-        topologies = addTopologies(topologies,
-            genTopology("topo-6", config, 1, 0, 1, 0, currentTime - 2, 10, "derek"));
-        cluster = new Cluster(cluster, topologies);
-        LOG.info("\n\n\t\tScheduling topos 1,3,4,5,6");
-        scheduler.schedule(topologies, cluster);
-        LOG.info("\n\n\t\tDone scheduling...");
-
-        assertTopologiesFullyScheduled(cluster, "topo-1", "topo-3", "topo-4", "topo-6");
-        assertTopologiesNotScheduled(cluster, "topo-5");
-
-        //user jerry submits topo2
-        topologies = addTopologies(topologies,
-            genTopology("topo-2", config, 1, 0, 1, 0, currentTime - 2, 20, "jerry"));
-        cluster = new Cluster(cluster, topologies);
-        LOG.info("\n\n\t\tScheduling topos 1-6");
-        scheduler.schedule(topologies, cluster);
-        LOG.info("\n\n\t\tDone scheduling...");
-
-        assertTopologiesFullyScheduled(cluster, "topo-1", "topo-3", "topo-4", "topo-6");
-        assertTopologiesNotScheduled(cluster, "topo-2", "topo-5");
+        for (Class strategyClass: strategyClasses) {
+            INimbus iNimbus = new INimbusTest();
+            Map<String, SupervisorDetails> supMap = genSupervisors(4, 4, 100, 1000);
+            Map<String, Map<String, Number>> resourceUserPool = userResourcePool(
+                userRes("jerry", 70, 700),
+                userRes("bobby", 100, 1000),
+                userRes("derek", 25, 250));
+            Config config = createClusterConfig(strategyClass, 100, 500, 500, resourceUserPool);
+
+            Topologies topologies = new Topologies(
+                genTopology("topo-1", config, 1, 0, 1, 0, currentTime - 2, 20, "jerry"),
+                genTopology("topo-3", config, 1, 0, 1, 0, currentTime - 2, 10, "bobby"),
+                genTopology("topo-4", config, 1, 0, 1, 0, currentTime - 2, 10, "bobby"),
+                genTopology("topo-5", config, 1, 0, 1, 0, currentTime - 2, 29, "derek"));
+            Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config);
+            scheduler = new ResourceAwareScheduler();
+            scheduler.prepare(config, new StormMetricsRegistry());
+            LOG.info("\n\n\t\tScheduling topos 1,3,4,5");
+            scheduler.schedule(topologies, cluster);
+            LOG.info("\n\n\t\tDone scheduling...");
+
+            assertTopologiesFullyScheduled(cluster, strategyClass, "topo-1", "topo-3", "topo-4", "topo-5");
+
+            //user derek submits another topology into a full cluster
+            //topo6 should not be able to be scheduled initially, but since topo6 has higher priority than topo5
+            //topo5 will be evicted so that topo6 can be scheduled
+            topologies = addTopologies(topologies,
+                genTopology("topo-6", config, 1, 0, 1, 0, currentTime - 2, 10, "derek"));
+            cluster = new Cluster(cluster, topologies);
+            LOG.info("\n\n\t\tScheduling topos 1,3,4,5,6");
+            scheduler.schedule(topologies, cluster);
+            LOG.info("\n\n\t\tDone scheduling...");
+
+            assertTopologiesFullyScheduled(cluster, strategyClass, "topo-1", "topo-3", "topo-4", "topo-6");
+            assertTopologiesNotScheduled(cluster, strategyClass, "topo-5");
+
+            //user jerry submits topo2
+            topologies = addTopologies(topologies,
+                genTopology("topo-2", config, 1, 0, 1, 0, currentTime - 2, 20, "jerry"));
+            cluster = new Cluster(cluster, topologies);
+            LOG.info("\n\n\t\tScheduling topos 1-6");
+            scheduler.schedule(topologies, cluster);
+            LOG.info("\n\n\t\tDone scheduling...");
+
+            assertTopologiesFullyScheduled(cluster, strategyClass, "topo-1", "topo-3", "topo-4", "topo-6");
"topo-4", "topo-6"); + assertTopologiesNotScheduled(cluster, strategyClass,"topo-2", "topo-5"); + } } } diff --git a/storm-server/src/test/java/org/apache/storm/scheduler/resource/strategies/priority/TestFIFOSchedulingPriorityStrategy.java b/storm-server/src/test/java/org/apache/storm/scheduler/resource/strategies/priority/TestFIFOSchedulingPriorityStrategy.java index c479f90bb14..7977cfaff41 100644 --- a/storm-server/src/test/java/org/apache/storm/scheduler/resource/strategies/priority/TestFIFOSchedulingPriorityStrategy.java +++ b/storm-server/src/test/java/org/apache/storm/scheduler/resource/strategies/priority/TestFIFOSchedulingPriorityStrategy.java @@ -27,6 +27,10 @@ import org.apache.storm.scheduler.resource.ResourceAwareScheduler; import org.apache.storm.scheduler.resource.TestUtilsForResourceAwareScheduler; import org.apache.storm.scheduler.resource.strategies.scheduling.DefaultResourceAwareStrategy; +import org.apache.storm.scheduler.resource.strategies.scheduling.DefaultResourceAwareStrategyOld; +import org.apache.storm.scheduler.resource.strategies.scheduling.GenericResourceAwareStrategy; +import org.apache.storm.scheduler.resource.strategies.scheduling.GenericResourceAwareStrategyOld; +import org.apache.storm.scheduler.resource.strategies.scheduling.RoundRobinResourceAwareStrategy; import org.apache.storm.utils.Time; import org.junit.jupiter.api.Test; @@ -42,67 +46,72 @@ public class TestFIFOSchedulingPriorityStrategy { private static final Logger LOG = LoggerFactory.getLogger(TestFIFOSchedulingPriorityStrategy.class); - - protected Class getDefaultResourceAwareStrategyClass() { - return DefaultResourceAwareStrategy.class; - } - - private Config createClusterConfig(double compPcore, double compOnHeap, double compOffHeap, + private static final Class[] strategyClasses = { + DefaultResourceAwareStrategy.class, + DefaultResourceAwareStrategyOld.class, + RoundRobinResourceAwareStrategy.class, + GenericResourceAwareStrategy.class, + GenericResourceAwareStrategyOld.class, + }; + + private Config createClusterConfig(Class strategyClass, double compPcore, double compOnHeap, double compOffHeap, Map> pools) { Config config = TestUtilsForResourceAwareScheduler.createClusterConfig(compPcore, compOnHeap, compOffHeap, pools); - config.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, getDefaultResourceAwareStrategyClass().getName()); + config.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, strategyClass.getName()); return config; } @Test public void testFIFOEvictionStrategy() { - try (Time.SimulatedTime sim = new Time.SimulatedTime()) { - INimbus iNimbus = new INimbusTest(); - Map supMap = genSupervisors(4, 4, 100.0, 1000.0); - Map> resourceUserPool = userResourcePool( - userRes("jerry", 200.0, 2000.0)); - Config config = createClusterConfig(100, 500, 500, resourceUserPool); - config.put(DaemonConfig.RESOURCE_AWARE_SCHEDULER_PRIORITY_STRATEGY, FIFOSchedulingPriorityStrategy.class.getName()); - - Topologies topologies = new Topologies( - genTopology("topo-1-jerry", config, 1, 0, 1, 0, Time.currentTimeSecs() - 250, 20, "jerry"), - genTopology("topo-2-bobby", config, 1, 0, 1, 0, Time.currentTimeSecs() - 200, 10, "bobby"), - genTopology("topo-3-bobby", config, 1, 0, 1, 0, Time.currentTimeSecs() - 300, 20, "bobby"), - genTopology("topo-4-derek", config, 1, 0, 1, 0, Time.currentTimeSecs() - 201, 29, "derek")); - Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config); - - ResourceAwareScheduler rs = new ResourceAwareScheduler(); - 
-            rs.prepare(config, new StormMetricsRegistry());
-            try {
-                rs.schedule(topologies, cluster);
-
-                assertTopologiesFullyScheduled(cluster, "topo-1-jerry", "topo-2-bobby", "topo-3-bobby", "topo-4-derek");
-
-                LOG.info("\n\n\t\tINSERTING topo-5");
-                //new topology needs to be scheduled
-                //topo-3 should be evicted since it's been up the longest
-                topologies = addTopologies(topologies,
-                    genTopology("topo-5-derek", config, 1, 0, 1, 0, Time.currentTimeSecs() - 15, 29, "derek"));
-
-                cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config);
-                rs.schedule(topologies, cluster);
-
-                assertTopologiesFullyScheduled(cluster, "topo-1-jerry", "topo-2-bobby", "topo-4-derek", "topo-5-derek");
-                assertTopologiesNotScheduled(cluster, "topo-3-bobby");
-
-                LOG.info("\n\n\t\tINSERTING topo-6");
-                //new topology needs to be scheduled. topo-4 should be evicted. Even though topo-1 from user jerry is older, topo-1 will not be evicted
-                //since user jerry has enough resource guarantee
-                topologies = addTopologies(topologies,
-                    genTopology("topo-6-bobby", config, 1, 0, 1, 0, Time.currentTimeSecs() - 10, 29, "bobby"));
-
-                cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config);
-                rs.schedule(topologies, cluster);
-
-                assertTopologiesFullyScheduled(cluster, "topo-1-jerry", "topo-2-bobby", "topo-5-derek", "topo-6-bobby");
-                assertTopologiesNotScheduled(cluster, "topo-3-bobby", "topo-4-derek");
-            } finally {
-                rs.cleanup();
+        for (Class strategyClass: strategyClasses) {
+            try (Time.SimulatedTime sim = new Time.SimulatedTime()) {
+                INimbus iNimbus = new INimbusTest();
+                Map<String, SupervisorDetails> supMap = genSupervisors(4, 4, 100.0, 1000.0);
+                Map<String, Map<String, Number>> resourceUserPool = userResourcePool(
+                    userRes("jerry", 200.0, 2000.0));
+                Config config = createClusterConfig(strategyClass, 100, 500, 500, resourceUserPool);
+                config.put(DaemonConfig.RESOURCE_AWARE_SCHEDULER_PRIORITY_STRATEGY, FIFOSchedulingPriorityStrategy.class.getName());
+
+                Topologies topologies = new Topologies(
+                    genTopology("topo-1-jerry", config, 1, 0, 1, 0, Time.currentTimeSecs() - 250, 20, "jerry"),
+                    genTopology("topo-2-bobby", config, 1, 0, 1, 0, Time.currentTimeSecs() - 200, 10, "bobby"),
+                    genTopology("topo-3-bobby", config, 1, 0, 1, 0, Time.currentTimeSecs() - 300, 20, "bobby"),
+                    genTopology("topo-4-derek", config, 1, 0, 1, 0, Time.currentTimeSecs() - 201, 29, "derek"));
+                Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config);
+
+                ResourceAwareScheduler rs = new ResourceAwareScheduler();
+                rs.prepare(config, new StormMetricsRegistry());
+                try {
+                    rs.schedule(topologies, cluster);
+
+                    assertTopologiesFullyScheduled(cluster, strategyClass, "topo-1-jerry", "topo-2-bobby", "topo-3-bobby", "topo-4-derek");
+
+                    LOG.info("\n\n\t\tINSERTING topo-5");
+                    //new topology needs to be scheduled
+                    //topo-3 should be evicted since it's been up the longest
+                    topologies = addTopologies(topologies,
+                        genTopology("topo-5-derek", config, 1, 0, 1, 0, Time.currentTimeSecs() - 15, 29, "derek"));
+
+                    cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config);
+                    rs.schedule(topologies, cluster);
+
+                    assertTopologiesFullyScheduled(cluster, strategyClass, "topo-1-jerry", "topo-2-bobby", "topo-4-derek", "topo-5-derek");
+                    assertTopologiesNotScheduled(cluster, strategyClass, "topo-3-bobby");
+
+                    LOG.info("\n\n\t\tINSERTING topo-6");
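+                    // note: FIFO priority only evicts from users exceeding their guarantees, oldest running topology first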
+                    //new topology needs to be scheduled. topo-4 should be evicted. Even though topo-1 from user jerry is older, topo-1 will not be evicted
+                    //since user jerry has enough resource guarantee
+                    topologies = addTopologies(topologies,
+                        genTopology("topo-6-bobby", config, 1, 0, 1, 0, Time.currentTimeSecs() - 10, 29, "bobby"));
+
+                    cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config);
+                    rs.schedule(topologies, cluster);
+
+                    assertTopologiesFullyScheduled(cluster, strategyClass, "topo-1-jerry", "topo-2-bobby", "topo-5-derek", "topo-6-bobby");
+                    assertTopologiesNotScheduled(cluster, strategyClass, "topo-3-bobby", "topo-4-derek");
+                } finally {
+                    rs.cleanup();
+                }
+            }
        }
    }
}
diff --git a/storm-server/src/test/java/org/apache/storm/scheduler/resource/strategies/priority/TestGenericResourceAwareSchedulingPriorityStrategy.java b/storm-server/src/test/java/org/apache/storm/scheduler/resource/strategies/priority/TestGenericResourceAwareSchedulingPriorityStrategy.java
index 4d9b1782c99..74cc5247dcb 100644
--- a/storm-server/src/test/java/org/apache/storm/scheduler/resource/strategies/priority/TestGenericResourceAwareSchedulingPriorityStrategy.java
+++ b/storm-server/src/test/java/org/apache/storm/scheduler/resource/strategies/priority/TestGenericResourceAwareSchedulingPriorityStrategy.java
@@ -29,7 +29,11 @@
 import org.apache.storm.scheduler.resource.ResourceAwareScheduler;
 import org.apache.storm.scheduler.resource.TestUtilsForResourceAwareScheduler;
 import org.apache.storm.scheduler.resource.normalization.ResourceMetrics;
+import org.apache.storm.scheduler.resource.strategies.scheduling.DefaultResourceAwareStrategy;
+import org.apache.storm.scheduler.resource.strategies.scheduling.DefaultResourceAwareStrategyOld;
 import org.apache.storm.scheduler.resource.strategies.scheduling.GenericResourceAwareStrategy;
+import org.apache.storm.scheduler.resource.strategies.scheduling.GenericResourceAwareStrategyOld;
+import org.apache.storm.scheduler.resource.strategies.scheduling.RoundRobinResourceAwareStrategy;
 import org.apache.storm.utils.Time;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.Test;
@@ -51,6 +55,10 @@
 import static org.apache.storm.scheduler.resource.TestUtilsForResourceAwareScheduler.userResourcePool;

 public class TestGenericResourceAwareSchedulingPriorityStrategy {
+    private static final Class[] strategyClasses = {
+        GenericResourceAwareStrategy.class,
+        GenericResourceAwareStrategyOld.class,
+    };
     private final int currentTime = Time.currentTimeSecs();
     private IScheduler scheduler = null;
@@ -62,14 +70,10 @@
     public void cleanup() {
         }
     }

-    protected Class getGenericResourceAwareStrategyClass() {
-        return GenericResourceAwareStrategy.class;
-    }
-
-    private Config createGrasClusterConfig(double compPcore, double compOnHeap, double compOffHeap,
+    private Config createGrasClusterConfig(Class strategyClass, double compPcore, double compOnHeap, double compOffHeap,
                                            Map<String, Map<String, Number>> pools, Map<String, Double> genericResourceMap) {
         Config config = TestUtilsForResourceAwareScheduler.createGrasClusterConfig(compPcore, compOnHeap, compOffHeap, pools, genericResourceMap);
-        config.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, getGenericResourceAwareStrategyClass().getName());
+        config.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, strategyClass.getName());
         return config;
     }
@@ -81,36 +85,38 @@
      */
    @Test
    public void testDefaultSchedulingPriorityStrategyNotEvicting() {
-        Map<String, Double> requestedgenericResourcesMap = new HashMap<>();
-        requestedgenericResourcesMap.put("generic.resource.1", 40.0);
-        // Use full memory and cpu of the cluster capacity
-        Config ruiConf = createGrasClusterConfig(20, 50, 50, null, requestedgenericResourcesMap);
-        Config ethanConf = createGrasClusterConfig(80, 400, 500, null, Collections.emptyMap());
-        Topologies topologies = new Topologies(
-            genTopology("ethan-topo-1", ethanConf, 1, 0, 1, 0, currentTime - 2, 10, "ethan"),
-            genTopology("ethan-topo-2", ethanConf, 1, 0, 1, 0, currentTime - 2, 20, "ethan"),
-            genTopology("ethan-topo-3", ethanConf, 1, 0, 1, 0, currentTime - 2, 28, "ethan"),
-            genTopology("ethan-topo-4", ethanConf, 1, 0, 1, 0, currentTime - 2, 29, "ethan"));
-
-        Topologies withNewTopo = addTopologies(topologies,
-            genTopology("rui-topo-1", ruiConf, 1, 0, 4, 0, currentTime - 2, 10, "rui"));
-
-        Config config = mkClusterConfig(DefaultSchedulingPriorityStrategy.class.getName());
-        Cluster cluster = mkTestCluster(topologies, config);
-        scheduler = new ResourceAwareScheduler();
-        scheduler.prepare(config, new StormMetricsRegistry());
-
-        scheduler.schedule(topologies, cluster);
-
-        assertTopologiesFullyScheduled(cluster, "ethan-topo-1", "ethan-topo-2", "ethan-topo-3", "ethan-topo-4");
-
-        cluster = new Cluster(cluster, withNewTopo);
-        scheduler.schedule(withNewTopo, cluster);
-        Map<String, Set<String>> evictedTopos = ((ResourceAwareScheduler) scheduler).getEvictedTopologiesMap();
-
-        assertTopologiesFullyScheduled(cluster, "ethan-topo-1", "ethan-topo-2", "ethan-topo-3", "ethan-topo-4");
-        assertTopologiesNotBeenEvicted(cluster, collectMapValues(evictedTopos), "ethan-topo-1", "ethan-topo-2", "ethan-topo-3", "ethan-topo-4");
-        assertTopologiesFullyScheduled(cluster, "rui-topo-1");
+        for (Class strategyClass: strategyClasses) {
+            Map<String, Double> requestedgenericResourcesMap = new HashMap<>();
+            requestedgenericResourcesMap.put("generic.resource.1", 40.0);
+            // Use full memory and cpu of the cluster capacity
+            Config ruiConf = createGrasClusterConfig(strategyClass, 20, 50, 50, null, requestedgenericResourcesMap);
+            Config ethanConf = createGrasClusterConfig(strategyClass, 80, 400, 500, null, Collections.emptyMap());
+            Topologies topologies = new Topologies(
+                genTopology("ethan-topo-1", ethanConf, 1, 0, 1, 0, currentTime - 2, 10, "ethan"),
+                genTopology("ethan-topo-2", ethanConf, 1, 0, 1, 0, currentTime - 2, 20, "ethan"),
+                genTopology("ethan-topo-3", ethanConf, 1, 0, 1, 0, currentTime - 2, 28, "ethan"),
+                genTopology("ethan-topo-4", ethanConf, 1, 0, 1, 0, currentTime - 2, 29, "ethan"));
+
+            Topologies withNewTopo = addTopologies(topologies,
+                genTopology("rui-topo-1", ruiConf, 1, 0, 4, 0, currentTime - 2, 10, "rui"));
+
+            Config config = mkClusterConfig(strategyClass, DefaultSchedulingPriorityStrategy.class.getName());
+            Cluster cluster = mkTestCluster(topologies, config);
+            scheduler = new ResourceAwareScheduler();
+            scheduler.prepare(config, new StormMetricsRegistry());
+
+            scheduler.schedule(topologies, cluster);
+
+            assertTopologiesFullyScheduled(cluster, strategyClass, "ethan-topo-1", "ethan-topo-2", "ethan-topo-3", "ethan-topo-4");
+
+            cluster = new Cluster(cluster, withNewTopo);
+            scheduler.schedule(withNewTopo, cluster);
+            Map<String, Set<String>> evictedTopos = ((ResourceAwareScheduler) scheduler).getEvictedTopologiesMap();
+
+            assertTopologiesFullyScheduled(cluster, strategyClass, "ethan-topo-1", "ethan-topo-2", "ethan-topo-3", "ethan-topo-4");
+            assertTopologiesNotBeenEvicted(cluster, strategyClass, collectMapValues(evictedTopos), "ethan-topo-1", "ethan-topo-2", "ethan-topo-3", "ethan-topo-4");
"ethan-topo-4"); + assertTopologiesFullyScheduled(cluster, strategyClass, "rui-topo-1"); + } } /* @@ -124,34 +130,36 @@ public void testDefaultSchedulingPriorityStrategyNotEvicting() { */ @Test public void testDefaultSchedulingPriorityStrategyEvicting() { - Map requestedgenericResourcesMap = new HashMap<>(); - requestedgenericResourcesMap.put("generic.resource.1", 40.0); - Config ruiConf = createGrasClusterConfig(10, 10, 10, null, requestedgenericResourcesMap); - Config ethanConf = createGrasClusterConfig(60, 200, 300, null, Collections.emptyMap()); - Topologies topologies = new Topologies( - genTopology("ethan-topo-1", ethanConf, 1, 0, 1, 0, currentTime - 2, 10, "ethan"), - genTopology("ethan-topo-2", ethanConf, 1, 0, 1, 0, currentTime - 2, 20, "ethan"), - genTopology("ethan-topo-3", ethanConf, 1, 0, 1, 0, currentTime - 2, 28, "ethan"), - genTopology("ethan-topo-4", ethanConf, 1, 0, 1, 0, currentTime - 2, 29, "ethan")); - - Topologies withNewTopo = addTopologies(topologies, - genTopology("rui-topo-1", ruiConf, 1, 0, 5, 0, currentTime - 2, 10, "rui")); - - Config config = mkClusterConfig(DefaultSchedulingPriorityStrategy.class.getName()); - Cluster cluster = mkTestCluster(topologies, config); - scheduler = new ResourceAwareScheduler(); - scheduler.prepare(config, new StormMetricsRegistry()); - scheduler.schedule(topologies, cluster); - - assertTopologiesFullyScheduled(cluster, "ethan-topo-1", "ethan-topo-2", "ethan-topo-3", "ethan-topo-4"); - - cluster = new Cluster(cluster, withNewTopo); - scheduler.schedule(withNewTopo, cluster); - Map> evictedTopos = ((ResourceAwareScheduler) scheduler).getEvictedTopologiesMap(); - - assertTopologiesFullyScheduled(cluster, "ethan-topo-1", "ethan-topo-2", "ethan-topo-3", "ethan-topo-4"); - assertTopologiesBeenEvicted(cluster, collectMapValues(evictedTopos), "ethan-topo-1", "ethan-topo-2", "ethan-topo-3", "ethan-topo-4"); - assertTopologiesNotScheduled(cluster, "rui-topo-1"); + for (Class strategyClass: strategyClasses) { + Map requestedgenericResourcesMap = new HashMap<>(); + requestedgenericResourcesMap.put("generic.resource.1", 40.0); + Config ruiConf = createGrasClusterConfig(strategyClass, 10, 10, 10, null, requestedgenericResourcesMap); + Config ethanConf = createGrasClusterConfig(strategyClass, 60, 200, 300, null, Collections.emptyMap()); + Topologies topologies = new Topologies( + genTopology("ethan-topo-1", ethanConf, 1, 0, 1, 0, currentTime - 2, 10, "ethan"), + genTopology("ethan-topo-2", ethanConf, 1, 0, 1, 0, currentTime - 2, 20, "ethan"), + genTopology("ethan-topo-3", ethanConf, 1, 0, 1, 0, currentTime - 2, 28, "ethan"), + genTopology("ethan-topo-4", ethanConf, 1, 0, 1, 0, currentTime - 2, 29, "ethan")); + + Topologies withNewTopo = addTopologies(topologies, + genTopology("rui-topo-1", ruiConf, 1, 0, 5, 0, currentTime - 2, 10, "rui")); + + Config config = mkClusterConfig(strategyClass, DefaultSchedulingPriorityStrategy.class.getName()); + Cluster cluster = mkTestCluster(topologies, config); + scheduler = new ResourceAwareScheduler(); + scheduler.prepare(config, new StormMetricsRegistry()); + scheduler.schedule(topologies, cluster); + + assertTopologiesFullyScheduled(cluster, strategyClass, "ethan-topo-1", "ethan-topo-2", "ethan-topo-3", "ethan-topo-4"); + + cluster = new Cluster(cluster, withNewTopo); + scheduler.schedule(withNewTopo, cluster); + Map> evictedTopos = ((ResourceAwareScheduler) scheduler).getEvictedTopologiesMap(); + + assertTopologiesFullyScheduled(cluster, strategyClass, "ethan-topo-1", "ethan-topo-2", "ethan-topo-3", 
"ethan-topo-4"); + assertTopologiesBeenEvicted(cluster, strategyClass, collectMapValues(evictedTopos), "ethan-topo-1", "ethan-topo-2", "ethan-topo-3", "ethan-topo-4"); + assertTopologiesNotScheduled(cluster, strategyClass, "rui-topo-1"); + } } /* @@ -163,39 +171,41 @@ public void testDefaultSchedulingPriorityStrategyEvicting() { */ @Test public void testGenericSchedulingPriorityStrategyEvicting() { - Map requestedgenericResourcesMap = new HashMap<>(); - requestedgenericResourcesMap.put("generic.resource.1", 40.0); - Config ruiConf = createGrasClusterConfig(10, 10, 10, null, requestedgenericResourcesMap); - Config ethanConf = createGrasClusterConfig(60, 200, 300, null, Collections.emptyMap()); - Topologies topologies = new Topologies( - genTopology("ethan-topo-1", ethanConf, 1, 0, 1, 0, currentTime - 2, 10, "ethan"), - genTopology("ethan-topo-2", ethanConf, 1, 0, 1, 0, currentTime - 2, 20, "ethan"), - genTopology("ethan-topo-3", ethanConf, 1, 0, 1, 0, currentTime - 2, 28, "ethan"), - genTopology("ethan-topo-4", ethanConf, 1, 0, 1, 0, currentTime - 2, 29, "ethan")); - - Topologies withNewTopo = addTopologies(topologies, - genTopology("rui-topo-1", ruiConf, 1, 0, 5, 0, currentTime - 2, 10, "rui")); - - - Config config = mkClusterConfig(GenericResourceAwareSchedulingPriorityStrategy.class.getName()); - Cluster cluster = mkTestCluster(topologies, config); - scheduler = new ResourceAwareScheduler(); - scheduler.prepare(config, new StormMetricsRegistry()); - scheduler.schedule(topologies, cluster); - - assertTopologiesFullyScheduled(cluster, "ethan-topo-1", "ethan-topo-2", "ethan-topo-3", "ethan-topo-4"); - - cluster = new Cluster(cluster, withNewTopo); - scheduler.schedule(withNewTopo, cluster); - Map> evictedTopos = ((ResourceAwareScheduler) scheduler).getEvictedTopologiesMap(); - - assertTopologiesFullyScheduled(cluster, "ethan-topo-1", "ethan-topo-2", "ethan-topo-3", "ethan-topo-4"); - assertTopologiesNotBeenEvicted(cluster, collectMapValues(evictedTopos),"ethan-topo-1", "ethan-topo-2", "ethan-topo-3", "ethan-topo-4"); - assertTopologiesNotScheduled(cluster, "rui-topo-1"); + for (Class strategyClass: strategyClasses) { + Map requestedgenericResourcesMap = new HashMap<>(); + requestedgenericResourcesMap.put("generic.resource.1", 40.0); + Config ruiConf = createGrasClusterConfig(strategyClass, 10, 10, 10, null, requestedgenericResourcesMap); + Config ethanConf = createGrasClusterConfig(strategyClass, 60, 200, 300, null, Collections.emptyMap()); + Topologies topologies = new Topologies( + genTopology("ethan-topo-1", ethanConf, 1, 0, 1, 0, currentTime - 2, 10, "ethan"), + genTopology("ethan-topo-2", ethanConf, 1, 0, 1, 0, currentTime - 2, 20, "ethan"), + genTopology("ethan-topo-3", ethanConf, 1, 0, 1, 0, currentTime - 2, 28, "ethan"), + genTopology("ethan-topo-4", ethanConf, 1, 0, 1, 0, currentTime - 2, 29, "ethan")); + + Topologies withNewTopo = addTopologies(topologies, + genTopology("rui-topo-1", ruiConf, 1, 0, 5, 0, currentTime - 2, 10, "rui")); + + + Config config = mkClusterConfig(strategyClass, GenericResourceAwareSchedulingPriorityStrategy.class.getName()); + Cluster cluster = mkTestCluster(topologies, config); + scheduler = new ResourceAwareScheduler(); + scheduler.prepare(config, new StormMetricsRegistry()); + scheduler.schedule(topologies, cluster); + + assertTopologiesFullyScheduled(cluster, strategyClass, "ethan-topo-1", "ethan-topo-2", "ethan-topo-3", "ethan-topo-4"); + + cluster = new Cluster(cluster, withNewTopo); + scheduler.schedule(withNewTopo, cluster); + Map> evictedTopos = 
+
+            assertTopologiesFullyScheduled(cluster, strategyClass, "ethan-topo-1", "ethan-topo-2", "ethan-topo-3", "ethan-topo-4");
+            assertTopologiesNotBeenEvicted(cluster, strategyClass, collectMapValues(evictedTopos), "ethan-topo-1", "ethan-topo-2", "ethan-topo-3", "ethan-topo-4");
+            assertTopologiesNotScheduled(cluster, strategyClass, "rui-topo-1");
+        }
    }

-    private Config mkClusterConfig(String SchedulingPriorityStrategy) {
+    private Config mkClusterConfig(Class strategyClass, String SchedulingPriorityStrategy) {
        Map<String, Map<String, Number>> resourceUserPool = userResourcePool(
            userRes("rui", 200, 2000),
            userRes("ethan", 200, 2000));
@@ -203,7 +213,7 @@
        Map<String, Double> genericResourcesOfferedMap = new HashMap<>();
        genericResourcesOfferedMap.put("generic.resource.1", 50.0);

-        Config config = createGrasClusterConfig(100, 500, 500, resourceUserPool, genericResourcesOfferedMap);
+        Config config = createGrasClusterConfig(strategyClass, 100, 500, 500, resourceUserPool, genericResourcesOfferedMap);
        config.put(DaemonConfig.RESOURCE_AWARE_SCHEDULER_PRIORITY_STRATEGY, SchedulingPriorityStrategy);
        config.put(DaemonConfig.RESOURCE_AWARE_SCHEDULER_MAX_TOPOLOGY_SCHEDULING_ATTEMPTS, 2);    // allow 1 round of evictions
diff --git a/storm-server/src/test/java/org/apache/storm/scheduler/resource/strategies/scheduling/TestBackwardCompatibility.java b/storm-server/src/test/java/org/apache/storm/scheduler/resource/strategies/scheduling/TestBackwardCompatibility.java
deleted file mode 100644
index f6ed48dc9c3..00000000000
--- a/storm-server/src/test/java/org/apache/storm/scheduler/resource/strategies/scheduling/TestBackwardCompatibility.java
+++ /dev/null
@@ -1,435 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.storm.scheduler.resource.strategies.scheduling;
-
-import org.apache.storm.TestRebalance;
-import org.apache.storm.daemon.nimbus.NimbusTest;
-import org.apache.storm.generated.InvalidTopologyException;
-import org.apache.storm.scheduler.blacklist.TestBlacklistScheduler;
-import org.apache.storm.scheduler.resource.TestResourceAwareScheduler;
-import org.apache.storm.scheduler.resource.TestUser;
-import org.apache.storm.scheduler.resource.strategies.eviction.TestDefaultEvictionStrategy;
-import org.apache.storm.scheduler.resource.strategies.priority.TestFIFOSchedulingPriorityStrategy;
-import org.apache.storm.scheduler.resource.strategies.priority.TestGenericResourceAwareSchedulingPriorityStrategy;
-import org.apache.storm.testing.PerformanceTest;
-import org.junit.jupiter.api.Test;
-import org.junit.jupiter.params.ParameterizedTest;
-import org.junit.jupiter.params.provider.EnumSource;
-
-/**
- * Test for backward compatibility.
- *
- * <p>
- * {@link GenericResourceAwareStrategyOld} class behavior is supposed to be compatible
- * with the prior version of {@link GenericResourceAwareStrategy} and
- * {@link DefaultResourceAwareStrategyOld} class behavior is supposed to be compatible
- * with the prior version of {@link DefaultResourceAwareStrategy}.
- * </p>
- * - * The tests in this class wrap tests in other classes while replacing Strategy classes. - * The wrapped classes have protected methods that return strategy classes. These methods - * are overridden to return backward compatible class. - */ -public class TestBackwardCompatibility { - - TestGenericResourceAwareStrategy testGenericResourceAwareStrategy; - TestResourceAwareScheduler testResourceAwareScheduler; - TestBlacklistScheduler testBlacklistScheduler; - NimbusTest nimbusTest; - TestRebalance testRebalance; - TestGenericResourceAwareSchedulingPriorityStrategy testGenericResourceAwareSchedulingPriorityStrategy; - - TestDefaultResourceAwareStrategy testDefaultResourceAwareStrategy; - TestFIFOSchedulingPriorityStrategy testFIFOSchedulingPriorityStrategy; - TestDefaultEvictionStrategy testDefaultEvictionStrategy; - TestUser testUser; - - public TestBackwardCompatibility() { - // Create instances of wrapped test classes and override strategy class methods - testGenericResourceAwareStrategy = new TestGenericResourceAwareStrategy() { - @Override - protected Class getGenericResourceAwareStrategyClass() { - return GenericResourceAwareStrategyOld.class; - } - }; - testResourceAwareScheduler = new TestResourceAwareScheduler() { - @Override - protected Class getDefaultResourceAwareStrategyClass() { - return DefaultResourceAwareStrategyOld.class; - } - - @Override - protected Class getGenericResourceAwareStrategyClass() { - return GenericResourceAwareStrategyOld.class; - } - }; - testBlacklistScheduler = new TestBlacklistScheduler() { - @Override - protected Class getDefaultResourceAwareStrategyClass() { - return DefaultResourceAwareStrategyOld.class; - } - }; - nimbusTest = new NimbusTest() { - @Override - protected Class getDefaultResourceAwareStrategyClass() { - return DefaultResourceAwareStrategyOld.class; - } - }; - testRebalance = new TestRebalance() { - @Override - protected Class getDefaultResourceAwareStrategyClass() { - return DefaultResourceAwareStrategyOld.class; - } - }; - testGenericResourceAwareSchedulingPriorityStrategy = new TestGenericResourceAwareSchedulingPriorityStrategy() { - @Override - protected Class getGenericResourceAwareStrategyClass() { - return GenericResourceAwareStrategyOld.class; - } - }; - testDefaultResourceAwareStrategy = new TestDefaultResourceAwareStrategy() { - @Override - protected Class getDefaultResourceAwareStrategyClass() { - return DefaultResourceAwareStrategyOld.class; - } - }; - testFIFOSchedulingPriorityStrategy = new TestFIFOSchedulingPriorityStrategy() { - @Override - protected Class getDefaultResourceAwareStrategyClass() { - return DefaultResourceAwareStrategyOld.class; - } - }; - testDefaultEvictionStrategy = new TestDefaultEvictionStrategy() { - @Override - protected Class getDefaultResourceAwareStrategyClass() { - return DefaultResourceAwareStrategyOld.class; - } - }; - testUser = new TestUser() { - @Override - protected Class getDefaultResourceAwareStrategyClass() { - return DefaultResourceAwareStrategyOld.class; - } - }; - - } - - /********************************************************************************** - * Tests for testGenericResourceAwareStrategy - ***********************************************************************************/ - - @Test - public void testGenericResourceAwareStrategySharedMemory() { - testGenericResourceAwareStrategy.testGenericResourceAwareStrategySharedMemory(); - } - - @Test - public void testGenericResourceAwareStrategy() - throws InvalidTopologyException { - 
testGenericResourceAwareStrategy.testGenericResourceAwareStrategyWithoutSettingAckerExecutors(0); - } - - @Test - public void testGenericResourceAwareStrategyInFavorOfShuffle() - throws InvalidTopologyException { - testGenericResourceAwareStrategy.testGenericResourceAwareStrategyInFavorOfShuffle(); - } - - @Test - public void testGrasRequiringEviction() { - testGenericResourceAwareStrategy.testGrasRequiringEviction(); - } - - @Test - public void testAntiAffinityWithMultipleTopologies() { - testGenericResourceAwareStrategy.testAntiAffinityWithMultipleTopologies(); - } - - /********************************************************************************** - * Tests for testResourceAwareScheduler - ***********************************************************************************/ - - @PerformanceTest - @Test - public void testLargeTopologiesOnLargeClusters() { - testResourceAwareScheduler.testLargeTopologiesOnLargeClusters(); - } - - @PerformanceTest - @Test - public void testLargeTopologiesOnLargeClustersGras() { - testResourceAwareScheduler.testLargeTopologiesOnLargeClustersGras(); - } - - @Test - public void testHeterogeneousClusterwithGras() { - testResourceAwareScheduler.testHeterogeneousClusterwithGras(); - } - - @Test - public void testRASNodeSlotAssign() { - testResourceAwareScheduler.testRASNodeSlotAssign(); - } - - @Test - public void sanityTestOfScheduling() { - testResourceAwareScheduler.sanityTestOfScheduling(); - } - - @Test - public void testTopologyWithMultipleSpouts() { - testResourceAwareScheduler.testTopologyWithMultipleSpouts(); - } - - @Test - public void testTopologySetCpuAndMemLoad() { - testResourceAwareScheduler.testTopologySetCpuAndMemLoad(); - } - - @Test - public void testResourceLimitation() { - testResourceAwareScheduler.testResourceLimitation(); - } - - @Test - public void testScheduleResilience() { - testResourceAwareScheduler.testScheduleResilience(); - } - - @Test - public void testHeterogeneousClusterwithDefaultRas() { - testResourceAwareScheduler.testHeterogeneousClusterwithDefaultRas(); - } - - @Test - public void testTopologyWorkerMaxHeapSize() { - testResourceAwareScheduler.testTopologyWorkerMaxHeapSize(); - } - - @Test - public void testReadInResourceAwareSchedulerUserPools() { - testResourceAwareScheduler.testReadInResourceAwareSchedulerUserPools(); - } - - @Test - public void testSubmitUsersWithNoGuarantees() { - testResourceAwareScheduler.testSubmitUsersWithNoGuarantees(); - } - - @Test - public void testMultipleUsers() { - testResourceAwareScheduler.testMultipleUsers(); - } - - @Test - public void testHandlingClusterSubscription() { - testResourceAwareScheduler.testHandlingClusterSubscription(); - } - - @Test - public void testFaultTolerance() { - testResourceAwareScheduler.testFaultTolerance(); - } - - @Test - public void testNodeFreeSlot() { - testResourceAwareScheduler.testNodeFreeSlot(); - } - - @Test - public void testSchedulingAfterFailedScheduling() { - testResourceAwareScheduler.testSchedulingAfterFailedScheduling(); - } - - @Test - public void minCpuWorkerJustFits() { - testResourceAwareScheduler.minCpuWorkerJustFits(); - } - - @Test - public void minCpuPreventsThirdTopo() { - testResourceAwareScheduler.minCpuPreventsThirdTopo(); - } - - @Test - public void testMinCpuMaxMultipleSupervisors() { - testResourceAwareScheduler.testMinCpuMaxMultipleSupervisors(); - } - - @Test - public void minCpuWorkerSplitFails() { - testResourceAwareScheduler.minCpuWorkerSplitFails(); - } - - @Test - public void TestLargeFragmentedClusterScheduling() { - 
testResourceAwareScheduler.TestLargeFragmentedClusterScheduling(); - } - - @Test - public void testMultipleSpoutsAndCyclicTopologies() { - testResourceAwareScheduler.testMultipleSpoutsAndCyclicTopologies(); - } - - @Test - public void testSchedulerStrategyWhitelist() { - testResourceAwareScheduler.testSchedulerStrategyWhitelist(); - } - - @Test - public void testSchedulerStrategyWhitelistException() { - testResourceAwareScheduler.testSchedulerStrategyWhitelistException(); - } - - @Test - public void testSchedulerStrategyEmptyWhitelist() { - testResourceAwareScheduler.testSchedulerStrategyEmptyWhitelist(); - } - - @Test - public void testStrategyTakingTooLong() { - testResourceAwareScheduler.testStrategyTakingTooLong(); - } - - /********************************************************************************** - * Tests for TestBlackListScheduler - ***********************************************************************************/ - @Test - public void TestGreylist() { - testBlacklistScheduler.TestGreylist(); - } - - /********************************************************************************** - * Tests for NimbusTest - ***********************************************************************************/ - @Test - public void testMemoryLoadLargerThanMaxHeapSize() throws Exception { - nimbusTest.testMemoryLoadLargerThanMaxHeapSize(); - } - - /********************************************************************************** - * Tests for TestRebalance - ***********************************************************************************/ - @Test - public void testRebalanceTopologyResourcesAndConfigs() throws Exception { - testRebalance.testRebalanceTopologyResourcesAndConfigs(); - } - - /********************************************************************************** - * Tests for testGenericResourceAwareSchedulingPriorityStrategy - ***********************************************************************************/ - @Test - public void testDefaultSchedulingPriorityStrategyNotEvicting() { - testGenericResourceAwareSchedulingPriorityStrategy.testDefaultSchedulingPriorityStrategyNotEvicting(); - } - - @Test - public void testDefaultSchedulingPriorityStrategyEvicting() { - testGenericResourceAwareSchedulingPriorityStrategy.testDefaultSchedulingPriorityStrategyEvicting(); - } - - @Test - public void testGenericSchedulingPriorityStrategyEvicting() { - testGenericResourceAwareSchedulingPriorityStrategy.testGenericSchedulingPriorityStrategyEvicting(); - } - - /********************************************************************************** - * Tests for testDefaultResourceAwareStrategy - ***********************************************************************************/ - - @Test - public void testSchedulingNegativeResources() { - testDefaultResourceAwareStrategy.testSchedulingNegativeResources(); - } - - @ParameterizedTest - @EnumSource(TestDefaultResourceAwareStrategy.WorkerRestrictionType.class) - public void testDefaultResourceAwareStrategySharedMemory(TestDefaultResourceAwareStrategy.WorkerRestrictionType schedulingLimitation) { - testDefaultResourceAwareStrategy.testDefaultResourceAwareStrategySharedMemory(schedulingLimitation); - } - - @Test - public void testDefaultResourceAwareStrategy() - throws InvalidTopologyException { - testDefaultResourceAwareStrategy.testDefaultResourceAwareStrategyWithoutSettingAckerExecutors(0); - } - - @Test - public void testDefaultResourceAwareStrategyInFavorOfShuffle() - throws InvalidTopologyException { - 
testDefaultResourceAwareStrategy.testDefaultResourceAwareStrategyInFavorOfShuffle(); - } - - @Test - public void testMultipleRacks() { - testDefaultResourceAwareStrategy.testMultipleRacks(); - } - - @Test - public void testMultipleRacksWithFavoritism() { - testDefaultResourceAwareStrategy.testMultipleRacksWithFavoritism(); - } - - /********************************************************************************** - * Tests for TestFIFOSchedulingPriorityStrategy - ***********************************************************************************/ - - @Test - public void testFIFOEvictionStrategy() { - testFIFOSchedulingPriorityStrategy.testFIFOEvictionStrategy(); - } - - /********************************************************************************** - * Tests for TestDefaultEvictionStrategy - ***********************************************************************************/ - - @Test - public void testEviction() { - testDefaultEvictionStrategy.testEviction(); - } - - @Test - public void testEvictMultipleTopologies() { - testDefaultEvictionStrategy.testEvictMultipleTopologies(); - } - - @Test - public void testEvictMultipleTopologiesFromMultipleUsersInCorrectOrder() { - testDefaultEvictionStrategy.testEvictMultipleTopologiesFromMultipleUsersInCorrectOrder(); - } - - @Test - public void testEvictTopologyFromItself() { - testDefaultEvictionStrategy.testEvictTopologyFromItself(); - } - - @Test - public void testOverGuaranteeEviction() { - testDefaultEvictionStrategy.testOverGuaranteeEviction(); - } - - /********************************************************************************** - * Tests for TestUser - ***********************************************************************************/ - - @Test - public void testResourcePoolUtilization() { - testUser.testResourcePoolUtilization(); - } -} \ No newline at end of file diff --git a/storm-server/src/test/java/org/apache/storm/scheduler/resource/strategies/scheduling/TestDefaultResourceAwareStrategy.java b/storm-server/src/test/java/org/apache/storm/scheduler/resource/strategies/scheduling/TestDefaultResourceAwareStrategy.java index 1f5d9bdcaa2..1b8dae8f128 100644 --- a/storm-server/src/test/java/org/apache/storm/scheduler/resource/strategies/scheduling/TestDefaultResourceAwareStrategy.java +++ b/storm-server/src/test/java/org/apache/storm/scheduler/resource/strategies/scheduling/TestDefaultResourceAwareStrategy.java @@ -87,6 +87,10 @@ @ExtendWith({NormalizedResourcesExtension.class}) public class TestDefaultResourceAwareStrategy { + private static final Class[] strategyClasses = { + DefaultResourceAwareStrategy.class, + DefaultResourceAwareStrategyOld.class, + }; private static final int CURRENT_TIME = 1450418597; private static IScheduler scheduler = null; private enum SharedMemoryType { @@ -100,14 +104,10 @@ protected enum WorkerRestrictionType { WORKER_RESTRICTION_NONE } - protected Class getDefaultResourceAwareStrategyClass() { - return DefaultResourceAwareStrategy.class; - } - - private Config createClusterConfig(double compPcore, double compOnHeap, double compOffHeap, - Map> pools) { + private Config createClusterConfig(Class strategyClass, double compPcore, double compOnHeap, double compOffHeap, + Map> pools) { Config config = TestUtilsForResourceAwareScheduler.createClusterConfig(compPcore, compOnHeap, compOffHeap, pools); - config.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, getDefaultResourceAwareStrategyClass().getName()); + config.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, strategyClass.getName()); return config; } @@ -145,86 
+145,88 @@ public void cleanup() { @ParameterizedTest @EnumSource(SharedMemoryType.class) public void testMultipleSharedMemoryWithOneExecutorPerWorker(SharedMemoryType memoryType) { - int spoutParallelism = 4; - double cpuPercent = 10; - double memoryOnHeap = 10; - double memoryOffHeap = 10; - double sharedOnHeapWithinWorker = 450; - double sharedOffHeapWithinNode = 600; - double sharedOffHeapWithinWorker = 400; - - TopologyBuilder builder = new TopologyBuilder(); - switch (memoryType) { - case SHARED_OFF_HEAP_NODE: - builder.setSpout("spout", new TestSpout(), spoutParallelism) - .addSharedMemory(new SharedOffHeapWithinNode(sharedOffHeapWithinNode, "spout shared off heap within node")); - break; - case SHARED_OFF_HEAP_WORKER: - builder.setSpout("spout", new TestSpout(), spoutParallelism) - .addSharedMemory(new SharedOffHeapWithinWorker(sharedOffHeapWithinWorker, "spout shared off heap within worker")); - break; - case SHARED_ON_HEAP_WORKER: - builder.setSpout("spout", new TestSpout(), spoutParallelism) - .addSharedMemory(new SharedOnHeap(sharedOnHeapWithinWorker, "spout shared on heap within worker")); - break; - } - StormTopology stormToplogy = builder.createTopology(); - INimbus iNimbus = new INimbusTest(); - Map supMap = genSupervisors(4, 4, 500, 1000); - Config conf = createClusterConfig(cpuPercent, memoryOnHeap, memoryOffHeap, null); - - conf.put(Config.TOPOLOGY_PRIORITY, 0); - conf.put(Config.TOPOLOGY_NAME, "testTopology"); - conf.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, 2000); - conf.put(Config.TOPOLOGY_RAS_ONE_EXECUTOR_PER_WORKER, true); - TopologyDetails topo = new TopologyDetails("testTopology-id", conf, stormToplogy, 0, - genExecsAndComps(stormToplogy), CURRENT_TIME, "user"); - - Topologies topologies = new Topologies(topo); - Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, conf); - - scheduler = new ResourceAwareScheduler(); - scheduler.prepare(conf, new StormMetricsRegistry()); - scheduler.schedule(topologies, cluster); - - TopologyResources topologyResources = cluster.getTopologyResourcesMap().get(topo.getId()); - SchedulerAssignment assignment = cluster.getAssignmentById(topo.getId()); - long numNodes = assignment.getSlotToExecutors().keySet().stream().map(WorkerSlot::getNodeId).distinct().count(); - - switch (memoryType) { - case SHARED_OFF_HEAP_NODE: - // 4 workers on single node. OffHeapNode memory is shared - assertThat(topologyResources.getAssignedMemOnHeap(), closeTo(spoutParallelism * memoryOnHeap, 0.01)); - assertThat(topologyResources.getAssignedMemOffHeap(), closeTo(spoutParallelism * memoryOffHeap + sharedOffHeapWithinNode, 0.01)); - assertThat(topologyResources.getAssignedSharedMemOnHeap(), closeTo(0, 0.01)); - assertThat(topologyResources.getAssignedSharedMemOffHeap(), closeTo(sharedOffHeapWithinNode, 0.01)); - assertThat(topologyResources.getAssignedNonSharedMemOnHeap(), closeTo(spoutParallelism * memoryOnHeap, 0.01)); - assertThat(topologyResources.getAssignedNonSharedMemOffHeap(), closeTo(spoutParallelism * memoryOffHeap, 0.01)); - assertThat(numNodes, is(1L)); - assertThat(cluster.getAssignedNumWorkers(topo), is(spoutParallelism)); - break; - case SHARED_OFF_HEAP_WORKER: - // 4 workers on 2 nodes. 
OffHeapWorker memory not shared -- consumed 4x, once for each worker - assertThat(topologyResources.getAssignedMemOnHeap(), closeTo(spoutParallelism * memoryOnHeap, 0.01)); - assertThat(topologyResources.getAssignedMemOffHeap(), closeTo(spoutParallelism * (memoryOffHeap + sharedOffHeapWithinWorker), 0.01)); - assertThat(topologyResources.getAssignedSharedMemOnHeap(), closeTo(0, 0.01)); - assertThat(topologyResources.getAssignedSharedMemOffHeap(), closeTo(spoutParallelism * sharedOffHeapWithinWorker, 0.01)); - assertThat(topologyResources.getAssignedNonSharedMemOnHeap(), closeTo(spoutParallelism * memoryOnHeap, 0.01)); - assertThat(topologyResources.getAssignedNonSharedMemOffHeap(), closeTo(spoutParallelism * memoryOffHeap, 0.01)); - assertThat(numNodes, is(2L)); - assertThat(cluster.getAssignedNumWorkers(topo), is(spoutParallelism)); - break; - case SHARED_ON_HEAP_WORKER: - // 4 workers on 2 nodes. onHeap memory not shared -- consumed 4x, once for each worker - assertThat(topologyResources.getAssignedMemOnHeap(), closeTo(spoutParallelism * (memoryOnHeap + sharedOnHeapWithinWorker), 0.01)); - assertThat(topologyResources.getAssignedMemOffHeap(), closeTo(spoutParallelism * memoryOffHeap, 0.01)); - assertThat(topologyResources.getAssignedSharedMemOnHeap(), closeTo(spoutParallelism * sharedOnHeapWithinWorker, 0.01)); - assertThat(topologyResources.getAssignedSharedMemOffHeap(), closeTo(0, 0.01)); - assertThat(topologyResources.getAssignedNonSharedMemOnHeap(), closeTo(spoutParallelism * memoryOnHeap, 0.01)); - assertThat(topologyResources.getAssignedNonSharedMemOffHeap(), closeTo(spoutParallelism * memoryOffHeap, 0.01)); - assertThat(numNodes, is(2L)); - assertThat(cluster.getAssignedNumWorkers(topo), is(spoutParallelism)); - break; + for (Class strategyClass: strategyClasses) { + int spoutParallelism = 4; + double cpuPercent = 10; + double memoryOnHeap = 10; + double memoryOffHeap = 10; + double sharedOnHeapWithinWorker = 450; + double sharedOffHeapWithinNode = 600; + double sharedOffHeapWithinWorker = 400; + + TopologyBuilder builder = new TopologyBuilder(); + switch (memoryType) { + case SHARED_OFF_HEAP_NODE: + builder.setSpout("spout", new TestSpout(), spoutParallelism) + .addSharedMemory(new SharedOffHeapWithinNode(sharedOffHeapWithinNode, "spout shared off heap within node")); + break; + case SHARED_OFF_HEAP_WORKER: + builder.setSpout("spout", new TestSpout(), spoutParallelism) + .addSharedMemory(new SharedOffHeapWithinWorker(sharedOffHeapWithinWorker, "spout shared off heap within worker")); + break; + case SHARED_ON_HEAP_WORKER: + builder.setSpout("spout", new TestSpout(), spoutParallelism) + .addSharedMemory(new SharedOnHeap(sharedOnHeapWithinWorker, "spout shared on heap within worker")); + break; + } + StormTopology stormToplogy = builder.createTopology(); + INimbus iNimbus = new INimbusTest(); + Map supMap = genSupervisors(4, 4, 500, 1000); + Config conf = createClusterConfig(strategyClass, cpuPercent, memoryOnHeap, memoryOffHeap, null); + + conf.put(Config.TOPOLOGY_PRIORITY, 0); + conf.put(Config.TOPOLOGY_NAME, "testTopology"); + conf.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, 2000); + conf.put(Config.TOPOLOGY_RAS_ONE_EXECUTOR_PER_WORKER, true); + TopologyDetails topo = new TopologyDetails("testTopology-id", conf, stormToplogy, 0, + genExecsAndComps(stormToplogy), CURRENT_TIME, "user"); + + Topologies topologies = new Topologies(topo); + Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, conf); + + 
scheduler = new ResourceAwareScheduler(); + scheduler.prepare(conf, new StormMetricsRegistry()); + scheduler.schedule(topologies, cluster); + + TopologyResources topologyResources = cluster.getTopologyResourcesMap().get(topo.getId()); + SchedulerAssignment assignment = cluster.getAssignmentById(topo.getId()); + long numNodes = assignment.getSlotToExecutors().keySet().stream().map(WorkerSlot::getNodeId).distinct().count(); + + switch (memoryType) { + case SHARED_OFF_HEAP_NODE: + // 4 workers on single node. OffHeapNode memory is shared + assertThat(topologyResources.getAssignedMemOnHeap(), closeTo(spoutParallelism * memoryOnHeap, 0.01)); + assertThat(topologyResources.getAssignedMemOffHeap(), closeTo(spoutParallelism * memoryOffHeap + sharedOffHeapWithinNode, 0.01)); + assertThat(topologyResources.getAssignedSharedMemOnHeap(), closeTo(0, 0.01)); + assertThat(topologyResources.getAssignedSharedMemOffHeap(), closeTo(sharedOffHeapWithinNode, 0.01)); + assertThat(topologyResources.getAssignedNonSharedMemOnHeap(), closeTo(spoutParallelism * memoryOnHeap, 0.01)); + assertThat(topologyResources.getAssignedNonSharedMemOffHeap(), closeTo(spoutParallelism * memoryOffHeap, 0.01)); + assertThat(numNodes, is(1L)); + assertThat(cluster.getAssignedNumWorkers(topo), is(spoutParallelism)); + break; + case SHARED_OFF_HEAP_WORKER: + // 4 workers on 2 nodes. OffHeapWorker memory not shared -- consumed 4x, once for each worker + assertThat(topologyResources.getAssignedMemOnHeap(), closeTo(spoutParallelism * memoryOnHeap, 0.01)); + assertThat(topologyResources.getAssignedMemOffHeap(), closeTo(spoutParallelism * (memoryOffHeap + sharedOffHeapWithinWorker), 0.01)); + assertThat(topologyResources.getAssignedSharedMemOnHeap(), closeTo(0, 0.01)); + assertThat(topologyResources.getAssignedSharedMemOffHeap(), closeTo(spoutParallelism * sharedOffHeapWithinWorker, 0.01)); + assertThat(topologyResources.getAssignedNonSharedMemOnHeap(), closeTo(spoutParallelism * memoryOnHeap, 0.01)); + assertThat(topologyResources.getAssignedNonSharedMemOffHeap(), closeTo(spoutParallelism * memoryOffHeap, 0.01)); + assertThat(numNodes, is(2L)); + assertThat(cluster.getAssignedNumWorkers(topo), is(spoutParallelism)); + break; + case SHARED_ON_HEAP_WORKER: + // 4 workers on 2 nodes. 
onHeap memory not shared -- consumed 4x, once for each worker + assertThat(topologyResources.getAssignedMemOnHeap(), closeTo(spoutParallelism * (memoryOnHeap + sharedOnHeapWithinWorker), 0.01)); + assertThat(topologyResources.getAssignedMemOffHeap(), closeTo(spoutParallelism * memoryOffHeap, 0.01)); + assertThat(topologyResources.getAssignedSharedMemOnHeap(), closeTo(spoutParallelism * sharedOnHeapWithinWorker, 0.01)); + assertThat(topologyResources.getAssignedSharedMemOffHeap(), closeTo(0, 0.01)); + assertThat(topologyResources.getAssignedNonSharedMemOnHeap(), closeTo(spoutParallelism * memoryOnHeap, 0.01)); + assertThat(topologyResources.getAssignedNonSharedMemOffHeap(), closeTo(spoutParallelism * memoryOffHeap, 0.01)); + assertThat(numNodes, is(2L)); + assertThat(cluster.getAssignedNumWorkers(topo), is(spoutParallelism)); + break; + } } } @@ -233,68 +235,70 @@ public void testMultipleSharedMemoryWithOneExecutorPerWorker(SharedMemoryType me */ @Test public void testSchedulingNegativeResources() { - int spoutParallelism = 2; - int boltParallelism = 2; - double cpuPercent = 10; - double memoryOnHeap = 10; - double memoryOffHeap = 10; - double sharedOnHeapWithinWorker = 400; - double sharedOffHeapWithinNode = 700; - double sharedOffHeapWithinWorker = 500; - - Config conf = createClusterConfig(cpuPercent, memoryOnHeap, memoryOffHeap, null); - TopologyDetails[] topo = new TopologyDetails[2]; - - // 1st topology - TopologyBuilder builder = new TopologyBuilder(); - builder.setSpout("spout", new TestSpout(), - spoutParallelism); - builder.setBolt("bolt-1", new TestBolt(), - boltParallelism).addSharedMemory(new SharedOffHeapWithinWorker(sharedOffHeapWithinWorker, "bolt-1 shared off heap within worker")).shuffleGrouping("spout"); - builder.setBolt("bolt-2", new TestBolt(), - boltParallelism).addSharedMemory(new SharedOffHeapWithinNode(sharedOffHeapWithinNode, "bolt-2 shared off heap within node")).shuffleGrouping("bolt-1"); - builder.setBolt("bolt-3", new TestBolt(), - boltParallelism).addSharedMemory(new SharedOnHeap(sharedOnHeapWithinWorker, "bolt-3 shared on heap within worker")).shuffleGrouping("bolt-2"); - StormTopology stormTopology = builder.createTopology(); - - conf.put(Config.TOPOLOGY_PRIORITY, 1); - conf.put(Config.TOPOLOGY_NAME, "testTopology-0"); - conf.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, 2000); - topo[0] = new TopologyDetails("testTopology-id-0", conf, stormTopology, 0, - genExecsAndComps(stormTopology), CURRENT_TIME, "user"); - - // 2nd topology - builder = new TopologyBuilder(); - builder.setSpout("spout", new TestSpout(), - spoutParallelism).addSharedMemory(new SharedOffHeapWithinNode(sharedOffHeapWithinNode, "spout shared off heap within node")); - stormTopology = builder.createTopology(); - - conf.put(Config.TOPOLOGY_PRIORITY, 0); - conf.put(Config.TOPOLOGY_NAME, "testTopology-1"); - topo[1] = new TopologyDetails("testTopology-id-1", conf, stormTopology, 0, - genExecsAndComps(stormTopology), CURRENT_TIME, "user"); - - Map supMap = genSupervisors(1, 4, 500, 2000); - Topologies topologies = new Topologies(topo[0]); - Cluster cluster = new Cluster(new INimbusTest(), new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, conf); - - // schedule 1st topology - scheduler = new ResourceAwareScheduler(); - scheduler.prepare(conf, new StormMetricsRegistry()); - scheduler.schedule(topologies, cluster); - assertTopologiesFullyScheduled(cluster, topo[0].getName()); - - // attempt scheduling both topologies. 
- // this triggered negative resource event as the second topology incorrectly scheduled with the first in place - // first topology should get evicted for higher priority (lower value) second topology to successfully schedule - topologies = new Topologies(topo[0], topo[1]); - cluster = new Cluster(cluster, topologies); - scheduler.schedule(topologies, cluster); - assertTopologiesNotScheduled(cluster, topo[0].getName()); - assertTopologiesFullyScheduled(cluster, topo[1].getName()); - - // check negative resource count - assertThat(cluster.getResourceMetrics().getNegativeResourceEventsMeter().getCount(), is(0L)); + for (Class strategyClass: strategyClasses) { + int spoutParallelism = 2; + int boltParallelism = 2; + double cpuPercent = 10; + double memoryOnHeap = 10; + double memoryOffHeap = 10; + double sharedOnHeapWithinWorker = 400; + double sharedOffHeapWithinNode = 700; + double sharedOffHeapWithinWorker = 500; + + Config conf = createClusterConfig(strategyClass, cpuPercent, memoryOnHeap, memoryOffHeap, null); + TopologyDetails[] topo = new TopologyDetails[2]; + + // 1st topology + TopologyBuilder builder = new TopologyBuilder(); + builder.setSpout("spout", new TestSpout(), + spoutParallelism); + builder.setBolt("bolt-1", new TestBolt(), + boltParallelism).addSharedMemory(new SharedOffHeapWithinWorker(sharedOffHeapWithinWorker, "bolt-1 shared off heap within worker")).shuffleGrouping("spout"); + builder.setBolt("bolt-2", new TestBolt(), + boltParallelism).addSharedMemory(new SharedOffHeapWithinNode(sharedOffHeapWithinNode, "bolt-2 shared off heap within node")).shuffleGrouping("bolt-1"); + builder.setBolt("bolt-3", new TestBolt(), + boltParallelism).addSharedMemory(new SharedOnHeap(sharedOnHeapWithinWorker, "bolt-3 shared on heap within worker")).shuffleGrouping("bolt-2"); + StormTopology stormTopology = builder.createTopology(); + + conf.put(Config.TOPOLOGY_PRIORITY, 1); + conf.put(Config.TOPOLOGY_NAME, "testTopology-0"); + conf.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, 2000); + topo[0] = new TopologyDetails("testTopology-id-0", conf, stormTopology, 0, + genExecsAndComps(stormTopology), CURRENT_TIME, "user"); + + // 2nd topology + builder = new TopologyBuilder(); + builder.setSpout("spout", new TestSpout(), + spoutParallelism).addSharedMemory(new SharedOffHeapWithinNode(sharedOffHeapWithinNode, "spout shared off heap within node")); + stormTopology = builder.createTopology(); + + conf.put(Config.TOPOLOGY_PRIORITY, 0); + conf.put(Config.TOPOLOGY_NAME, "testTopology-1"); + topo[1] = new TopologyDetails("testTopology-id-1", conf, stormTopology, 0, + genExecsAndComps(stormTopology), CURRENT_TIME, "user"); + + Map supMap = genSupervisors(1, 4, 500, 2000); + Topologies topologies = new Topologies(topo[0]); + Cluster cluster = new Cluster(new INimbusTest(), new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, conf); + + // schedule 1st topology + scheduler = new ResourceAwareScheduler(); + scheduler.prepare(conf, new StormMetricsRegistry()); + scheduler.schedule(topologies, cluster); + assertTopologiesFullyScheduled(cluster, strategyClass, topo[0].getName()); + + // attempt scheduling both topologies. 
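+ // (in RAS a numerically lower Config.TOPOLOGY_PRIORITY wins, so testTopology-1 with priority 0 outranks testTopology-0 with priority 1)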
+ // this triggered negative resource event as the second topology incorrectly scheduled with the first in place + // first topology should get evicted for higher priority (lower value) second topology to successfully schedule + topologies = new Topologies(topo[0], topo[1]); + cluster = new Cluster(cluster, topologies); + scheduler.schedule(topologies, cluster); + assertTopologiesNotScheduled(cluster, strategyClass, topo[0].getName()); + assertTopologiesFullyScheduled(cluster, strategyClass, topo[1].getName()); + + // check negative resource count + assertThat(cluster.getResourceMetrics().getNegativeResourceEventsMeter().getCount(), is(0L)); + } } /** @@ -303,149 +307,151 @@ public void testSchedulingNegativeResources() { @ParameterizedTest @EnumSource(WorkerRestrictionType.class) public void testDefaultResourceAwareStrategySharedMemory(WorkerRestrictionType schedulingLimitation) { - int spoutParallelism = 2; - int boltParallelism = 2; - int numBolts = 3; - double cpuPercent = 10; - double memoryOnHeap = 10; - double memoryOffHeap = 10; - double sharedOnHeapWithinWorker = 400; - double sharedOffHeapWithinNode = 700; - double sharedOffHeapWithinWorker = 600; - - TopologyBuilder builder = new TopologyBuilder(); - builder.setSpout("spout", new TestSpout(), - spoutParallelism); - builder.setBolt("bolt-1", new TestBolt(), - boltParallelism).addSharedMemory(new SharedOffHeapWithinWorker(sharedOffHeapWithinWorker, "bolt-1 shared off heap within worker")).shuffleGrouping("spout"); - builder.setBolt("bolt-2", new TestBolt(), - boltParallelism).addSharedMemory(new SharedOffHeapWithinNode(sharedOffHeapWithinNode, "bolt-2 shared off heap within node")).shuffleGrouping("bolt-1"); - builder.setBolt("bolt-3", new TestBolt(), - boltParallelism).addSharedMemory(new SharedOnHeap(sharedOnHeapWithinWorker, "bolt-3 shared on heap within worker")).shuffleGrouping("bolt-2"); - - StormTopology stormTopology = builder.createTopology(); - - INimbus iNimbus = new INimbusTest(); - Map supMap = genSupervisors(4, 4, 500, 2000); - Config conf = createClusterConfig(cpuPercent, memoryOnHeap, memoryOffHeap, null); - - conf.put(Config.TOPOLOGY_PRIORITY, 0); - conf.put(Config.TOPOLOGY_NAME, "testTopology"); - conf.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, 2000); - switch (schedulingLimitation) { - case WORKER_RESTRICTION_ONE_EXECUTOR: - conf.put(Config.TOPOLOGY_RAS_ONE_EXECUTOR_PER_WORKER, true); - break; - case WORKER_RESTRICTION_ONE_COMPONENT: - conf.put(Config.TOPOLOGY_RAS_ONE_COMPONENT_PER_WORKER, true); - break; - } - TopologyDetails topo = new TopologyDetails("testTopology-id", conf, stormTopology, 0, - genExecsAndComps(stormTopology), CURRENT_TIME, "user"); - - Topologies topologies = new Topologies(topo); - Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, conf); - - scheduler = new ResourceAwareScheduler(); - scheduler.prepare(conf, new StormMetricsRegistry()); - scheduler.schedule(topologies, cluster); - - // [3,3] [7,7], [0,0] [2,2] [6,6] [1,1] [5,5] [4,4] sorted executor ordering - // spout [0,0] [1,1] - // bolt-1 [2,2] [3,3] - // bolt-2 [6,6] [7,7] - // bolt-3 [4,4] [5,5] - - // WorkerRestrictionType.WORKER_RESTRICTION_NONE - // expect 1 worker, 1 node - - // WorkerRestrictionType.WORKER_RESTRICTION_ONE_EXECUTOR - // expect 8 workers, 2 nodes - // node r000s000 workers: bolt-1 bolt-2 spout bolt-1 (no memory sharing) - // node r000s001 workers: bolt-2 spout bolt-3 bolt-3 (no memory sharing) - - // 
WorkerRestrictionType.WORKER_RESTRICTION_ONE_COMPONENT - // expect 4 workers, 1 node - - for (Entry entry: cluster.getSupervisorsResourcesMap().entrySet()) { - String supervisorId = entry.getKey(); - SupervisorResources resources = entry.getValue(); - assertTrue(resources.getTotalCpu() >= resources.getUsedCpu(), supervisorId); - assertTrue(resources.getTotalMem() >= resources.getUsedMem(), supervisorId); - } + for (Class strategyClass: strategyClasses) { + int spoutParallelism = 2; + int boltParallelism = 2; + int numBolts = 3; + double cpuPercent = 10; + double memoryOnHeap = 10; + double memoryOffHeap = 10; + double sharedOnHeapWithinWorker = 400; + double sharedOffHeapWithinNode = 700; + double sharedOffHeapWithinWorker = 600; + + TopologyBuilder builder = new TopologyBuilder(); + builder.setSpout("spout", new TestSpout(), + spoutParallelism); + builder.setBolt("bolt-1", new TestBolt(), + boltParallelism).addSharedMemory(new SharedOffHeapWithinWorker(sharedOffHeapWithinWorker, "bolt-1 shared off heap within worker")).shuffleGrouping("spout"); + builder.setBolt("bolt-2", new TestBolt(), + boltParallelism).addSharedMemory(new SharedOffHeapWithinNode(sharedOffHeapWithinNode, "bolt-2 shared off heap within node")).shuffleGrouping("bolt-1"); + builder.setBolt("bolt-3", new TestBolt(), + boltParallelism).addSharedMemory(new SharedOnHeap(sharedOnHeapWithinWorker, "bolt-3 shared on heap within worker")).shuffleGrouping("bolt-2"); + + StormTopology stormTopology = builder.createTopology(); + + INimbus iNimbus = new INimbusTest(); + Map supMap = genSupervisors(4, 4, 500, 2000); + Config conf = createClusterConfig(strategyClass, cpuPercent, memoryOnHeap, memoryOffHeap, null); + + conf.put(Config.TOPOLOGY_PRIORITY, 0); + conf.put(Config.TOPOLOGY_NAME, "testTopology"); + conf.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, 2000); + switch (schedulingLimitation) { + case WORKER_RESTRICTION_ONE_EXECUTOR: + conf.put(Config.TOPOLOGY_RAS_ONE_EXECUTOR_PER_WORKER, true); + break; + case WORKER_RESTRICTION_ONE_COMPONENT: + conf.put(Config.TOPOLOGY_RAS_ONE_COMPONENT_PER_WORKER, true); + break; + } + TopologyDetails topo = new TopologyDetails("testTopology-id", conf, stormTopology, 0, + genExecsAndComps(stormTopology), CURRENT_TIME, "user"); + + Topologies topologies = new Topologies(topo); + Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, conf); + + scheduler = new ResourceAwareScheduler(); + scheduler.prepare(conf, new StormMetricsRegistry()); + scheduler.schedule(topologies, cluster); + + // [3,3] [7,7], [0,0] [2,2] [6,6] [1,1] [5,5] [4,4] sorted executor ordering + // spout [0,0] [1,1] + // bolt-1 [2,2] [3,3] + // bolt-2 [6,6] [7,7] + // bolt-3 [4,4] [5,5] + + // WorkerRestrictionType.WORKER_RESTRICTION_NONE + // expect 1 worker, 1 node + + // WorkerRestrictionType.WORKER_RESTRICTION_ONE_EXECUTOR + // expect 8 workers, 2 nodes + // node r000s000 workers: bolt-1 bolt-2 spout bolt-1 (no memory sharing) + // node r000s001 workers: bolt-2 spout bolt-3 bolt-3 (no memory sharing) + + // WorkerRestrictionType.WORKER_RESTRICTION_ONE_COMPONENT + // expect 4 workers, 1 node + + for (Entry entry : cluster.getSupervisorsResourcesMap().entrySet()) { + String supervisorId = entry.getKey(); + SupervisorResources resources = entry.getValue(); + assertTrue(resources.getTotalCpu() >= resources.getUsedCpu(), supervisorId); + assertTrue(resources.getTotalMem() >= resources.getUsedMem(), supervisorId); + } - int totalNumberOfTasks = spoutParallelism + 
boltParallelism * numBolts; - SchedulerAssignment assignment = cluster.getAssignmentById(topo.getId()); - TopologyResources topologyResources = cluster.getTopologyResourcesMap().get(topo.getId()); - long numNodes = assignment.getSlotToExecutors().keySet().stream().map(WorkerSlot::getNodeId).distinct().count(); - String assignmentString = "Assignments:\n\t" + assignment.getSlotToExecutors().entrySet().stream() - .map(x -> String.format("Node=%s, components=%s", - x.getKey().getNodeId(), - x.getValue().stream() - .map(topo::getComponentFromExecutor) - .collect(Collectors.joining(",")) - ) - ) - .collect(Collectors.joining("\n\t")); - - if (schedulingLimitation == WorkerRestrictionType.WORKER_RESTRICTION_NONE) { - // Everything should fit in a single slot - double totalExpectedCPU = totalNumberOfTasks * cpuPercent; - double totalExpectedOnHeap = (totalNumberOfTasks * memoryOnHeap) + sharedOnHeapWithinWorker; - double totalExpectedWorkerOffHeap = (totalNumberOfTasks * memoryOffHeap) + sharedOffHeapWithinWorker; - - assertThat(assignment.getSlots().size(), is(1)); - WorkerSlot ws = assignment.getSlots().iterator().next(); - String nodeId = ws.getNodeId(); - assertThat(assignment.getNodeIdToTotalSharedOffHeapNodeMemory().size(), is(1)); - assertThat(assignment.getNodeIdToTotalSharedOffHeapNodeMemory().get(nodeId), closeTo(sharedOffHeapWithinNode, 0.01)); - assertThat(assignment.getScheduledResources().size(), is(1)); - WorkerResources resources = assignment.getScheduledResources().get(ws); - assertThat(resources.get_cpu(), closeTo(totalExpectedCPU, 0.01)); - assertThat(resources.get_mem_on_heap(), closeTo(totalExpectedOnHeap, 0.01)); - assertThat(resources.get_mem_off_heap(), closeTo(totalExpectedWorkerOffHeap, 0.01)); - assertThat(resources.get_shared_mem_on_heap(), closeTo(sharedOnHeapWithinWorker, 0.01)); - assertThat(resources.get_shared_mem_off_heap(), closeTo(sharedOffHeapWithinWorker, 0.01)); - } else if (schedulingLimitation == WorkerRestrictionType.WORKER_RESTRICTION_ONE_EXECUTOR) { - double expectedMemOnHeap = (totalNumberOfTasks * memoryOnHeap) + 2 * sharedOnHeapWithinWorker; - double expectedMemOffHeap = (totalNumberOfTasks * memoryOffHeap) + 2 * sharedOffHeapWithinWorker + 2 * sharedOffHeapWithinNode; - double expectedMemSharedOnHeap = 2 * sharedOnHeapWithinWorker; - double expectedMemSharedOffHeap = 2 * sharedOffHeapWithinWorker + 2 * sharedOffHeapWithinNode; - double expectedMemNonSharedOnHeap = totalNumberOfTasks * memoryOnHeap; - double expectedMemNonSharedOffHeap = totalNumberOfTasks * memoryOffHeap; - assertThat(topologyResources.getAssignedMemOnHeap(), closeTo(expectedMemOnHeap, 0.01)); - assertThat(topologyResources.getAssignedMemOffHeap(), closeTo(expectedMemOffHeap, 0.01)); - assertThat(topologyResources.getAssignedSharedMemOnHeap(), closeTo(expectedMemSharedOnHeap, 0.01)); - assertThat(topologyResources.getAssignedSharedMemOffHeap(), closeTo(expectedMemSharedOffHeap, 0.01)); - assertThat(topologyResources.getAssignedNonSharedMemOnHeap(), closeTo(expectedMemNonSharedOnHeap, 0.01)); - assertThat(topologyResources.getAssignedNonSharedMemOffHeap(), closeTo(expectedMemNonSharedOffHeap, 0.01)); - - double totalExpectedCPU = totalNumberOfTasks * cpuPercent; - assertThat(topologyResources.getAssignedCpu(), closeTo(totalExpectedCPU, 0.01)); - int numAssignedWorkers = cluster.getAssignedNumWorkers(topo); - assertThat(numAssignedWorkers, is(8)); - assertThat(assignment.getSlots().size(), is(8)); - assertThat(assignmentString, numNodes, is(2L)); - } else if (schedulingLimitation == 
WorkerRestrictionType.WORKER_RESTRICTION_ONE_COMPONENT) { - double expectedMemOnHeap = (totalNumberOfTasks * memoryOnHeap) + sharedOnHeapWithinWorker; - double expectedMemOffHeap = (totalNumberOfTasks * memoryOffHeap) + sharedOffHeapWithinWorker + sharedOffHeapWithinNode; - double expectedMemSharedOnHeap = sharedOnHeapWithinWorker; - double expectedMemSharedOffHeap = sharedOffHeapWithinWorker + sharedOffHeapWithinNode; - double expectedMemNonSharedOnHeap = totalNumberOfTasks * memoryOnHeap; - double expectedMemNonSharedOffHeap = totalNumberOfTasks * memoryOffHeap; - assertThat(topologyResources.getAssignedMemOnHeap(), closeTo(expectedMemOnHeap, 0.01)); - assertThat(topologyResources.getAssignedMemOffHeap(), closeTo(expectedMemOffHeap, 0.01)); - assertThat(topologyResources.getAssignedSharedMemOnHeap(), closeTo(expectedMemSharedOnHeap, 0.01)); - assertThat(topologyResources.getAssignedSharedMemOffHeap(), closeTo(expectedMemSharedOffHeap, 0.01)); - assertThat(topologyResources.getAssignedNonSharedMemOnHeap(), closeTo(expectedMemNonSharedOnHeap, 0.01)); - assertThat(topologyResources.getAssignedNonSharedMemOffHeap(), closeTo(expectedMemNonSharedOffHeap, 0.01)); - - double totalExpectedCPU = totalNumberOfTasks * cpuPercent; - assertThat(topologyResources.getAssignedCpu(), closeTo(totalExpectedCPU, 0.01)); - int numAssignedWorkers = cluster.getAssignedNumWorkers(topo); - assertThat(numAssignedWorkers, is(4)); - assertThat(assignment.getSlots().size(), is(4)); - assertThat(numNodes, is(1L)); + int totalNumberOfTasks = spoutParallelism + boltParallelism * numBolts; + SchedulerAssignment assignment = cluster.getAssignmentById(topo.getId()); + TopologyResources topologyResources = cluster.getTopologyResourcesMap().get(topo.getId()); + long numNodes = assignment.getSlotToExecutors().keySet().stream().map(WorkerSlot::getNodeId).distinct().count(); + String assignmentString = "Assignments:\n\t" + assignment.getSlotToExecutors().entrySet().stream() + .map(x -> String.format("Node=%s, components=%s", + x.getKey().getNodeId(), + x.getValue().stream() + .map(topo::getComponentFromExecutor) + .collect(Collectors.joining(",")) + ) + ) + .collect(Collectors.joining("\n\t")); + + if (schedulingLimitation == WorkerRestrictionType.WORKER_RESTRICTION_NONE) { + // Everything should fit in a single slot + double totalExpectedCPU = totalNumberOfTasks * cpuPercent; + double totalExpectedOnHeap = (totalNumberOfTasks * memoryOnHeap) + sharedOnHeapWithinWorker; + double totalExpectedWorkerOffHeap = (totalNumberOfTasks * memoryOffHeap) + sharedOffHeapWithinWorker; + + assertThat(assignment.getSlots().size(), is(1)); + WorkerSlot ws = assignment.getSlots().iterator().next(); + String nodeId = ws.getNodeId(); + assertThat(assignment.getNodeIdToTotalSharedOffHeapNodeMemory().size(), is(1)); + assertThat(assignment.getNodeIdToTotalSharedOffHeapNodeMemory().get(nodeId), closeTo(sharedOffHeapWithinNode, 0.01)); + assertThat(assignment.getScheduledResources().size(), is(1)); + WorkerResources resources = assignment.getScheduledResources().get(ws); + assertThat(resources.get_cpu(), closeTo(totalExpectedCPU, 0.01)); + assertThat(resources.get_mem_on_heap(), closeTo(totalExpectedOnHeap, 0.01)); + assertThat(resources.get_mem_off_heap(), closeTo(totalExpectedWorkerOffHeap, 0.01)); + assertThat(resources.get_shared_mem_on_heap(), closeTo(sharedOnHeapWithinWorker, 0.01)); + assertThat(resources.get_shared_mem_off_heap(), closeTo(sharedOffHeapWithinWorker, 0.01)); + } else if (schedulingLimitation == 
WorkerRestrictionType.WORKER_RESTRICTION_ONE_EXECUTOR) { + double expectedMemOnHeap = (totalNumberOfTasks * memoryOnHeap) + 2 * sharedOnHeapWithinWorker; + double expectedMemOffHeap = (totalNumberOfTasks * memoryOffHeap) + 2 * sharedOffHeapWithinWorker + 2 * sharedOffHeapWithinNode; + double expectedMemSharedOnHeap = 2 * sharedOnHeapWithinWorker; + double expectedMemSharedOffHeap = 2 * sharedOffHeapWithinWorker + 2 * sharedOffHeapWithinNode; + double expectedMemNonSharedOnHeap = totalNumberOfTasks * memoryOnHeap; + double expectedMemNonSharedOffHeap = totalNumberOfTasks * memoryOffHeap; + assertThat(topologyResources.getAssignedMemOnHeap(), closeTo(expectedMemOnHeap, 0.01)); + assertThat(topologyResources.getAssignedMemOffHeap(), closeTo(expectedMemOffHeap, 0.01)); + assertThat(topologyResources.getAssignedSharedMemOnHeap(), closeTo(expectedMemSharedOnHeap, 0.01)); + assertThat(topologyResources.getAssignedSharedMemOffHeap(), closeTo(expectedMemSharedOffHeap, 0.01)); + assertThat(topologyResources.getAssignedNonSharedMemOnHeap(), closeTo(expectedMemNonSharedOnHeap, 0.01)); + assertThat(topologyResources.getAssignedNonSharedMemOffHeap(), closeTo(expectedMemNonSharedOffHeap, 0.01)); + + double totalExpectedCPU = totalNumberOfTasks * cpuPercent; + assertThat(topologyResources.getAssignedCpu(), closeTo(totalExpectedCPU, 0.01)); + int numAssignedWorkers = cluster.getAssignedNumWorkers(topo); + assertThat(numAssignedWorkers, is(8)); + assertThat(assignment.getSlots().size(), is(8)); + assertThat(assignmentString, numNodes, is(2L)); + } else if (schedulingLimitation == WorkerRestrictionType.WORKER_RESTRICTION_ONE_COMPONENT) { + double expectedMemOnHeap = (totalNumberOfTasks * memoryOnHeap) + sharedOnHeapWithinWorker; + double expectedMemOffHeap = (totalNumberOfTasks * memoryOffHeap) + sharedOffHeapWithinWorker + sharedOffHeapWithinNode; + double expectedMemSharedOnHeap = sharedOnHeapWithinWorker; + double expectedMemSharedOffHeap = sharedOffHeapWithinWorker + sharedOffHeapWithinNode; + double expectedMemNonSharedOnHeap = totalNumberOfTasks * memoryOnHeap; + double expectedMemNonSharedOffHeap = totalNumberOfTasks * memoryOffHeap; + assertThat(topologyResources.getAssignedMemOnHeap(), closeTo(expectedMemOnHeap, 0.01)); + assertThat(topologyResources.getAssignedMemOffHeap(), closeTo(expectedMemOffHeap, 0.01)); + assertThat(topologyResources.getAssignedSharedMemOnHeap(), closeTo(expectedMemSharedOnHeap, 0.01)); + assertThat(topologyResources.getAssignedSharedMemOffHeap(), closeTo(expectedMemSharedOffHeap, 0.01)); + assertThat(topologyResources.getAssignedNonSharedMemOnHeap(), closeTo(expectedMemNonSharedOnHeap, 0.01)); + assertThat(topologyResources.getAssignedNonSharedMemOffHeap(), closeTo(expectedMemNonSharedOffHeap, 0.01)); + + double totalExpectedCPU = totalNumberOfTasks * cpuPercent; + assertThat(topologyResources.getAssignedCpu(), closeTo(totalExpectedCPU, 0.01)); + int numAssignedWorkers = cluster.getAssignedNumWorkers(topo); + assertThat(numAssignedWorkers, is(4)); + assertThat(assignment.getSlots().size(), is(4)); + assertThat(numNodes, is(1L)); + } } } @@ -468,109 +474,111 @@ public void testDefaultResourceAwareStrategySharedMemory(WorkerRestrictionType s @ParameterizedTest @ValueSource(ints = {-1, 0, 1, 2}) public void testDefaultResourceAwareStrategyWithoutSettingAckerExecutors(int numOfAckersPerWorker) - throws InvalidTopologyException { - int spoutParallelism = 1; - int boltParallelism = 2; - TopologyBuilder builder = new TopologyBuilder(); - builder.setSpout("spout", new TestSpout(), - 
spoutParallelism); - builder.setBolt("bolt-1", new TestBolt(), - boltParallelism).shuffleGrouping("spout"); - builder.setBolt("bolt-2", new TestBolt(), - boltParallelism).shuffleGrouping("bolt-1"); - builder.setBolt("bolt-3", new TestBolt(), - boltParallelism).shuffleGrouping("bolt-2"); - - String topoName = "testTopology"; - - StormTopology stormTopology = builder.createTopology(); - - INimbus iNimbus = new INimbusTest(); - Map supMap = genSupervisors(4, 4, 200, 2000); - Config conf = createClusterConfig(50, 450, 0, null); - conf.put(Config.TOPOLOGY_PRIORITY, 0); - conf.put(Config.TOPOLOGY_NAME, topoName); - conf.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, 2000); - conf.put(Config.TOPOLOGY_SUBMITTER_USER, "user"); - - // Topology needs 2 workers (estimated by nimbus based on resources), - // but with ackers added, probably more worker will be launched. - // Parameterized test on different numOfAckersPerWorker - if (numOfAckersPerWorker == -1) { - // Both Config.TOPOLOGY_ACKER_EXECUTORS and Config.TOPOLOGY_RAS_ACKER_EXECUTORS_PER_WORKER are not set - // Default will be 2 (estimate num of workers) and 1 respectively - } else { - conf.put(Config.TOPOLOGY_RAS_ACKER_EXECUTORS_PER_WORKER, numOfAckersPerWorker); - } - - int estimatedNumWorker = ServerUtils.getEstimatedWorkerCountForRasTopo(conf, stormTopology); - Nimbus.setUpAckerExecutorConfigs(topoName, conf, conf, estimatedNumWorker); - - conf.put(Config.TOPOLOGY_ACKER_RESOURCES_ONHEAP_MEMORY_MB, 250); - conf.put(Config.TOPOLOGY_ACKER_CPU_PCORE_PERCENT, 50); - - TopologyDetails topo = new TopologyDetails("testTopology-id", conf, stormTopology, 0, - genExecsAndComps(StormCommon.systemTopology(conf, stormTopology)), CURRENT_TIME, "user"); - - Topologies topologies = new Topologies(topo); - Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, conf); - - scheduler = new ResourceAwareScheduler(); + throws InvalidTopologyException { + for (Class strategyClass: strategyClasses) { + int spoutParallelism = 1; + int boltParallelism = 2; + TopologyBuilder builder = new TopologyBuilder(); + builder.setSpout("spout", new TestSpout(), + spoutParallelism); + builder.setBolt("bolt-1", new TestBolt(), + boltParallelism).shuffleGrouping("spout"); + builder.setBolt("bolt-2", new TestBolt(), + boltParallelism).shuffleGrouping("bolt-1"); + builder.setBolt("bolt-3", new TestBolt(), + boltParallelism).shuffleGrouping("bolt-2"); + + String topoName = "testTopology"; + + StormTopology stormTopology = builder.createTopology(); + + INimbus iNimbus = new INimbusTest(); + Map supMap = genSupervisors(4, 4, 200, 2000); + Config conf = createClusterConfig(strategyClass, 50, 450, 0, null); + conf.put(Config.TOPOLOGY_PRIORITY, 0); + conf.put(Config.TOPOLOGY_NAME, topoName); + conf.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, 2000); + conf.put(Config.TOPOLOGY_SUBMITTER_USER, "user"); + + // Topology needs 2 workers (estimated by nimbus based on resources), + // but with ackers added, probably more worker will be launched. 
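+ // (rough estimate: ceil(total on-heap / worker max heap) = ceil(7 execs * 450 MB / 2000 MB) = 2 workers, before ackers are added)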
+ // Parameterized test on different numOfAckersPerWorker + if (numOfAckersPerWorker == -1) { + // Both Config.TOPOLOGY_ACKER_EXECUTORS and Config.TOPOLOGY_RAS_ACKER_EXECUTORS_PER_WORKER are not set + // Default will be 2 (estimate num of workers) and 1 respectively + } else { + conf.put(Config.TOPOLOGY_RAS_ACKER_EXECUTORS_PER_WORKER, numOfAckersPerWorker); + } - scheduler.prepare(conf, new StormMetricsRegistry()); - scheduler.schedule(topologies, cluster); + int estimatedNumWorker = ServerUtils.getEstimatedWorkerCountForRasTopo(conf, stormTopology); + Nimbus.setUpAckerExecutorConfigs(topoName, conf, conf, estimatedNumWorker); + + conf.put(Config.TOPOLOGY_ACKER_RESOURCES_ONHEAP_MEMORY_MB, 250); + conf.put(Config.TOPOLOGY_ACKER_CPU_PCORE_PERCENT, 50); + + TopologyDetails topo = new TopologyDetails("testTopology-id", conf, stormTopology, 0, + genExecsAndComps(StormCommon.systemTopology(conf, stormTopology)), CURRENT_TIME, "user"); + + Topologies topologies = new Topologies(topo); + Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, conf); + + scheduler = new ResourceAwareScheduler(); + + scheduler.prepare(conf, new StormMetricsRegistry()); + scheduler.schedule(topologies, cluster); + + // Ordered execs: [[6, 6], [2, 2], [4, 4], [5, 5], [1, 1], [3, 3], [0, 0], [8, 8], [7, 7]] + // Ackers: [[8, 8], [7, 7]] (+ [[9, 9], [10, 10]] when numOfAckersPerWorker=2) + HashSet> expectedScheduling = new HashSet<>(); + if (numOfAckersPerWorker == -1 || numOfAckersPerWorker == 1) { + // Setting topology.acker.executors = null and topology.acker.executors.per.worker = null + // are equivalent to topology.acker.executors = null and topology.acker.executors.per.worker = 1 + expectedScheduling.add(new HashSet<>(Arrays.asList( + new ExecutorDetails(6, 6), //bolt-3 + new ExecutorDetails(2, 2), //bolt-1 + new ExecutorDetails(4, 4), //bolt-2 + new ExecutorDetails(8, 8)))); //acker + expectedScheduling.add(new HashSet<>(Arrays.asList( + new ExecutorDetails(5, 5), //bolt-3 + new ExecutorDetails(1, 1), //bolt-1 + new ExecutorDetails(3, 3), //bolt-2 + new ExecutorDetails(7, 7)))); //acker + expectedScheduling.add(new HashSet<>(Arrays.asList( + new ExecutorDetails(0, 0)))); //spout + } else if (numOfAckersPerWorker == 0) { + expectedScheduling.add(new HashSet<>(Arrays.asList( + new ExecutorDetails(6, 6), //bolt-3 + new ExecutorDetails(2, 2), //bolt-1 + new ExecutorDetails(4, 4), //bolt-2 + new ExecutorDetails(5, 5)))); //bolt-3 + expectedScheduling.add(new HashSet<>(Arrays.asList( + new ExecutorDetails(0, 0), //spout + new ExecutorDetails(3, 3), //bolt-2 + new ExecutorDetails(1, 1)))); //bolt-1 + } else if (numOfAckersPerWorker == 2) { + expectedScheduling.add(new HashSet<>(Arrays.asList( + new ExecutorDetails(6, 6), //bolt-3 + new ExecutorDetails(2, 2), //bolt-1 + new ExecutorDetails(7, 7), //acker + new ExecutorDetails(8, 8)))); //acker + expectedScheduling.add(new HashSet<>(Arrays.asList( + new ExecutorDetails(4, 4), //bolt-2 + new ExecutorDetails(5, 5), //bolt-3 + new ExecutorDetails(9, 9), //acker + new ExecutorDetails(10, 10)))); //acker + expectedScheduling.add(new HashSet<>(Arrays.asList( + new ExecutorDetails(1, 1), //bolt-1 + new ExecutorDetails(3, 3), //bolt-2 + new ExecutorDetails(0, 0)))); //spout + } + HashSet> foundScheduling = new HashSet<>(); + SchedulerAssignment assignment = cluster.getAssignmentById("testTopology-id"); + for (Collection execs : assignment.getSlotToExecutors().values()) { + foundScheduling.add(new 
HashSet<>(execs)); + } - // Ordered execs: [[6, 6], [2, 2], [4, 4], [5, 5], [1, 1], [3, 3], [0, 0], [8, 8], [7, 7]] - // Ackers: [[8, 8], [7, 7]] (+ [[9, 9], [10, 10]] when numOfAckersPerWorker=2) - HashSet> expectedScheduling = new HashSet<>(); - if (numOfAckersPerWorker == -1 || numOfAckersPerWorker == 1) { - // Setting topology.acker.executors = null and topology.acker.executors.per.worker = null - // are equivalent to topology.acker.executors = null and topology.acker.executors.per.worker = 1 - expectedScheduling.add(new HashSet<>(Arrays.asList( - new ExecutorDetails(6, 6), //bolt-3 - new ExecutorDetails(2, 2), //bolt-1 - new ExecutorDetails(4, 4), //bolt-2 - new ExecutorDetails(8, 8)))); //acker - expectedScheduling.add(new HashSet<>(Arrays.asList( - new ExecutorDetails(5, 5), //bolt-3 - new ExecutorDetails(1, 1), //bolt-1 - new ExecutorDetails(3, 3), //bolt-2 - new ExecutorDetails(7, 7)))); //acker - expectedScheduling.add(new HashSet<>(Arrays.asList( - new ExecutorDetails(0, 0)))); //spout - } else if (numOfAckersPerWorker == 0) { - expectedScheduling.add(new HashSet<>(Arrays.asList( - new ExecutorDetails(6, 6), //bolt-3 - new ExecutorDetails(2, 2), //bolt-1 - new ExecutorDetails(4, 4), //bolt-2 - new ExecutorDetails(5, 5)))); //bolt-3 - expectedScheduling.add(new HashSet<>(Arrays.asList( - new ExecutorDetails(0, 0), //spout - new ExecutorDetails(3, 3), //bolt-2 - new ExecutorDetails(1, 1)))); //bolt-1 - } else if (numOfAckersPerWorker == 2) { - expectedScheduling.add(new HashSet<>(Arrays.asList( - new ExecutorDetails(6, 6), //bolt-3 - new ExecutorDetails(2, 2), //bolt-1 - new ExecutorDetails(7, 7), //acker - new ExecutorDetails(8, 8)))); //acker - expectedScheduling.add(new HashSet<>(Arrays.asList( - new ExecutorDetails(4, 4), //bolt-2 - new ExecutorDetails(5, 5), //bolt-3 - new ExecutorDetails(9, 9), //acker - new ExecutorDetails(10, 10)))); //acker - expectedScheduling.add(new HashSet<>(Arrays.asList( - new ExecutorDetails(1, 1), //bolt-1 - new ExecutorDetails(3, 3), //bolt-2 - new ExecutorDetails(0, 0)))); //spout + assertEquals(expectedScheduling, foundScheduling); } - HashSet> foundScheduling = new HashSet<>(); - SchedulerAssignment assignment = cluster.getAssignmentById("testTopology-id"); - for (Collection execs : assignment.getSlotToExecutors().values()) { - foundScheduling.add(new HashSet<>(execs)); - } - - assertEquals(expectedScheduling, foundScheduling); } /** @@ -583,85 +591,86 @@ public void testDefaultResourceAwareStrategyWithoutSettingAckerExecutors(int num @ParameterizedTest @ValueSource(ints = {-1, 0, 2, 300}) public void testDefaultResourceAwareStrategyWithSettingAckerExecutors(int numOfAckersPerWorker) - throws InvalidTopologyException { - - int spoutParallelism = 1; - int boltParallelism = 2; - TopologyBuilder builder = new TopologyBuilder(); - builder.setSpout("spout", new TestSpout(), - spoutParallelism); - builder.setBolt("bolt-1", new TestBolt(), - boltParallelism).shuffleGrouping("spout"); - builder.setBolt("bolt-2", new TestBolt(), - boltParallelism).shuffleGrouping("bolt-1"); - builder.setBolt("bolt-3", new TestBolt(), - boltParallelism).shuffleGrouping("bolt-2"); - - String topoName = "testTopology"; - - StormTopology stormTopology = builder.createTopology(); - - INimbus iNimbus = new INimbusTest(); - Map supMap = genSupervisors(4, 4, 200, 2000); - Config conf = createClusterConfig(50, 450, 0, null); - conf.put(Config.TOPOLOGY_PRIORITY, 0); - conf.put(Config.TOPOLOGY_NAME, topoName); - conf.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, 2000); - 
conf.put(Config.TOPOLOGY_SUBMITTER_USER, "user"); - - conf.put(Config.TOPOLOGY_ACKER_EXECUTORS, 4); - conf.put(Config.TOPOLOGY_RAS_ACKER_EXECUTORS_PER_WORKER, numOfAckersPerWorker); - - - if (numOfAckersPerWorker == -1) { - // Leave topology.acker.executors.per.worker unset - } else { + throws InvalidTopologyException { + for (Class strategyClass: strategyClasses) { + int spoutParallelism = 1; + int boltParallelism = 2; + TopologyBuilder builder = new TopologyBuilder(); + builder.setSpout("spout", new TestSpout(), + spoutParallelism); + builder.setBolt("bolt-1", new TestBolt(), + boltParallelism).shuffleGrouping("spout"); + builder.setBolt("bolt-2", new TestBolt(), + boltParallelism).shuffleGrouping("bolt-1"); + builder.setBolt("bolt-3", new TestBolt(), + boltParallelism).shuffleGrouping("bolt-2"); + + String topoName = "testTopology"; + + StormTopology stormTopology = builder.createTopology(); + + INimbus iNimbus = new INimbusTest(); + Map supMap = genSupervisors(4, 4, 200, 2000); + Config conf = createClusterConfig(strategyClass, 50, 450, 0, null); + conf.put(Config.TOPOLOGY_PRIORITY, 0); + conf.put(Config.TOPOLOGY_NAME, topoName); + conf.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, 2000); + conf.put(Config.TOPOLOGY_SUBMITTER_USER, "user"); + + conf.put(Config.TOPOLOGY_ACKER_EXECUTORS, 4); conf.put(Config.TOPOLOGY_RAS_ACKER_EXECUTORS_PER_WORKER, numOfAckersPerWorker); - } - int estimatedNumWorker = ServerUtils.getEstimatedWorkerCountForRasTopo(conf, stormTopology); - Nimbus.setUpAckerExecutorConfigs(topoName, conf, conf, estimatedNumWorker); - - conf.put(Config.TOPOLOGY_ACKER_RESOURCES_ONHEAP_MEMORY_MB, 250); - conf.put(Config.TOPOLOGY_ACKER_CPU_PCORE_PERCENT, 50); - - TopologyDetails topo = new TopologyDetails("testTopology-id", conf, stormTopology, 0, - genExecsAndComps(StormCommon.systemTopology(conf, stormTopology)), CURRENT_TIME, "user"); - - Topologies topologies = new Topologies(topo); - Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, conf); - - scheduler = new ResourceAwareScheduler(); - - scheduler.prepare(conf, new StormMetricsRegistry()); - scheduler.schedule(topologies, cluster); - - // Sorted execs: [[6, 6], [2, 2], [4, 4], [5, 5], [1, 1], [3, 3], [0, 0], [8, 8], [7, 7], [10, 10], [9, 9]] - // Ackers: [[8, 8], [7, 7], [10, 10], [9, 9]] - - HashSet> expectedScheduling = new HashSet<>(); - expectedScheduling.add(new HashSet<>(Arrays.asList( - new ExecutorDetails(6, 6), //bolt-3 - new ExecutorDetails(2, 2), //bolt-1 - new ExecutorDetails(7, 7), //acker - new ExecutorDetails(8, 8)))); //acker - expectedScheduling.add(new HashSet<>(Arrays.asList( - new ExecutorDetails(5, 5), //bolt-3 - new ExecutorDetails(4, 4), //bolt-2 - new ExecutorDetails(9, 9), //acker - new ExecutorDetails(10, 10)))); //acker - expectedScheduling.add(new HashSet<>(Arrays.asList( - new ExecutorDetails(0, 0), //spout - new ExecutorDetails(3, 3), //bolt-2 - new ExecutorDetails(1, 1)))); //bolt-1 - - HashSet> foundScheduling = new HashSet<>(); - SchedulerAssignment assignment = cluster.getAssignmentById("testTopology-id"); - for (Collection execs : assignment.getSlotToExecutors().values()) { - foundScheduling.add(new HashSet<>(execs)); - } - assertEquals(expectedScheduling, foundScheduling); + if (numOfAckersPerWorker == -1) { + // Leave topology.acker.executors.per.worker unset + } else { + conf.put(Config.TOPOLOGY_RAS_ACKER_EXECUTORS_PER_WORKER, numOfAckersPerWorker); + } + + int estimatedNumWorker = 
ServerUtils.getEstimatedWorkerCountForRasTopo(conf, stormTopology); + Nimbus.setUpAckerExecutorConfigs(topoName, conf, conf, estimatedNumWorker); + + conf.put(Config.TOPOLOGY_ACKER_RESOURCES_ONHEAP_MEMORY_MB, 250); + conf.put(Config.TOPOLOGY_ACKER_CPU_PCORE_PERCENT, 50); + + TopologyDetails topo = new TopologyDetails("testTopology-id", conf, stormTopology, 0, + genExecsAndComps(StormCommon.systemTopology(conf, stormTopology)), CURRENT_TIME, "user"); + + Topologies topologies = new Topologies(topo); + Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, conf); + + scheduler = new ResourceAwareScheduler(); + + scheduler.prepare(conf, new StormMetricsRegistry()); + scheduler.schedule(topologies, cluster); + + // Sorted execs: [[6, 6], [2, 2], [4, 4], [5, 5], [1, 1], [3, 3], [0, 0], [8, 8], [7, 7], [10, 10], [9, 9]] + // Ackers: [[8, 8], [7, 7], [10, 10], [9, 9]] + + HashSet> expectedScheduling = new HashSet<>(); + expectedScheduling.add(new HashSet<>(Arrays.asList( + new ExecutorDetails(6, 6), //bolt-3 + new ExecutorDetails(2, 2), //bolt-1 + new ExecutorDetails(7, 7), //acker + new ExecutorDetails(8, 8)))); //acker + expectedScheduling.add(new HashSet<>(Arrays.asList( + new ExecutorDetails(5, 5), //bolt-3 + new ExecutorDetails(4, 4), //bolt-2 + new ExecutorDetails(9, 9), //acker + new ExecutorDetails(10, 10)))); //acker + expectedScheduling.add(new HashSet<>(Arrays.asList( + new ExecutorDetails(0, 0), //spout + new ExecutorDetails(3, 3), //bolt-2 + new ExecutorDetails(1, 1)))); //bolt-1 + + HashSet> foundScheduling = new HashSet<>(); + SchedulerAssignment assignment = cluster.getAssignmentById("testTopology-id"); + for (Collection execs : assignment.getSlotToExecutors().values()) { + foundScheduling.add(new HashSet<>(execs)); + } + + assertEquals(expectedScheduling, foundScheduling); + } } /** @@ -669,61 +678,63 @@ public void testDefaultResourceAwareStrategyWithSettingAckerExecutors(int numOfA */ @Test public void testDefaultResourceAwareStrategyInFavorOfShuffle() - throws InvalidTopologyException { - int spoutParallelism = 1; - int boltParallelism = 2; - TopologyBuilder builder = new TopologyBuilder(); - builder.setSpout("spout", new TestSpout(), - spoutParallelism); - builder.setBolt("bolt-1", new TestBolt(), - boltParallelism).shuffleGrouping("spout"); - builder.setBolt("bolt-2", new TestBolt(), - boltParallelism).shuffleGrouping("bolt-1"); - builder.setBolt("bolt-3", new TestBolt(), - boltParallelism).shuffleGrouping("bolt-2"); - - StormTopology stormToplogy = builder.createTopology(); - - INimbus iNimbus = new INimbusTest(); - Map supMap = genSupervisors(4, 4, 200, 2000); - Config conf = createClusterConfig(50, 250, 250, null); - conf.put(Config.TOPOLOGY_PRIORITY, 0); - conf.put(Config.TOPOLOGY_NAME, "testTopology"); - conf.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, Double.MAX_VALUE); - conf.put(Config.TOPOLOGY_SUBMITTER_USER, "user"); - conf.put(Config.TOPOLOGY_RAS_ORDER_EXECUTORS_BY_PROXIMITY_NEEDS, true); - - TopologyDetails topo = new TopologyDetails("testTopology-id", conf, stormToplogy, 0, - genExecsAndComps(StormCommon.systemTopology(conf, stormToplogy)), CURRENT_TIME, "user"); - - Topologies topologies = new Topologies(topo); - Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, conf); - - ResourceAwareScheduler rs = new ResourceAwareScheduler(); - - rs.prepare(conf, new StormMetricsRegistry()); - rs.schedule(topologies, cluster); - 
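// A hedged reading of what this test checks: with
// TOPOLOGY_RAS_ORDER_EXECUTORS_BY_PROXIMITY_NEEDS set to true above, executors are
// ordered by how strongly they communicate (spout -> bolt-1 -> bolt-2 -> bolt-3 via
// shuffle groupings), so the expected slots below mix components that talk to each
// other instead of grouping executors by component.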
// Sorted execs: [[0, 0], [2, 2], [6, 6], [4, 4], [1, 1], [5, 5], [3, 3], [7, 7]] - // Ackers: [[7, 7]]] - - HashSet> expectedScheduling = new HashSet<>(); - expectedScheduling.add(new HashSet<>(Arrays.asList( - new ExecutorDetails(0, 0), //spout - new ExecutorDetails(6, 6), //bolt-2 - new ExecutorDetails(2, 2), //bolt-1 - new ExecutorDetails(7, 7)))); //acker - expectedScheduling.add(new HashSet<>(Arrays.asList( - new ExecutorDetails(3, 3), //bolt-3 - new ExecutorDetails(5, 5), //bolt-2 - new ExecutorDetails(4, 4), //bolt-3 - new ExecutorDetails(1, 1)))); //bolt-1 - HashSet> foundScheduling = new HashSet<>(); - SchedulerAssignment assignment = cluster.getAssignmentById("testTopology-id"); - for (Collection execs : assignment.getSlotToExecutors().values()) { - foundScheduling.add(new HashSet<>(execs)); - } + throws InvalidTopologyException { + for (Class strategyClass: strategyClasses) { + int spoutParallelism = 1; + int boltParallelism = 2; + TopologyBuilder builder = new TopologyBuilder(); + builder.setSpout("spout", new TestSpout(), + spoutParallelism); + builder.setBolt("bolt-1", new TestBolt(), + boltParallelism).shuffleGrouping("spout"); + builder.setBolt("bolt-2", new TestBolt(), + boltParallelism).shuffleGrouping("bolt-1"); + builder.setBolt("bolt-3", new TestBolt(), + boltParallelism).shuffleGrouping("bolt-2"); + + StormTopology stormToplogy = builder.createTopology(); + + INimbus iNimbus = new INimbusTest(); + Map supMap = genSupervisors(4, 4, 200, 2000); + Config conf = createClusterConfig(strategyClass, 50, 250, 250, null); + conf.put(Config.TOPOLOGY_PRIORITY, 0); + conf.put(Config.TOPOLOGY_NAME, "testTopology"); + conf.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, Double.MAX_VALUE); + conf.put(Config.TOPOLOGY_SUBMITTER_USER, "user"); + conf.put(Config.TOPOLOGY_RAS_ORDER_EXECUTORS_BY_PROXIMITY_NEEDS, true); + + TopologyDetails topo = new TopologyDetails("testTopology-id", conf, stormToplogy, 0, + genExecsAndComps(StormCommon.systemTopology(conf, stormToplogy)), CURRENT_TIME, "user"); + + Topologies topologies = new Topologies(topo); + Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, conf); + + ResourceAwareScheduler rs = new ResourceAwareScheduler(); + + rs.prepare(conf, new StormMetricsRegistry()); + rs.schedule(topologies, cluster); + // Sorted execs: [[0, 0], [2, 2], [6, 6], [4, 4], [1, 1], [5, 5], [3, 3], [7, 7]] + // Ackers: [[7, 7]]] + + HashSet> expectedScheduling = new HashSet<>(); + expectedScheduling.add(new HashSet<>(Arrays.asList( + new ExecutorDetails(0, 0), //spout + new ExecutorDetails(6, 6), //bolt-2 + new ExecutorDetails(2, 2), //bolt-1 + new ExecutorDetails(7, 7)))); //acker + expectedScheduling.add(new HashSet<>(Arrays.asList( + new ExecutorDetails(3, 3), //bolt-3 + new ExecutorDetails(5, 5), //bolt-2 + new ExecutorDetails(4, 4), //bolt-3 + new ExecutorDetails(1, 1)))); //bolt-1 + HashSet> foundScheduling = new HashSet<>(); + SchedulerAssignment assignment = cluster.getAssignmentById("testTopology-id"); + for (Collection execs : assignment.getSlotToExecutors().values()) { + foundScheduling.add(new HashSet<>(execs)); + } - assertEquals(expectedScheduling, foundScheduling); + assertEquals(expectedScheduling, foundScheduling); + } } /** @@ -731,113 +742,115 @@ public void testDefaultResourceAwareStrategyInFavorOfShuffle() */ @Test public void testMultipleRacks() { - final Map supMap = new HashMap<>(); - final Map supMapRack0 = genSupervisors(10, 4, 0, 400, 8000); - //generate another rack 
of supervisors with less resources
-        final Map<String, SupervisorDetails> supMapRack1 = genSupervisors(10, 4, 10, 200, 4000);
-
-        //generate some supervisors that are depleted of one resource
-        final Map<String, SupervisorDetails> supMapRack2 = genSupervisors(10, 4, 20, 0, 8000);
-
-        //generate some that has alot of memory but little of cpu
-        final Map<String, SupervisorDetails> supMapRack3 = genSupervisors(10, 4, 30, 10, 8000 * 2 + 4000);
-
-        //generate some that has alot of cpu but little of memory
-        final Map<String, SupervisorDetails> supMapRack4 = genSupervisors(10, 4, 40, 400 + 200 + 10, 1000);
-
-        //Generate some that have neither resource, to verify that the strategy will prioritize this last
-        //Also put a generic resource with 0 value in the resources list, to verify that it doesn't affect the sorting
-        final Map<String, SupervisorDetails> supMapRack5 = genSupervisors(10, 4, 50, 0.0, 0.0, Collections.singletonMap("gpu.count", 0.0));
-
-        supMap.putAll(supMapRack0);
-        supMap.putAll(supMapRack1);
-        supMap.putAll(supMapRack2);
-        supMap.putAll(supMapRack3);
-        supMap.putAll(supMapRack4);
-        supMap.putAll(supMapRack5);
-
-        Config config = createClusterConfig(100, 500, 500, null);
-        config.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, Double.MAX_VALUE);
-        INimbus iNimbus = new INimbusTest();
-
-        //create test DNSToSwitchMapping plugin
-        DNSToSwitchMapping TestNetworkTopographyPlugin =
-            new TestDNSToSwitchMapping(supMapRack0, supMapRack1, supMapRack2, supMapRack3, supMapRack4, supMapRack5);
-
-        //generate topologies
-        TopologyDetails topo1 = genTopology("topo-1", config, 8, 0, 2, 0, CURRENT_TIME - 2, 10, "user");
-        TopologyDetails topo2 = genTopology("topo-2", config, 8, 0, 2, 0, CURRENT_TIME - 2, 10, "user");
-
-        Topologies topologies = new Topologies(topo1, topo2);
-        Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config);
-
-        List<String> supHostnames = new LinkedList<>();
-        for (SupervisorDetails sup : supMap.values()) {
-            supHostnames.add(sup.getHost());
-        }
-        Map<String, List<String>> rackToNodes = new HashMap<>();
-        Map<String, String> resolvedSuperVisors = TestNetworkTopographyPlugin.resolve(supHostnames);
-        for (Map.Entry<String, String> entry : resolvedSuperVisors.entrySet()) {
-            String hostName = entry.getKey();
-            String rack = entry.getValue();
-            rackToNodes.computeIfAbsent(rack, rid -> new ArrayList<>()).add(hostName);
-        }
-        cluster.setNetworkTopography(rackToNodes);
-
-        DefaultResourceAwareStrategyOld rs = new DefaultResourceAwareStrategyOld();
-
-        rs.prepareForScheduling(cluster, topo1);
-        INodeSorter nodeSorter = new NodeSorterHostProximity(cluster, topo1, BaseResourceAwareStrategy.NodeSortType.DEFAULT_RAS);
-        nodeSorter.prepare(null);
-        Iterable<ObjectResourcesItem> sortedRacks = nodeSorter.getSortedRacks();
-
-        Iterator<ObjectResourcesItem> it = sortedRacks.iterator();
-        // Ranked first since rack-0 has the most balanced set of resources
-        assertEquals("rack-0", it.next().id, "rack-0 should be ordered first");
-        // Ranked second since rack-1 has a balanced set of resources but less than rack-0
-        assertEquals("rack-1", it.next().id, "rack-1 should be ordered second");
-        // Ranked third since rack-4 has a lot of cpu but not a lot of memory
-        assertEquals("rack-4", it.next().id, "rack-4 should be ordered third");
-        // Ranked fourth since rack-3 has alot of memory but not cpu
-        assertEquals("rack-3", it.next().id, "rack-3 should be ordered fourth");
-        //Ranked fifth since rack-2 has not cpu resources
-        assertEquals("rack-2", it.next().id, "rack-2 should be ordered fifth");
-        //Ranked last since rack-5 has neither CPU nor memory available
-        assertEquals("rack-5", it.next().id, "Rack-5 should be ordered sixth");
-
-        SchedulingResult schedulingResult = rs.schedule(cluster, topo1);
-        assert(schedulingResult.isSuccess());
-        SchedulerAssignment assignment = cluster.getAssignmentById(topo1.getId());
-        for (WorkerSlot ws : assignment.getSlotToExecutors().keySet()) {
-            //make sure all workers on scheduled in rack-0
-            assertEquals("rack-0",
-                resolvedSuperVisors.get(rs.idToNode(ws.getNodeId()).getHostname()), "assert worker scheduled on rack-0");
-        }
-        assertEquals(0, cluster.getUnassignedExecutors(topo1).size(), "All executors in topo-1 scheduled");
-
-        //Test if topology is already partially scheduled on one rack
-        Iterator<ExecutorDetails> executorIterator = topo2.getExecutors().iterator();
-        List<String> nodeHostnames = rackToNodes.get("rack-1");
-        for (int i = 0; i< topo2.getExecutors().size()/2; i++) {
-            String nodeHostname = nodeHostnames.get(i % nodeHostnames.size());
-            RasNode node = rs.hostnameToNodes(nodeHostname).get(0);
-            WorkerSlot targetSlot = node.getFreeSlots().iterator().next();
-            ExecutorDetails targetExec = executorIterator.next();
-            // to keep track of free slots
-            node.assign(targetSlot, topo2, Collections.singletonList(targetExec));
-        }
+        for (Class<?> strategyClass: strategyClasses) {
+            final Map<String, SupervisorDetails> supMap = new HashMap<>();
+            final Map<String, SupervisorDetails> supMapRack0 = genSupervisors(10, 4, 0, 400, 8000);
+            //generate another rack of supervisors with less resources
+            final Map<String, SupervisorDetails> supMapRack1 = genSupervisors(10, 4, 10, 200, 4000);
+
+            //generate some supervisors that are depleted of one resource
+            final Map<String, SupervisorDetails> supMapRack2 = genSupervisors(10, 4, 20, 0, 8000);
+
+            //generate some that have a lot of memory but little CPU
+            final Map<String, SupervisorDetails> supMapRack3 = genSupervisors(10, 4, 30, 10, 8000 * 2 + 4000);
+
+            //generate some that have a lot of CPU but little memory
+            final Map<String, SupervisorDetails> supMapRack4 = genSupervisors(10, 4, 40, 400 + 200 + 10, 1000);
+
+            //Generate some that have neither resource, to verify that the strategy will prioritize this last
+            //Also put a generic resource with 0 value in the resources list, to verify that it doesn't affect the sorting
+            final Map<String, SupervisorDetails> supMapRack5 = genSupervisors(10, 4, 50, 0.0, 0.0, Collections.singletonMap("gpu.count", 0.0));
+
+            supMap.putAll(supMapRack0);
+            supMap.putAll(supMapRack1);
+            supMap.putAll(supMapRack2);
+            supMap.putAll(supMapRack3);
+            supMap.putAll(supMapRack4);
+            supMap.putAll(supMapRack5);
+
+            Config config = createClusterConfig(strategyClass, 100, 500, 500, null);
+            config.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, Double.MAX_VALUE);
+            INimbus iNimbus = new INimbusTest();
+
+            //create test DNSToSwitchMapping plugin
+            DNSToSwitchMapping TestNetworkTopographyPlugin =
+                new TestDNSToSwitchMapping(supMapRack0, supMapRack1, supMapRack2, supMapRack3, supMapRack4, supMapRack5);
+
+            //generate topologies
+            TopologyDetails topo1 = genTopology("topo-1", config, 8, 0, 2, 0, CURRENT_TIME - 2, 10, "user");
+            TopologyDetails topo2 = genTopology("topo-2", config, 8, 0, 2, 0, CURRENT_TIME - 2, 10, "user");
+
+            Topologies topologies = new Topologies(topo1, topo2);
+            Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config);
+
+            List<String> supHostnames = new LinkedList<>();
+            for (SupervisorDetails sup : supMap.values()) {
+                supHostnames.add(sup.getHost());
+            }
+            Map<String, List<String>> rackToNodes = new HashMap<>();
+            Map<String, String> resolvedSuperVisors = TestNetworkTopographyPlugin.resolve(supHostnames);
+            for (Map.Entry<String, String> entry : resolvedSuperVisors.entrySet()) {
+                String hostName = entry.getKey();
+                String rack = entry.getValue();
+                rackToNodes.computeIfAbsent(rack, rid -> new ArrayList<>()).add(hostName);
+            }
+            cluster.setNetworkTopography(rackToNodes);
+
+            DefaultResourceAwareStrategyOld rs = new DefaultResourceAwareStrategyOld();
+
+            rs.prepareForScheduling(cluster, topo1);
+            INodeSorter nodeSorter = new NodeSorterHostProximity(cluster, topo1, BaseResourceAwareStrategy.NodeSortType.DEFAULT_RAS);
+            nodeSorter.prepare(null);
+            Iterable<ObjectResourcesItem> sortedRacks = nodeSorter.getSortedRacks();
+
+            Iterator<ObjectResourcesItem> it = sortedRacks.iterator();
+            // Ranked first since rack-0 has the most balanced set of resources
+            assertEquals("rack-0", it.next().id, "rack-0 should be ordered first");
+            // Ranked second since rack-1 has a balanced set of resources but less than rack-0
+            assertEquals("rack-1", it.next().id, "rack-1 should be ordered second");
+            // Ranked third since rack-4 has a lot of CPU but not a lot of memory
+            assertEquals("rack-4", it.next().id, "rack-4 should be ordered third");
+            // Ranked fourth since rack-3 has a lot of memory but little CPU
+            assertEquals("rack-3", it.next().id, "rack-3 should be ordered fourth");
+            //Ranked fifth since rack-2 has no CPU resources
+            assertEquals("rack-2", it.next().id, "rack-2 should be ordered fifth");
+            //Ranked last since rack-5 has neither CPU nor memory available
+            assertEquals("rack-5", it.next().id, "Rack-5 should be ordered sixth");
+
+            SchedulingResult schedulingResult = rs.schedule(cluster, topo1);
+            assert (schedulingResult.isSuccess());
+            SchedulerAssignment assignment = cluster.getAssignmentById(topo1.getId());
+            for (WorkerSlot ws : assignment.getSlotToExecutors().keySet()) {
+                //make sure all workers are scheduled on rack-0
+                assertEquals("rack-0",
+                    resolvedSuperVisors.get(rs.idToNode(ws.getNodeId()).getHostname()), "assert worker scheduled on rack-0");
+            }
+            assertEquals(0, cluster.getUnassignedExecutors(topo1).size(), "All executors in topo-1 scheduled");
+
+            //Test if topology is already partially scheduled on one rack
+            Iterator<ExecutorDetails> executorIterator = topo2.getExecutors().iterator();
+            List<String> nodeHostnames = rackToNodes.get("rack-1");
+            for (int i = 0; i < topo2.getExecutors().size() / 2; i++) {
+                String nodeHostname = nodeHostnames.get(i % nodeHostnames.size());
+                RasNode node = rs.hostnameToNodes(nodeHostname).get(0);
+                WorkerSlot targetSlot = node.getFreeSlots().iterator().next();
+                ExecutorDetails targetExec = executorIterator.next();
+                // to keep track of free slots
+                node.assign(targetSlot, topo2, Collections.singletonList(targetExec));
+            }
-        rs = new DefaultResourceAwareStrategyOld();
-        // schedule topo2
-        schedulingResult = rs.schedule(cluster, topo2);
-        assert(schedulingResult.isSuccess());
-        assignment = cluster.getAssignmentById(topo2.getId());
-        for (WorkerSlot ws : assignment.getSlotToExecutors().keySet()) {
-            //make sure all workers on scheduled in rack-1
-            assertEquals("rack-1",
-                resolvedSuperVisors.get(rs.idToNode(ws.getNodeId()).getHostname()), "assert worker scheduled on rack-1");
+            rs = new DefaultResourceAwareStrategyOld();
+            // schedule topo2
+            schedulingResult = rs.schedule(cluster, topo2);
+            assert (schedulingResult.isSuccess());
+            assignment = cluster.getAssignmentById(topo2.getId());
+            for (WorkerSlot ws : assignment.getSlotToExecutors().keySet()) {
+                //make sure all workers are scheduled on rack-1
+                assertEquals("rack-1",
+                    resolvedSuperVisors.get(rs.idToNode(ws.getNodeId()).getHostname()), "assert worker scheduled on rack-1");
+            }
+            assertEquals(0, cluster.getUnassignedExecutors(topo2).size(), "All executors in topo-2 scheduled");
+        }
-        assertEquals(0, cluster.getUnassignedExecutors(topo2).size(), "All executors in topo-2 scheduled");
     }

     /**
@@ -845,126 +858,128 @@ public void testMultipleRacks() {
      */
    @Test
    public void testMultipleRacksWithFavoritism() {
-        final Map<String, SupervisorDetails> supMap = new HashMap<>();
-        final Map<String, SupervisorDetails> supMapRack0 = genSupervisors(10, 4, 0, 400, 8000);
-        //generate another rack of supervisors with less resources
-        final Map<String, SupervisorDetails> supMapRack1 = genSupervisors(10, 4, 10, 200, 4000);
-
-        //generate some supervisors that are depleted of one resource
-        final Map<String, SupervisorDetails> supMapRack2 = genSupervisors(10, 4, 20, 0, 8000);
-
-        //generate some that has alot of memory but little of cpu
-        final Map<String, SupervisorDetails> supMapRack3 = genSupervisors(10, 4, 30, 10, 8000 * 2 + 4000);
-
-        //generate some that has alot of cpu but little of memory
-        final Map<String, SupervisorDetails> supMapRack4 = genSupervisors(10, 4, 40, 400 + 200 + 10, 1000);
-
-        supMap.putAll(supMapRack0);
-        supMap.putAll(supMapRack1);
-        supMap.putAll(supMapRack2);
-        supMap.putAll(supMapRack3);
-        supMap.putAll(supMapRack4);
-
-        Config config = createClusterConfig(100, 500, 500, null);
-        config.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, Double.MAX_VALUE);
-        INimbus iNimbus = new INimbusTest();
-
-        //create test DNSToSwitchMapping plugin
-        DNSToSwitchMapping TestNetworkTopographyPlugin =
-            new TestDNSToSwitchMapping(supMapRack0, supMapRack1, supMapRack2, supMapRack3, supMapRack4);
-
-        Config t1Conf = new Config();
-        t1Conf.putAll(config);
-        final List<String> t1FavoredHostNames = Arrays.asList("host-41", "host-42", "host-43");
-        t1Conf.put(Config.TOPOLOGY_SCHEDULER_FAVORED_NODES, t1FavoredHostNames);
-        final List<String> t1UnfavoredHostIds = Arrays.asList("host-1", "host-2", "host-3");
-        t1Conf.put(Config.TOPOLOGY_SCHEDULER_UNFAVORED_NODES, t1UnfavoredHostIds);
-        //generate topologies
-        TopologyDetails topo1 = genTopology("topo-1", t1Conf, 8, 0, 2, 0, CURRENT_TIME - 2, 10, "user");
-
-
-        Config t2Conf = new Config();
-        t2Conf.putAll(config);
-        t2Conf.put(Config.TOPOLOGY_SCHEDULER_FAVORED_NODES, Arrays.asList("host-31", "host-32", "host-33"));
-        t2Conf.put(Config.TOPOLOGY_SCHEDULER_UNFAVORED_NODES, Arrays.asList("host-11", "host-12", "host-13"));
-        TopologyDetails topo2 = genTopology("topo-2", t2Conf, 8, 0, 2, 0, CURRENT_TIME - 2, 10, "user");
-
-        Topologies topologies = new Topologies(topo1, topo2);
-        Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config);
-
-        List<String> supHostnames = new LinkedList<>();
-        for (SupervisorDetails sup : supMap.values()) {
-            supHostnames.add(sup.getHost());
-        }
-        Map<String, List<String>> rackToNodes = new HashMap<>();
-        Map<String, String> resolvedSuperVisors = TestNetworkTopographyPlugin.resolve(supHostnames);
-        for (Map.Entry<String, String> entry : resolvedSuperVisors.entrySet()) {
-            String hostName = entry.getKey();
-            String rack = entry.getValue();
-            List<String> nodesForRack = rackToNodes.get(rack);
-            if (nodesForRack == null) {
-                nodesForRack = new ArrayList<>();
-                rackToNodes.put(rack, nodesForRack);
+        for (Class<?> strategyClass : strategyClasses) {
+            final Map<String, SupervisorDetails> supMap = new HashMap<>();
+            final Map<String, SupervisorDetails> supMapRack0 = genSupervisors(10, 4, 0, 400, 8000);
+            //generate another rack of supervisors with less resources
+            final Map<String, SupervisorDetails> supMapRack1 = genSupervisors(10, 4, 10, 200, 4000);
+
+            //generate some supervisors that are depleted of one resource
+            final Map<String, SupervisorDetails> supMapRack2 = genSupervisors(10, 4, 20, 0, 8000);
+
+            //generate some that have a lot of memory but little CPU
+            final Map<String, SupervisorDetails> supMapRack3 = genSupervisors(10, 4, 30, 10, 8000 * 2 + 4000);
+
+            //generate some that have a lot of CPU but little memory
+            final Map<String, SupervisorDetails> supMapRack4 = genSupervisors(10, 4, 40, 400 + 200 + 10, 1000);
+
+            supMap.putAll(supMapRack0);
+            supMap.putAll(supMapRack1);
+            supMap.putAll(supMapRack2);
+            supMap.putAll(supMapRack3);
+            supMap.putAll(supMapRack4);
+
+            Config config = createClusterConfig(strategyClass, 100, 500, 500, null);
+            config.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, Double.MAX_VALUE);
+            INimbus iNimbus = new INimbusTest();
+
+            //create test DNSToSwitchMapping plugin
+            DNSToSwitchMapping TestNetworkTopographyPlugin =
+                new TestDNSToSwitchMapping(supMapRack0, supMapRack1, supMapRack2, supMapRack3, supMapRack4);
+
+            Config t1Conf = new Config();
+            t1Conf.putAll(config);
+            final List<String> t1FavoredHostNames = Arrays.asList("host-41", "host-42", "host-43");
+            t1Conf.put(Config.TOPOLOGY_SCHEDULER_FAVORED_NODES, t1FavoredHostNames);
+            final List<String> t1UnfavoredHostIds = Arrays.asList("host-1", "host-2", "host-3");
+            t1Conf.put(Config.TOPOLOGY_SCHEDULER_UNFAVORED_NODES, t1UnfavoredHostIds);
+            //generate topologies
+            TopologyDetails topo1 = genTopology("topo-1", t1Conf, 8, 0, 2, 0, CURRENT_TIME - 2, 10, "user");
+
+
+            Config t2Conf = new Config();
+            t2Conf.putAll(config);
+            t2Conf.put(Config.TOPOLOGY_SCHEDULER_FAVORED_NODES, Arrays.asList("host-31", "host-32", "host-33"));
+            t2Conf.put(Config.TOPOLOGY_SCHEDULER_UNFAVORED_NODES, Arrays.asList("host-11", "host-12", "host-13"));
+            TopologyDetails topo2 = genTopology("topo-2", t2Conf, 8, 0, 2, 0, CURRENT_TIME - 2, 10, "user");
+
+            Topologies topologies = new Topologies(topo1, topo2);
+            Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config);
+
+            List<String> supHostnames = new LinkedList<>();
+            for (SupervisorDetails sup : supMap.values()) {
+                supHostnames.add(sup.getHost());
+            }
+            Map<String, List<String>> rackToNodes = new HashMap<>();
+            Map<String, String> resolvedSuperVisors = TestNetworkTopographyPlugin.resolve(supHostnames);
+            for (Map.Entry<String, String> entry : resolvedSuperVisors.entrySet()) {
+                String hostName = entry.getKey();
+                String rack = entry.getValue();
+                List<String> nodesForRack = rackToNodes.get(rack);
+                if (nodesForRack == null) {
+                    nodesForRack = new ArrayList<>();
+                    rackToNodes.put(rack, nodesForRack);
+                }
+                nodesForRack.add(hostName);
+            }
+            cluster.setNetworkTopography(rackToNodes);
+
+            DefaultResourceAwareStrategyOld rs = new DefaultResourceAwareStrategyOld();
+
+            rs.prepareForScheduling(cluster, topo1);
+            INodeSorter nodeSorter = new NodeSorterHostProximity(cluster, topo1, BaseResourceAwareStrategy.NodeSortType.DEFAULT_RAS);
+            nodeSorter.prepare(null);
+            Iterable<ObjectResourcesItem> sortedRacks = nodeSorter.getSortedRacks();
+
+            Iterator<ObjectResourcesItem> it = sortedRacks.iterator();
+            // Ranked first since rack-0 has the most balanced set of resources
+            assertEquals("rack-0", it.next().id, "rack-0 should be ordered first");
+            // Ranked second since rack-1 has a balanced set of resources but less than rack-0
+            assertEquals("rack-1", it.next().id, "rack-1 should be ordered second");
+            // Ranked third since rack-4 has a lot of CPU but not a lot of memory
+            assertEquals("rack-4", it.next().id, "rack-4 should be ordered third");
+            // Ranked fourth since rack-3 has a lot of memory but little CPU
+            assertEquals("rack-3", it.next().id, "rack-3 should be ordered fourth");
+            //Ranked last since rack-2 has no CPU resources
+            assertEquals("rack-2", it.next().id, "rack-2 should be ordered fifth");

+            SchedulingResult schedulingResult = rs.schedule(cluster, topo1);
+            assert (schedulingResult.isSuccess());
+            SchedulerAssignment assignment = cluster.getAssignmentById(topo1.getId());
+            for (WorkerSlot ws : assignment.getSlotToExecutors().keySet()) {
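                // Every scheduled worker must land either on an explicitly favored host or on
                // the top-ranked rack (rack-0), and never on an unfavored host; the two
                // assertions below verify exactly that.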
String hostName = rs.idToNode(ws.getNodeId()).getHostname(); + String rackId = resolvedSuperVisors.get(hostName); + assertTrue(t1FavoredHostNames.contains(hostName) || "rack-0".equals(rackId), + ws + " is neither on a favored node " + t1FavoredHostNames + " nor the highest priority rack (rack-0)"); + assertFalse(t1UnfavoredHostIds.contains(hostName), + ws + " is a part of an unfavored node " + t1UnfavoredHostIds); + } + assertEquals(0, cluster.getUnassignedExecutors(topo1).size(), "All executors in topo-1 scheduled"); + + //Test if topology is already partially scheduled on one rack + Iterator executorIterator = topo2.getExecutors().iterator(); + List nodeHostnames = rackToNodes.get("rack-1"); + for (int i = 0; i < topo2.getExecutors().size() / 2; i++) { + String nodeHostname = nodeHostnames.get(i % nodeHostnames.size()); + RasNode node = rs.hostnameToNodes(nodeHostname).get(0); + WorkerSlot targetSlot = node.getFreeSlots().iterator().next(); + ExecutorDetails targetExec = executorIterator.next(); + // to keep track of free slots + node.assign(targetSlot, topo2, Collections.singletonList(targetExec)); } - nodesForRack.add(hostName); - } - cluster.setNetworkTopography(rackToNodes); - - DefaultResourceAwareStrategyOld rs = new DefaultResourceAwareStrategyOld(); - - rs.prepareForScheduling(cluster, topo1); - INodeSorter nodeSorter = new NodeSorterHostProximity(cluster, topo1, BaseResourceAwareStrategy.NodeSortType.DEFAULT_RAS); - nodeSorter.prepare(null); - Iterable sortedRacks= nodeSorter.getSortedRacks(); - - Iterator it = sortedRacks.iterator(); - // Ranked first since rack-0 has the most balanced set of resources - assertEquals("rack-0", it.next().id, "rack-0 should be ordered first"); - // Ranked second since rack-1 has a balanced set of resources but less than rack-0 - assertEquals("rack-1", it.next().id, "rack-1 should be ordered second"); - // Ranked third since rack-4 has a lot of cpu but not a lot of memory - assertEquals("rack-4", it.next().id, "rack-4 should be ordered third"); - // Ranked fourth since rack-3 has alot of memory but not cpu - assertEquals("rack-3", it.next().id, "rack-3 should be ordered fourth"); - //Ranked last since rack-2 has not cpu resources - assertEquals("rack-2", it.next().id, "rack-2 should be ordered fifth"); - - SchedulingResult schedulingResult = rs.schedule(cluster, topo1); - assert(schedulingResult.isSuccess()); - SchedulerAssignment assignment = cluster.getAssignmentById(topo1.getId()); - for (WorkerSlot ws : assignment.getSlotToExecutors().keySet()) { - String hostName = rs.idToNode(ws.getNodeId()).getHostname(); - String rackId = resolvedSuperVisors.get(hostName); - assertTrue(t1FavoredHostNames.contains(hostName) || "rack-0".equals(rackId), - ws + " is neither on a favored node " + t1FavoredHostNames + " nor the highest priority rack (rack-0)"); - assertFalse(t1UnfavoredHostIds.contains(hostName), - ws + " is a part of an unfavored node " + t1UnfavoredHostIds); - } - assertEquals(0, cluster.getUnassignedExecutors(topo1).size(), "All executors in topo-1 scheduled"); - - //Test if topology is already partially scheduled on one rack - Iterator executorIterator = topo2.getExecutors().iterator(); - List nodeHostnames = rackToNodes.get("rack-1"); - for (int i = 0; i< topo2.getExecutors().size()/2; i++) { - String nodeHostname = nodeHostnames.get(i % nodeHostnames.size()); - RasNode node = rs.hostnameToNodes(nodeHostname).get(0); - WorkerSlot targetSlot = node.getFreeSlots().iterator().next(); - ExecutorDetails targetExec = executorIterator.next(); - // 
to keep track of free slots - node.assign(targetSlot, topo2, Collections.singletonList(targetExec)); - } - rs = new DefaultResourceAwareStrategyOld(); - // schedule topo2 - schedulingResult = rs.schedule(cluster, topo2); - assert(schedulingResult.isSuccess()); - assignment = cluster.getAssignmentById(topo2.getId()); - for (WorkerSlot ws : assignment.getSlotToExecutors().keySet()) { - //make sure all workers on scheduled in rack-1 - // The favored nodes would have put it on a different rack, but because that rack does not have free space to run the - // topology it falls back to this rack - assertEquals("rack-1", resolvedSuperVisors.get(rs.idToNode(ws.getNodeId()).getHostname()), "assert worker scheduled on rack-1"); + rs = new DefaultResourceAwareStrategyOld(); + // schedule topo2 + schedulingResult = rs.schedule(cluster, topo2); + assert (schedulingResult.isSuccess()); + assignment = cluster.getAssignmentById(topo2.getId()); + for (WorkerSlot ws : assignment.getSlotToExecutors().keySet()) { + //make sure all workers on scheduled in rack-1 + // The favored nodes would have put it on a different rack, but because that rack does not have free space to run the + // topology it falls back to this rack + assertEquals("rack-1", resolvedSuperVisors.get(rs.idToNode(ws.getNodeId()).getHostname()), "assert worker scheduled on rack-1"); + } + assertEquals(0, cluster.getUnassignedExecutors(topo2).size(), "All executors in topo-2 scheduled"); } - assertEquals(0, cluster.getUnassignedExecutors(topo2).size() , "All executors in topo-2 scheduled" ); } } diff --git a/storm-server/src/test/java/org/apache/storm/scheduler/resource/strategies/scheduling/TestGenericResourceAwareStrategy.java b/storm-server/src/test/java/org/apache/storm/scheduler/resource/strategies/scheduling/TestGenericResourceAwareStrategy.java index 26ea6b4a47a..1bf69404153 100644 --- a/storm-server/src/test/java/org/apache/storm/scheduler/resource/strategies/scheduling/TestGenericResourceAwareStrategy.java +++ b/storm-server/src/test/java/org/apache/storm/scheduler/resource/strategies/scheduling/TestGenericResourceAwareStrategy.java @@ -67,6 +67,10 @@ public class TestGenericResourceAwareStrategy { private static final Logger LOG = LoggerFactory.getLogger(TestGenericResourceAwareStrategy.class); + private static final Class[] strategyClasses = { + GenericResourceAwareStrategy.class, + GenericResourceAwareStrategyOld.class, + }; private final int currentTime = 1450418597; private IScheduler scheduler = null; @@ -79,14 +83,10 @@ public void cleanup() { } } - protected Class getGenericResourceAwareStrategyClass() { - return GenericResourceAwareStrategy.class; - } - - private Config createGrasClusterConfig(double compPcore, double compOnHeap, double compOffHeap, + private Config createGrasClusterConfig(Class strategyClass, double compPcore, double compOnHeap, double compOffHeap, Map> pools, Map genericResourceMap) { Config config = TestUtilsForResourceAwareScheduler.createGrasClusterConfig(compPcore, compOnHeap, compOffHeap, pools, genericResourceMap); - config.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, getGenericResourceAwareStrategyClass().getName()); + config.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, strategyClass.getName()); return config; } @@ -95,93 +95,95 @@ private Config createGrasClusterConfig(double compPcore, double compOnHeap, doub */ @Test public void testGenericResourceAwareStrategySharedMemory() { - int spoutParallelism = 2; - int boltParallelism = 2; - int numBolts = 3; - double cpuPercent = 10; - double memoryOnHeap = 10; - 
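// Worked numbers for the assertions further down (derived purely from the constants
// in this test): 2 spouts + 3 bolts x 2 = 8 executors, so the expected totals are
// 8 * 10 = 80% CPU, 8 * 10 + 500 shared = 580 MB on-heap, and 8 * 10 + 500 shared
// = 580 MB worker off-heap, with the 700 MB node-shared off-heap tracked
// separately per node.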
double memoryOffHeap = 10; - double sharedOnHeap = 500; - double sharedOffHeapNode = 700; - double sharedOffHeapWorker = 500; - TopologyBuilder builder = new TopologyBuilder(); - builder.setSpout("spout", new TestSpout(), - spoutParallelism).addResource("gpu.count", 1.0); - builder.setBolt("bolt-1", new TestBolt(), - boltParallelism).addSharedMemory(new SharedOffHeapWithinWorker(sharedOffHeapWorker, "bolt-1 shared off heap worker")).shuffleGrouping("spout"); - builder.setBolt("bolt-2", new TestBolt(), - boltParallelism).addSharedMemory(new SharedOffHeapWithinNode(sharedOffHeapNode, "bolt-2 shared node")).shuffleGrouping("bolt-1"); - builder.setBolt("bolt-3", new TestBolt(), - boltParallelism).addSharedMemory(new SharedOnHeap(sharedOnHeap, "bolt-3 shared worker")).shuffleGrouping("bolt-2"); - - StormTopology stormTopology = builder.createTopology(); - - INimbus iNimbus = new INimbusTest(); - - Config conf = createGrasClusterConfig(cpuPercent, memoryOnHeap, memoryOffHeap, null, Collections.emptyMap()); - Map genericResourcesMap = new HashMap<>(); - genericResourcesMap.put("gpu.count", 1.0); - - Map supMap = genSupervisors(4, 4, 500, 2000, genericResourcesMap); - - conf.put(Config.TOPOLOGY_PRIORITY, 0); - conf.put(Config.TOPOLOGY_NAME, "testTopology"); - conf.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, 2000); - TopologyDetails topo = new TopologyDetails("testTopology-id", conf, stormTopology, 0, - genExecsAndComps(stormTopology), currentTime, "user"); - - Topologies topologies = new Topologies(topo); - - Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, conf); - - scheduler = new ResourceAwareScheduler(); + for (Class strategyClass: strategyClasses) { + int spoutParallelism = 2; + int boltParallelism = 2; + int numBolts = 3; + double cpuPercent = 10; + double memoryOnHeap = 10; + double memoryOffHeap = 10; + double sharedOnHeap = 500; + double sharedOffHeapNode = 700; + double sharedOffHeapWorker = 500; + TopologyBuilder builder = new TopologyBuilder(); + builder.setSpout("spout", new TestSpout(), + spoutParallelism).addResource("gpu.count", 1.0); + builder.setBolt("bolt-1", new TestBolt(), + boltParallelism).addSharedMemory(new SharedOffHeapWithinWorker(sharedOffHeapWorker, "bolt-1 shared off heap worker")).shuffleGrouping("spout"); + builder.setBolt("bolt-2", new TestBolt(), + boltParallelism).addSharedMemory(new SharedOffHeapWithinNode(sharedOffHeapNode, "bolt-2 shared node")).shuffleGrouping("bolt-1"); + builder.setBolt("bolt-3", new TestBolt(), + boltParallelism).addSharedMemory(new SharedOnHeap(sharedOnHeap, "bolt-3 shared worker")).shuffleGrouping("bolt-2"); + + StormTopology stormTopology = builder.createTopology(); + + INimbus iNimbus = new INimbusTest(); + + Config conf = createGrasClusterConfig(strategyClass, cpuPercent, memoryOnHeap, memoryOffHeap, null, Collections.emptyMap()); + Map genericResourcesMap = new HashMap<>(); + genericResourcesMap.put("gpu.count", 1.0); + + Map supMap = genSupervisors(4, 4, 500, 2000, genericResourcesMap); + + conf.put(Config.TOPOLOGY_PRIORITY, 0); + conf.put(Config.TOPOLOGY_NAME, "testTopology"); + conf.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, 2000); + TopologyDetails topo = new TopologyDetails("testTopology-id", conf, stormTopology, 0, + genExecsAndComps(stormTopology), currentTime, "user"); + + Topologies topologies = new Topologies(topo); + + Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, 
conf); + + scheduler = new ResourceAwareScheduler(); + + scheduler.prepare(conf, new StormMetricsRegistry()); + scheduler.schedule(topologies, cluster); + + for (Entry entry : cluster.getSupervisorsResourcesMap().entrySet()) { + String supervisorId = entry.getKey(); + SupervisorResources resources = entry.getValue(); + assertTrue(resources.getTotalCpu() >= resources.getUsedCpu(), supervisorId); + assertTrue(resources.getTotalMem() >= resources.getUsedMem(), supervisorId); + } - scheduler.prepare(conf, new StormMetricsRegistry()); - scheduler.schedule(topologies, cluster); - - for (Entry entry: cluster.getSupervisorsResourcesMap().entrySet()) { - String supervisorId = entry.getKey(); - SupervisorResources resources = entry.getValue(); - assertTrue(resources.getTotalCpu() >= resources.getUsedCpu(), supervisorId); - assertTrue(resources.getTotalMem() >= resources.getUsedMem(), supervisorId); - } + // If we didn't take GPUs into account everything would fit under a single slot + // But because there is only 1 GPU per node, and each of the 2 spouts needs a GPU + // It has to be scheduled on at least 2 nodes, and hence 2 slots. + // Because of this, all the bolts will be scheduled on a single slot with one of + // the spouts and the other spout is on its own slot. So everything that can be shared is + // shared. + int totalNumberOfTasks = (spoutParallelism + (boltParallelism * numBolts)); + double totalExpectedCPU = totalNumberOfTasks * cpuPercent; + double totalExpectedOnHeap = (totalNumberOfTasks * memoryOnHeap) + sharedOnHeap; + double totalExpectedWorkerOffHeap = (totalNumberOfTasks * memoryOffHeap) + sharedOffHeapWorker; + + SchedulerAssignment assignment = cluster.getAssignmentById(topo.getId()); + Set slots = assignment.getSlots(); + Map nodeToTotalShared = assignment.getNodeIdToTotalSharedOffHeapNodeMemory(); + LOG.info("NODE TO SHARED OFF HEAP {}", nodeToTotalShared); + Map scheduledResources = assignment.getScheduledResources(); + assertEquals(2, slots.size()); + assertEquals(2, nodeToTotalShared.size()); + assertEquals(2, scheduledResources.size()); + double totalFoundCPU = 0.0; + double totalFoundOnHeap = 0.0; + double totalFoundWorkerOffHeap = 0.0; + for (WorkerSlot ws : slots) { + WorkerResources resources = scheduledResources.get(ws); + totalFoundCPU += resources.get_cpu(); + totalFoundOnHeap += resources.get_mem_on_heap(); + totalFoundWorkerOffHeap += resources.get_mem_off_heap(); + } - // If we didn't take GPUs into account everything would fit under a single slot - // But because there is only 1 GPU per node, and each of the 2 spouts needs a GPU - // It has to be scheduled on at least 2 nodes, and hence 2 slots. - // Because of this, all the bolts will be scheduled on a single slot with one of - // the spouts and the other spout is on its own slot. So everything that can be shared is - // shared. 
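// For reference, a minimal sketch of the three shared-memory request types this test
// exercises (amounts in MB; the component names here are illustrative, while the
// constructors mirror the calls earlier in this test):
TopologyBuilder sharedMemSketch = new TopologyBuilder();
sharedMemSketch.setSpout("spout", new TestSpout(), 1);
sharedMemSketch.setBolt("worker-shared", new TestBolt(), 1)
    .addSharedMemory(new SharedOffHeapWithinWorker(500, "off-heap shared within a worker"))
    .shuffleGrouping("spout");
sharedMemSketch.setBolt("node-shared", new TestBolt(), 1)
    .addSharedMemory(new SharedOffHeapWithinNode(700, "off-heap shared across a node"))
    .shuffleGrouping("worker-shared");
sharedMemSketch.setBolt("heap-shared", new TestBolt(), 1)
    .addSharedMemory(new SharedOnHeap(500, "on-heap shared within a worker"))
    .shuffleGrouping("node-shared");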
- int totalNumberOfTasks = (spoutParallelism + (boltParallelism * numBolts)); - double totalExpectedCPU = totalNumberOfTasks * cpuPercent; - double totalExpectedOnHeap = (totalNumberOfTasks * memoryOnHeap) + sharedOnHeap; - double totalExpectedWorkerOffHeap = (totalNumberOfTasks * memoryOffHeap) + sharedOffHeapWorker; - - SchedulerAssignment assignment = cluster.getAssignmentById(topo.getId()); - Set slots = assignment.getSlots(); - Map nodeToTotalShared = assignment.getNodeIdToTotalSharedOffHeapNodeMemory(); - LOG.info("NODE TO SHARED OFF HEAP {}", nodeToTotalShared); - Map scheduledResources = assignment.getScheduledResources(); - assertEquals(2, slots.size()); - assertEquals(2, nodeToTotalShared.size()); - assertEquals(2, scheduledResources.size()); - double totalFoundCPU = 0.0; - double totalFoundOnHeap = 0.0; - double totalFoundWorkerOffHeap = 0.0; - for (WorkerSlot ws : slots) { - WorkerResources resources = scheduledResources.get(ws); - totalFoundCPU += resources.get_cpu(); - totalFoundOnHeap += resources.get_mem_on_heap(); - totalFoundWorkerOffHeap += resources.get_mem_off_heap(); + assertEquals(totalExpectedCPU, totalFoundCPU, 0.01); + assertEquals(totalExpectedOnHeap, totalFoundOnHeap, 0.01); + assertEquals(totalExpectedWorkerOffHeap, totalFoundWorkerOffHeap, 0.01); + assertEquals(sharedOffHeapNode, nodeToTotalShared.values().stream().mapToDouble((d) -> d).sum(), 0.01); + assertEquals(sharedOnHeap, scheduledResources.values().stream().mapToDouble(WorkerResources::get_shared_mem_on_heap).sum(), 0.01); + assertEquals(sharedOffHeapWorker, scheduledResources.values().stream().mapToDouble(WorkerResources::get_shared_mem_off_heap).sum(), + 0.01); } - - assertEquals(totalExpectedCPU, totalFoundCPU, 0.01); - assertEquals(totalExpectedOnHeap, totalFoundOnHeap, 0.01); - assertEquals(totalExpectedWorkerOffHeap, totalFoundWorkerOffHeap, 0.01); - assertEquals(sharedOffHeapNode, nodeToTotalShared.values().stream().mapToDouble((d) -> d).sum(), 0.01); - assertEquals(sharedOnHeap, scheduledResources.values().stream().mapToDouble(WorkerResources::get_shared_mem_on_heap).sum(), 0.01); - assertEquals(sharedOffHeapWorker, scheduledResources.values().stream().mapToDouble(WorkerResources::get_shared_mem_off_heap).sum(), - 0.01); } /** @@ -194,228 +196,232 @@ public void testGenericResourceAwareStrategySharedMemory() { @ValueSource(ints = {-1, 0, 1, 2}) public void testGenericResourceAwareStrategyWithoutSettingAckerExecutors(int numOfAckersPerWorker) throws InvalidTopologyException { - int spoutParallelism = 1; - int boltParallelism = 2; - TopologyBuilder builder = new TopologyBuilder(); - builder.setSpout("spout", new TestSpout(), - spoutParallelism); - builder.setBolt("bolt-1", new TestBolt(), - boltParallelism).shuffleGrouping("spout"); - builder.setBolt("bolt-2", new TestBolt(), - boltParallelism).shuffleGrouping("bolt-1").addResource("gpu.count", 1.0); - builder.setBolt("bolt-3", new TestBolt(), - boltParallelism).shuffleGrouping("bolt-2").addResource("gpu.count", 2.0); - - String topoName = "testTopology"; - StormTopology stormTopology = builder.createTopology(); - - INimbus iNimbus = new INimbusTest(); - - Config conf = createGrasClusterConfig(50, 500, 0, null, Collections.emptyMap()); - Map genericResourcesMap = new HashMap<>(); - genericResourcesMap.put("gpu.count", 2.0); - Map supMap = genSupervisors(4, 4, 200, 2000, genericResourcesMap); - - - conf.put(Config.TOPOLOGY_PRIORITY, 0); - conf.put(Config.TOPOLOGY_NAME, topoName); - conf.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, 2000); - 
conf.put(Config.TOPOLOGY_SUBMITTER_USER, "user");
-
-        // Topology needs 2 workers (estimated by nimbus based on resources),
-        // but with ackers added, probably more worker will be launched.
-        // Parameterized test on different numOfAckersPerWorker
-        if (numOfAckersPerWorker == -1) {
-            // Both Config.TOPOLOGY_ACKER_EXECUTORS and Config.TOPOLOGY_RAS_ACKER_EXECUTORS_PER_WORKER are not set
-            // Default will be 2 (estimate num of workers) and 1 respectively
-        } else {
-            conf.put(Config.TOPOLOGY_RAS_ACKER_EXECUTORS_PER_WORKER, numOfAckersPerWorker);
+        for (Class<?> strategyClass: strategyClasses) {
+            int spoutParallelism = 1;
+            int boltParallelism = 2;
+            TopologyBuilder builder = new TopologyBuilder();
+            builder.setSpout("spout", new TestSpout(),
+                spoutParallelism);
+            builder.setBolt("bolt-1", new TestBolt(),
+                boltParallelism).shuffleGrouping("spout");
+            builder.setBolt("bolt-2", new TestBolt(),
+                boltParallelism).shuffleGrouping("bolt-1").addResource("gpu.count", 1.0);
+            builder.setBolt("bolt-3", new TestBolt(),
+                boltParallelism).shuffleGrouping("bolt-2").addResource("gpu.count", 2.0);
+
+            String topoName = "testTopology";
+            StormTopology stormTopology = builder.createTopology();
+
+            INimbus iNimbus = new INimbusTest();
+
+            Config conf = createGrasClusterConfig(strategyClass, 50, 500, 0, null, Collections.emptyMap());
+            Map<String, Double> genericResourcesMap = new HashMap<>();
+            genericResourcesMap.put("gpu.count", 2.0);
+            Map<String, SupervisorDetails> supMap = genSupervisors(4, 4, 200, 2000, genericResourcesMap);
+
+
+            conf.put(Config.TOPOLOGY_PRIORITY, 0);
+            conf.put(Config.TOPOLOGY_NAME, topoName);
+            conf.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, 2000);
+            conf.put(Config.TOPOLOGY_SUBMITTER_USER, "user");
+
+            // Topology needs 2 workers (estimated by nimbus based on resources),
+            // but with ackers added, more workers will probably be launched.
+            // Parameterized test on different numOfAckersPerWorker
+            if (numOfAckersPerWorker == -1) {
+                // Neither Config.TOPOLOGY_ACKER_EXECUTORS nor Config.TOPOLOGY_RAS_ACKER_EXECUTORS_PER_WORKER is set;
+                // they default to 2 (the estimated number of workers) and 1, respectively
+            } else {
+                conf.put(Config.TOPOLOGY_RAS_ACKER_EXECUTORS_PER_WORKER, numOfAckersPerWorker);
+            }
+
+            int estimatedNumWorker = ServerUtils.getEstimatedWorkerCountForRasTopo(conf, stormTopology);
+            Nimbus.setUpAckerExecutorConfigs(topoName, conf, conf, estimatedNumWorker);
+
+            conf.put(Config.TOPOLOGY_ACKER_RESOURCES_ONHEAP_MEMORY_MB, 250);
+            conf.put(Config.TOPOLOGY_ACKER_CPU_PCORE_PERCENT, 50);
+
+            TopologyDetails topo = new TopologyDetails("testTopology-id", conf, stormTopology, 0,
+                genExecsAndComps(StormCommon.systemTopology(conf, stormTopology)), currentTime, "user");
+
+            Topologies topologies = new Topologies(topo);
+            Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, conf);
+
+            scheduler = new ResourceAwareScheduler();
+
+            scheduler.prepare(conf, new StormMetricsRegistry());
+            scheduler.schedule(topologies, cluster);
+
+            // We need to have 3 slots on 3 separate hosts. The topology needs 6 GPUs, 3500 MB of memory, and 350% CPU
+            // The bolt-3 instances must be on separate nodes because they each need 2 GPUs.
+            // The bolt-2 instances must be on the same node as they each need 1 GPU
+            // (this assumes that we are packing the components to avoid fragmentation).
+            // The bolt-1 and spout instances fill in the rest.
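// Worked totals behind the comment above, from constants earlier in this test:
// 1 spout + 3 bolts x 2 = 7 executors, each at 500 MB and 50% CPU, gives 3500 MB
// and 350% CPU; GPUs come to 2 x 1 (bolt-2) + 2 x 2 (bolt-3) = 6. Each supervisor
// offers only 2 GPUs (gpu.count above), so the two bolt-3 executors can never
// share a node, which forces the 3-slot, 3-host layout.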
+ + // Ordered execs: [[6, 6], [2, 2], [4, 4], [5, 5], [1, 1], [3, 3], [0, 0]] + // Ackers: [[8, 8], [7, 7]] (+ [[9, 9], [10, 10]] when numOfAckersPerWorker=2) + HashSet> expectedScheduling = new HashSet<>(); + if (numOfAckersPerWorker == -1 || numOfAckersPerWorker == 1) { + expectedScheduling.add(new HashSet<>(Collections.singletonList( + new ExecutorDetails(3, 3)))); //bolt-3 - 500 MB, 50% CPU, 2 GPU + //Total 500 MB, 50% CPU, 2 - GPU -> this node has 1500 MB, 150% cpu, 0 GPU left + expectedScheduling.add(new HashSet<>(Arrays.asList( + new ExecutorDetails(6, 6), //bolt-2 - 500 MB, 50% CPU, 1 GPU + new ExecutorDetails(2, 2), //bolt-1 - 500 MB, 50% CPU, 0 GPU + new ExecutorDetails(5, 5), //bolt-2 - 500 MB, 50% CPU, 1 GPU + new ExecutorDetails(8, 8)))); //acker - 250 MB, 50% CPU, 0 GPU + //Total 1750 MB, 200% CPU, 2 GPU -> this node has 250 MB, 0% CPU, 0 GPU left + expectedScheduling.add(new HashSet<>(Arrays.asList( + new ExecutorDetails(4, 4), //bolt-3 500 MB, 50% cpu, 2 GPU + new ExecutorDetails(1, 1), //bolt-1 - 500 MB, 50% CPU, 0 GPU + new ExecutorDetails(0, 0), //Spout - 500 MB, 50% CPU, 0 GPU + new ExecutorDetails(7, 7)))); //acker - 250 MB, 50% CPU, 0 GPU + //Total 1750 MB, 200% CPU, 2 GPU -> this node has 250 MB, 0% CPU, 0 GPU left + } else if (numOfAckersPerWorker == 0) { + expectedScheduling.add(new HashSet<>(Collections.singletonList( + new ExecutorDetails(3, 3)))); //bolt-3 - 500 MB, 50% CPU, 2 GPU + //Total 500 MB, 50% CPU, 2 - GPU -> this node has 1500 MB, 150% cpu, 0 GPU left + expectedScheduling.add(new HashSet<>(Arrays.asList( + new ExecutorDetails(6, 6), //bolt-2 - 500 MB, 50% CPU, 1 GPU + new ExecutorDetails(2, 2), //bolt-1 - 500 MB, 50% CPU, 0 GPU + new ExecutorDetails(5, 5), //bolt-2 - 500 MB, 50% CPU, 1 GPU + new ExecutorDetails(1, 1)))); //bolt-1 - 500 MB, 50% CPU, 0 GPU + //Total 2000 MB, 200% CPU, 2 GPU -> this node has 0 MB, 0% CPU, 0 GPU left + expectedScheduling.add(new HashSet<>(Arrays.asList( + new ExecutorDetails(0, 0), //Spout - 500 MB, 50% CPU, 0 GPU + new ExecutorDetails(4, 4)))); //bolt-3 500 MB, 50% cpu, 2 GPU + //Total 1000 MB, 100% CPU, 2 GPU -> this node has 1000 MB, 100% CPU, 0 GPU left + } else if (numOfAckersPerWorker == 2) { + expectedScheduling.add(new HashSet<>(Collections.singletonList( + new ExecutorDetails(3, 3)))); //bolt-3 - 500 MB, 50% CPU, 2 GPU + //Total 500 MB, 50% CPU, 2 - GPU -> this node has 1500 MB, 150% cpu, 0 GPU left + expectedScheduling.add(new HashSet<>(Arrays.asList( + new ExecutorDetails(7, 7), //acker - 250 MB, 50% CPU, 0 GPU + new ExecutorDetails(8, 8), //acker - 250 MB, 50% CPU, 0 GPU + new ExecutorDetails(6, 6), //bolt-2 - 500 MB, 50% CPU, 1 GPU + new ExecutorDetails(2, 2)))); //bolt-1 - 500 MB, 50% CPU, 0 GPU + //Total 1500 MB, 200% CPU, 2 GPU -> this node has 500 MB, 0% CPU, 0 GPU left + expectedScheduling.add(new HashSet<>(Arrays.asList( + new ExecutorDetails(9, 9), //acker- 250 MB, 50% CPU, 0 GPU + new ExecutorDetails(10, 10), //acker- 250 MB, 50% CPU, 0 GPU + new ExecutorDetails(1, 1), //bolt-1 - 500 MB, 50% CPU, 0 GPU + new ExecutorDetails(4, 4)))); //bolt-3 500 MB, 50% cpu, 2 GPU + //Total 1500 MB, 200% CPU, 2 GPU -> this node has 500 MB, 0% CPU, 0 GPU left + expectedScheduling.add(new HashSet<>(Arrays.asList( + new ExecutorDetails(0, 0), //Spout - 500 MB, 50% CPU, 0 GPU + new ExecutorDetails(5, 5)))); //bolt-2 - 500 MB, 50% CPU, 1 GPU + //Total 1000 MB, 100% CPU, 2 GPU -> this node has 1000 MB, 100% CPU, 0 GPU left + } + HashSet> foundScheduling = new HashSet<>(); + SchedulerAssignment assignment = 
cluster.getAssignmentById("testTopology-id"); + for (Collection execs : assignment.getSlotToExecutors().values()) { + foundScheduling.add(new HashSet<>(execs)); + } + + assertEquals(expectedScheduling, foundScheduling); } + } + + /** + * Test if the scheduling logic for the GenericResourceAwareStrategy is correct + * with setting {@link Config#TOPOLOGY_ACKER_EXECUTORS}. + * + * Test details refer to {@link TestDefaultResourceAwareStrategy#testDefaultResourceAwareStrategyWithSettingAckerExecutors(int)} + */ + @ParameterizedTest + @ValueSource(ints = {-1, 0, 2, 200}) + public void testGenericResourceAwareStrategyWithSettingAckerExecutors(int numOfAckersPerWorker) + throws InvalidTopologyException { + for (Class strategyClass: strategyClasses) { + int spoutParallelism = 1; + int boltParallelism = 2; + TopologyBuilder builder = new TopologyBuilder(); + builder.setSpout("spout", new TestSpout(), + spoutParallelism); + builder.setBolt("bolt-1", new TestBolt(), + boltParallelism).shuffleGrouping("spout"); + builder.setBolt("bolt-2", new TestBolt(), + boltParallelism).shuffleGrouping("bolt-1").addResource("gpu.count", 1.0); + builder.setBolt("bolt-3", new TestBolt(), + boltParallelism).shuffleGrouping("bolt-2").addResource("gpu.count", 2.0); + + String topoName = "testTopology"; + StormTopology stormTopology = builder.createTopology(); + + INimbus iNimbus = new INimbusTest(); + + Config conf = createGrasClusterConfig(strategyClass, 50, 500, 0, null, Collections.emptyMap()); + Map genericResourcesMap = new HashMap<>(); + genericResourcesMap.put("gpu.count", 2.0); + Map supMap = genSupervisors(4, 4, 200, 2000, genericResourcesMap); + + + conf.put(Config.TOPOLOGY_PRIORITY, 0); + conf.put(Config.TOPOLOGY_NAME, topoName); + conf.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, 2000); + conf.put(Config.TOPOLOGY_SUBMITTER_USER, "user"); + + conf.put(Config.TOPOLOGY_ACKER_EXECUTORS, 4); + if (numOfAckersPerWorker == -1) { + // Leave topology.acker.executors.per.worker unset + } else { + conf.put(Config.TOPOLOGY_RAS_ACKER_EXECUTORS_PER_WORKER, numOfAckersPerWorker); + } - int estimatedNumWorker = ServerUtils.getEstimatedWorkerCountForRasTopo(conf, stormTopology); - Nimbus.setUpAckerExecutorConfigs(topoName, conf, conf, estimatedNumWorker); + int estimatedNumWorker = ServerUtils.getEstimatedWorkerCountForRasTopo(conf, stormTopology); + Nimbus.setUpAckerExecutorConfigs(topoName, conf, conf, estimatedNumWorker); - conf.put(Config.TOPOLOGY_ACKER_RESOURCES_ONHEAP_MEMORY_MB, 250); - conf.put(Config.TOPOLOGY_ACKER_CPU_PCORE_PERCENT, 50); + conf.put(Config.TOPOLOGY_ACKER_RESOURCES_ONHEAP_MEMORY_MB, 250); + conf.put(Config.TOPOLOGY_ACKER_CPU_PCORE_PERCENT, 50); - TopologyDetails topo = new TopologyDetails("testTopology-id", conf, stormTopology, 0, - genExecsAndComps(StormCommon.systemTopology(conf, stormTopology)), currentTime, "user"); + TopologyDetails topo = new TopologyDetails("testTopology-id", conf, stormTopology, 0, + genExecsAndComps(StormCommon.systemTopology(conf, stormTopology)), currentTime, "user"); - Topologies topologies = new Topologies(topo); - Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, conf); + Topologies topologies = new Topologies(topo); + Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, conf); - scheduler = new ResourceAwareScheduler(); + scheduler = new ResourceAwareScheduler(); - scheduler.prepare(conf, new StormMetricsRegistry()); - 
scheduler.schedule(topologies, cluster); + scheduler.prepare(conf, new StormMetricsRegistry()); + scheduler.schedule(topologies, cluster); - // We need to have 3 slots on 3 separate hosts. The topology needs 6 GPUs 3500 MB memory and 350% CPU - // The bolt-3 instances must be on separate nodes because they each need 2 GPUs. - // The bolt-2 instances must be on the same node as they each need 1 GPU - // (this assumes that we are packing the components to avoid fragmentation). - // The bolt-1 and spout instances fill in the rest. + // We need to have 3 slots on 3 separate hosts. The topology needs 6 GPUs 3500 MB memory and 350% CPU + // The bolt-3 instances must be on separate nodes because they each need 2 GPUs. + // The bolt-2 instances must be on the same node as they each need 1 GPU + // (this assumes that we are packing the components to avoid fragmentation). + // The bolt-1 and spout instances fill in the rest. - // Ordered execs: [[6, 6], [2, 2], [4, 4], [5, 5], [1, 1], [3, 3], [0, 0]] - // Ackers: [[8, 8], [7, 7]] (+ [[9, 9], [10, 10]] when numOfAckersPerWorker=2) - HashSet> expectedScheduling = new HashSet<>(); - if (numOfAckersPerWorker == -1 || numOfAckersPerWorker == 1) { - expectedScheduling.add(new HashSet<>(Collections.singletonList( - new ExecutorDetails(3, 3)))); //bolt-3 - 500 MB, 50% CPU, 2 GPU - //Total 500 MB, 50% CPU, 2 - GPU -> this node has 1500 MB, 150% cpu, 0 GPU left - expectedScheduling.add(new HashSet<>(Arrays.asList( - new ExecutorDetails(6, 6), //bolt-2 - 500 MB, 50% CPU, 1 GPU - new ExecutorDetails(2, 2), //bolt-1 - 500 MB, 50% CPU, 0 GPU - new ExecutorDetails(5, 5), //bolt-2 - 500 MB, 50% CPU, 1 GPU - new ExecutorDetails(8, 8)))); //acker - 250 MB, 50% CPU, 0 GPU - //Total 1750 MB, 200% CPU, 2 GPU -> this node has 250 MB, 0% CPU, 0 GPU left - expectedScheduling.add(new HashSet<>(Arrays.asList( - new ExecutorDetails(4, 4), //bolt-3 500 MB, 50% cpu, 2 GPU - new ExecutorDetails(1, 1), //bolt-1 - 500 MB, 50% CPU, 0 GPU - new ExecutorDetails(0, 0), //Spout - 500 MB, 50% CPU, 0 GPU - new ExecutorDetails(7, 7) ))); //acker - 250 MB, 50% CPU, 0 GPU - //Total 1750 MB, 200% CPU, 2 GPU -> this node has 250 MB, 0% CPU, 0 GPU left - } else if (numOfAckersPerWorker == 0) { - expectedScheduling.add(new HashSet<>(Collections.singletonList( - new ExecutorDetails(3, 3)))); //bolt-3 - 500 MB, 50% CPU, 2 GPU - //Total 500 MB, 50% CPU, 2 - GPU -> this node has 1500 MB, 150% cpu, 0 GPU left - expectedScheduling.add(new HashSet<>(Arrays.asList( - new ExecutorDetails(6, 6), //bolt-2 - 500 MB, 50% CPU, 1 GPU - new ExecutorDetails(2, 2), //bolt-1 - 500 MB, 50% CPU, 0 GPU - new ExecutorDetails(5, 5), //bolt-2 - 500 MB, 50% CPU, 1 GPU - new ExecutorDetails(1, 1)))); //bolt-1 - 500 MB, 50% CPU, 0 GPU - //Total 2000 MB, 200% CPU, 2 GPU -> this node has 0 MB, 0% CPU, 0 GPU left - expectedScheduling.add(new HashSet<>(Arrays.asList( - new ExecutorDetails(0, 0), //Spout - 500 MB, 50% CPU, 0 GPU - new ExecutorDetails(4, 4)))); //bolt-3 500 MB, 50% cpu, 2 GPU - //Total 1000 MB, 100% CPU, 2 GPU -> this node has 1000 MB, 100% CPU, 0 GPU left - } else if (numOfAckersPerWorker == 2) { + // Ordered execs: [[6, 6], [2, 2], [4, 4], [5, 5], [1, 1], [3, 3], [0, 0]] + // Ackers: [[8, 8], [7, 7]] (+ [[9, 9], [10, 10]] when numOfAckersPerWorker=2) + HashSet> expectedScheduling = new HashSet<>(); expectedScheduling.add(new HashSet<>(Collections.singletonList( - new ExecutorDetails(3, 3)))); //bolt-3 - 500 MB, 50% CPU, 2 GPU + new ExecutorDetails(3, 3)))); //bolt-3 - 500 MB, 50% CPU, 2 GPU //Total 500 MB, 
50% CPU, 2 GPU -> this node has 1500 MB, 150% CPU, 0 GPU left expectedScheduling.add(new HashSet<>(Arrays.asList( - new ExecutorDetails(7, 7), //acker - 250 MB, 50% CPU, 0 GPU - new ExecutorDetails(8, 8), //acker - 250 MB, 50% CPU, 0 GPU - new ExecutorDetails(6, 6), //bolt-2 - 500 MB, 50% CPU, 1 GPU - new ExecutorDetails(2, 2)))); //bolt-1 - 500 MB, 50% CPU, 0 GPU + new ExecutorDetails(7, 7), //acker - 250 MB, 50% CPU, 0 GPU + new ExecutorDetails(8, 8), //acker - 250 MB, 50% CPU, 0 GPU + new ExecutorDetails(6, 6), //bolt-2 - 500 MB, 50% CPU, 1 GPU + new ExecutorDetails(2, 2)))); //bolt-1 - 500 MB, 50% CPU, 0 GPU //Total 1500 MB, 200% CPU, 2 GPU -> this node has 500 MB, 0% CPU, 0 GPU left expectedScheduling.add(new HashSet<>(Arrays.asList( - new ExecutorDetails(9, 9), //acker- 250 MB, 50% CPU, 0 GPU - new ExecutorDetails(10, 10), //acker- 250 MB, 50% CPU, 0 GPU - new ExecutorDetails(1, 1), //bolt-1 - 500 MB, 50% CPU, 0 GPU - new ExecutorDetails(4, 4)))); //bolt-3 500 MB, 50% cpu, 2 GPU + new ExecutorDetails(9, 9), //acker - 250 MB, 50% CPU, 0 GPU + new ExecutorDetails(10, 10), //acker - 250 MB, 50% CPU, 0 GPU + new ExecutorDetails(1, 1), //bolt-1 - 500 MB, 50% CPU, 0 GPU + new ExecutorDetails(4, 4)))); //bolt-3 - 500 MB, 50% CPU, 2 GPU //Total 1500 MB, 200% CPU, 2 GPU -> this node has 500 MB, 0% CPU, 0 GPU left expectedScheduling.add(new HashSet<>(Arrays.asList( - new ExecutorDetails(0, 0), //Spout - 500 MB, 50% CPU, 0 GPU - new ExecutorDetails(5, 5)))); //bolt-2 - 500 MB, 50% CPU, 1 GPU + new ExecutorDetails(0, 0), //Spout - 500 MB, 50% CPU, 0 GPU + new ExecutorDetails(5, 5)))); //bolt-2 - 500 MB, 50% CPU, 1 GPU //Total 1000 MB, 100% CPU, 2 GPU -> this node has 1000 MB, 100% CPU, 0 GPU left - } - HashSet> foundScheduling = new HashSet<>(); - SchedulerAssignment assignment = cluster.getAssignmentById("testTopology-id"); - for (Collection execs : assignment.getSlotToExecutors().values()) { - foundScheduling.add(new HashSet<>(execs)); - } - assertEquals(expectedScheduling, foundScheduling); - } - - /** - * Test if the scheduling logic for the GenericResourceAwareStrategy is correct - * with setting {@link Config#TOPOLOGY_ACKER_EXECUTORS}.
- * - * Test details refer to {@link TestDefaultResourceAwareStrategy#testDefaultResourceAwareStrategyWithSettingAckerExecutors(int)} - */ - @ParameterizedTest - @ValueSource(ints = {-1, 0, 2, 200}) - public void testGenericResourceAwareStrategyWithSettingAckerExecutors(int numOfAckersPerWorker) - throws InvalidTopologyException { - int spoutParallelism = 1; - int boltParallelism = 2; - TopologyBuilder builder = new TopologyBuilder(); - builder.setSpout("spout", new TestSpout(), - spoutParallelism); - builder.setBolt("bolt-1", new TestBolt(), - boltParallelism).shuffleGrouping("spout"); - builder.setBolt("bolt-2", new TestBolt(), - boltParallelism).shuffleGrouping("bolt-1").addResource("gpu.count", 1.0); - builder.setBolt("bolt-3", new TestBolt(), - boltParallelism).shuffleGrouping("bolt-2").addResource("gpu.count", 2.0); - - String topoName = "testTopology"; - StormTopology stormTopology = builder.createTopology(); - - INimbus iNimbus = new INimbusTest(); - - Config conf = createGrasClusterConfig(50, 500, 0, null, Collections.emptyMap()); - Map genericResourcesMap = new HashMap<>(); - genericResourcesMap.put("gpu.count", 2.0); - Map supMap = genSupervisors(4, 4, 200, 2000, genericResourcesMap); - - - conf.put(Config.TOPOLOGY_PRIORITY, 0); - conf.put(Config.TOPOLOGY_NAME, topoName); - conf.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, 2000); - conf.put(Config.TOPOLOGY_SUBMITTER_USER, "user"); - - conf.put(Config.TOPOLOGY_ACKER_EXECUTORS, 4); - if (numOfAckersPerWorker == -1) { - // Leave topology.acker.executors.per.worker unset - } else { - conf.put(Config.TOPOLOGY_RAS_ACKER_EXECUTORS_PER_WORKER, numOfAckersPerWorker); - } + HashSet> foundScheduling = new HashSet<>(); + SchedulerAssignment assignment = cluster.getAssignmentById("testTopology-id"); + for (Collection execs : assignment.getSlotToExecutors().values()) { + foundScheduling.add(new HashSet<>(execs)); + } - int estimatedNumWorker = ServerUtils.getEstimatedWorkerCountForRasTopo(conf, stormTopology); - Nimbus.setUpAckerExecutorConfigs(topoName, conf, conf, estimatedNumWorker); - - conf.put(Config.TOPOLOGY_ACKER_RESOURCES_ONHEAP_MEMORY_MB, 250); - conf.put(Config.TOPOLOGY_ACKER_CPU_PCORE_PERCENT, 50); - - TopologyDetails topo = new TopologyDetails("testTopology-id", conf, stormTopology, 0, - genExecsAndComps(StormCommon.systemTopology(conf, stormTopology)), currentTime, "user"); - - Topologies topologies = new Topologies(topo); - Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, conf); - - scheduler = new ResourceAwareScheduler(); - - scheduler.prepare(conf, new StormMetricsRegistry()); - scheduler.schedule(topologies, cluster); - - // We need to have 3 slots on 3 separate hosts. The topology needs 6 GPUs 3500 MB memory and 350% CPU - // The bolt-3 instances must be on separate nodes because they each need 2 GPUs. - // The bolt-2 instances must be on the same node as they each need 1 GPU - // (this assumes that we are packing the components to avoid fragmentation). - // The bolt-1 and spout instances fill in the rest. 
- - // Ordered execs: [[6, 6], [2, 2], [4, 4], [5, 5], [1, 1], [3, 3], [0, 0]] - // Ackers: [[8, 8], [7, 7]] (+ [[9, 9], [10, 10]] when numOfAckersPerWorker=2) - HashSet> expectedScheduling = new HashSet<>(); - expectedScheduling.add(new HashSet<>(Collections.singletonList( - new ExecutorDetails(3, 3)))); //bolt-3 - 500 MB, 50% CPU, 2 GPU - //Total 500 MB, 50% CPU, 2 - GPU -> this node has 1500 MB, 150% cpu, 0 GPU left - expectedScheduling.add(new HashSet<>(Arrays.asList( - new ExecutorDetails(7, 7), //acker - 250 MB, 50% CPU, 0 GPU - new ExecutorDetails(8, 8), //acker - 250 MB, 50% CPU, 0 GPU - new ExecutorDetails(6, 6), //bolt-2 - 500 MB, 50% CPU, 1 GPU - new ExecutorDetails(2, 2)))); //bolt-1 - 500 MB, 50% CPU, 0 GPU - //Total 1500 MB, 200% CPU, 2 GPU -> this node has 500 MB, 0% CPU, 0 GPU left - expectedScheduling.add(new HashSet<>(Arrays.asList( - new ExecutorDetails(9, 9), //acker- 250 MB, 50% CPU, 0 GPU - new ExecutorDetails(10, 10), //acker- 250 MB, 50% CPU, 0 GPU - new ExecutorDetails(1, 1), //bolt-1 - 500 MB, 50% CPU, 0 GPU - new ExecutorDetails(4, 4)))); //bolt-3 500 MB, 50% cpu, 2 GPU - //Total 1500 MB, 200% CPU, 2 GPU -> this node has 500 MB, 0% CPU, 0 GPU left - expectedScheduling.add(new HashSet<>(Arrays.asList( - new ExecutorDetails(0, 0), //Spout - 500 MB, 50% CPU, 0 GPU - new ExecutorDetails(5, 5)))); //bolt-2 - 500 MB, 50% CPU, 1 GPU - //Total 1000 MB, 100% CPU, 2 GPU -> this node has 1000 MB, 100% CPU, 0 GPU left - - HashSet> foundScheduling = new HashSet<>(); - SchedulerAssignment assignment = cluster.getAssignmentById("testTopology-id"); - for (Collection execs : assignment.getSlotToExecutors().values()) { - foundScheduling.add(new HashSet<>(execs)); + assertEquals(expectedScheduling, foundScheduling); } - - assertEquals(expectedScheduling, foundScheduling); } private TopologyDetails createTestStormTopology(StormTopology stormTopology, int priority, String name, Config conf) { @@ -430,56 +436,58 @@ private TopologyDetails createTestStormTopology(StormTopology stormTopology, int */ @Test public void testGrasRequiringEviction() { - int spoutParallelism = 3; - double cpuPercent = 10; - double memoryOnHeap = 10; - double memoryOffHeap = 10; - // Sufficient Cpu/Memory. But insufficient gpu to schedule all topologies (gpu1, noGpu, gpu2). 
- - // gpu topology (requires 3 gpu's in total) - TopologyBuilder builder = new TopologyBuilder(); - builder.setSpout("spout", new TestSpout(), spoutParallelism).addResource("gpu.count", 1.0); - StormTopology stormTopologyWithGpu = builder.createTopology(); - - // non-gpu topology - builder = new TopologyBuilder(); - builder.setSpout("spout", new TestSpout(), spoutParallelism); - StormTopology stormTopologyNoGpu = builder.createTopology(); - - Config conf = createGrasClusterConfig(cpuPercent, memoryOnHeap, memoryOffHeap, null, Collections.emptyMap()); - conf.put(DaemonConfig.RESOURCE_AWARE_SCHEDULER_MAX_TOPOLOGY_SCHEDULING_ATTEMPTS, 2); // allow 1 round of evictions - - String gpu1 = "hasGpu1"; - String noGpu = "hasNoGpu"; - String gpu2 = "hasGpu2"; - TopologyDetails topo[] = { - createTestStormTopology(stormTopologyWithGpu, 10, gpu1, conf), - createTestStormTopology(stormTopologyNoGpu, 10, noGpu, conf), - createTestStormTopology(stormTopologyWithGpu, 9, gpu2, conf) - }; - Topologies topologies = new Topologies(topo[0], topo[1]); - - Map genericResourcesMap = new HashMap<>(); - genericResourcesMap.put("gpu.count", 1.0); - Map supMap = genSupervisors(4, 4, 500, 2000, genericResourcesMap); - Cluster cluster = new Cluster(new INimbusTest(), new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, conf); - - // should schedule gpu1 and noGpu successfully - scheduler = new ResourceAwareScheduler(); - scheduler.prepare(conf, new StormMetricsRegistry()); - scheduler.schedule(topologies, cluster); - assertTopologiesFullyScheduled(cluster, gpu1); - assertTopologiesFullyScheduled(cluster, noGpu); - - // should evict gpu1 and noGpu topologies in order to schedule gpu2 topology; then fail to reschedule gpu1 topology; - // then schedule noGpu topology. - // Scheduling used to ignore gpu resource when deciding when to stop evicting, and gpu2 would fail to schedule. - topologies = new Topologies(topo[0], topo[1], topo[2]); - cluster = new Cluster(cluster, topologies); - scheduler.schedule(topologies, cluster); - assertTopologiesNotScheduled(cluster, gpu1); - assertTopologiesFullyScheduled(cluster, noGpu); - assertTopologiesFullyScheduled(cluster, gpu2); + for (Class strategyClass: strategyClasses) { + int spoutParallelism = 3; + double cpuPercent = 10; + double memoryOnHeap = 10; + double memoryOffHeap = 10; + // Sufficient Cpu/Memory. But insufficient gpu to schedule all topologies (gpu1, noGpu, gpu2). 
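+            // (A sketch of the arithmetic, assuming genSupervisors(4, 4, ...) below creates 4 supervisors and
+            // the gpu.count of 1.0 applies per supervisor: the cluster offers 4 GPUs in total, while gpu1 and
+            // gpu2 each request spoutParallelism * 1.0 = 3 GPUs, so the two GPU topologies cannot fit at once.)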
+ + // gpu topology (requires 3 gpu's in total) + TopologyBuilder builder = new TopologyBuilder(); + builder.setSpout("spout", new TestSpout(), spoutParallelism).addResource("gpu.count", 1.0); + StormTopology stormTopologyWithGpu = builder.createTopology(); + + // non-gpu topology + builder = new TopologyBuilder(); + builder.setSpout("spout", new TestSpout(), spoutParallelism); + StormTopology stormTopologyNoGpu = builder.createTopology(); + + Config conf = createGrasClusterConfig(strategyClass, cpuPercent, memoryOnHeap, memoryOffHeap, null, Collections.emptyMap()); + conf.put(DaemonConfig.RESOURCE_AWARE_SCHEDULER_MAX_TOPOLOGY_SCHEDULING_ATTEMPTS, 2); // allow 1 round of evictions + + String gpu1 = "hasGpu1"; + String noGpu = "hasNoGpu"; + String gpu2 = "hasGpu2"; + TopologyDetails topo[] = { + createTestStormTopology(stormTopologyWithGpu, 10, gpu1, conf), + createTestStormTopology(stormTopologyNoGpu, 10, noGpu, conf), + createTestStormTopology(stormTopologyWithGpu, 9, gpu2, conf) + }; + Topologies topologies = new Topologies(topo[0], topo[1]); + + Map genericResourcesMap = new HashMap<>(); + genericResourcesMap.put("gpu.count", 1.0); + Map supMap = genSupervisors(4, 4, 500, 2000, genericResourcesMap); + Cluster cluster = new Cluster(new INimbusTest(), new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, conf); + + // should schedule gpu1 and noGpu successfully + scheduler = new ResourceAwareScheduler(); + scheduler.prepare(conf, new StormMetricsRegistry()); + scheduler.schedule(topologies, cluster); + assertTopologiesFullyScheduled(cluster, strategyClass, gpu1); + assertTopologiesFullyScheduled(cluster, strategyClass, noGpu); + + // should evict gpu1 and noGpu topologies in order to schedule gpu2 topology; then fail to reschedule gpu1 topology; + // then schedule noGpu topology. + // Scheduling used to ignore gpu resource when deciding when to stop evicting, and gpu2 would fail to schedule. 
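+            // (Note on why gpu2 may evict: createTestStormTopology gives gpu2 priority 9 versus 10 for gpu1 and
+            // noGpu, and in RAS a smaller priority value is more important.)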
+ topologies = new Topologies(topo[0], topo[1], topo[2]); + cluster = new Cluster(cluster, topologies); + scheduler.schedule(topologies, cluster); + assertTopologiesNotScheduled(cluster, strategyClass, gpu1); + assertTopologiesFullyScheduled(cluster, strategyClass, noGpu); + assertTopologiesFullyScheduled(cluster, strategyClass, gpu2); + } } /** @@ -488,124 +496,186 @@ public void testGrasRequiringEviction() { @Test public void testGenericResourceAwareStrategyInFavorOfShuffle() throws InvalidTopologyException { - int spoutParallelism = 1; - int boltParallelism = 2; - TopologyBuilder builder = new TopologyBuilder(); - builder.setSpout("spout", new TestSpout(), - spoutParallelism); - builder.setBolt("bolt-1", new TestBolt(), - boltParallelism).shuffleGrouping("spout"); - builder.setBolt("bolt-2", new TestBolt(), - boltParallelism).shuffleGrouping("bolt-1").addResource("gpu.count", 1.0); - builder.setBolt("bolt-3", new TestBolt(), - boltParallelism).shuffleGrouping("bolt-2").addResource("gpu.count", 2.0); - - StormTopology stormTopology = builder.createTopology(); - - INimbus iNimbus = new INimbusTest(); - - Config conf = createGrasClusterConfig(50, 250, 250, null, Collections.emptyMap()); - Map genericResourcesMap = new HashMap<>(); - genericResourcesMap.put("gpu.count", 2.0); - Map supMap = genSupervisors(4, 4, 200, 2000, genericResourcesMap); - - - conf.put(Config.TOPOLOGY_PRIORITY, 0); - conf.put(Config.TOPOLOGY_NAME, "testTopology"); - conf.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, Double.MAX_VALUE); - conf.put(Config.TOPOLOGY_SUBMITTER_USER, "user"); - conf.put(Config.TOPOLOGY_RAS_ORDER_EXECUTORS_BY_PROXIMITY_NEEDS, true); - - TopologyDetails topo = new TopologyDetails("testTopology-id", conf, stormTopology, 0, - genExecsAndComps(StormCommon.systemTopology(conf,stormTopology)), currentTime, "user"); - - Topologies topologies = new Topologies(topo); - Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, conf); - - ResourceAwareScheduler rs = new ResourceAwareScheduler(); - - rs.prepare(conf, new StormMetricsRegistry()); - rs.schedule(topologies, cluster); - // Sorted execs: [[0, 0], [2, 2], [6, 6], [4, 4], [1, 1], [5, 5], [3, 3], [7, 7]] - // Ackers: [[7, 7]]] - - HashSet> expectedScheduling = new HashSet<>(); - expectedScheduling.add(new HashSet<>(Arrays.asList( - new ExecutorDetails(0, 0), //spout - new ExecutorDetails(2, 2), //bolt-1 - new ExecutorDetails(6, 6), //bolt-2 - new ExecutorDetails(7, 7)))); //acker - expectedScheduling.add(new HashSet<>(Arrays.asList( - new ExecutorDetails(4, 4), //bolt-3 - new ExecutorDetails(1, 1)))); //bolt-1 - expectedScheduling.add(new HashSet<>(Collections.singletonList(new ExecutorDetails(5, 5)))); //bolt-2 - expectedScheduling.add(new HashSet<>(Collections.singletonList(new ExecutorDetails(3, 3)))); //bolt-3 - HashSet> foundScheduling = new HashSet<>(); - SchedulerAssignment assignment = cluster.getAssignmentById("testTopology-id"); - for (Collection execs : assignment.getSlotToExecutors().values()) { - foundScheduling.add(new HashSet<>(execs)); - } + for (Class strategyClass: strategyClasses) { + int spoutParallelism = 1; + int boltParallelism = 2; + TopologyBuilder builder = new TopologyBuilder(); + builder.setSpout("spout", new TestSpout(), + spoutParallelism); + builder.setBolt("bolt-1", new TestBolt(), + boltParallelism).shuffleGrouping("spout"); + builder.setBolt("bolt-2", new TestBolt(), + boltParallelism).shuffleGrouping("bolt-1").addResource("gpu.count", 1.0); + 
builder.setBolt("bolt-3", new TestBolt(), + boltParallelism).shuffleGrouping("bolt-2").addResource("gpu.count", 2.0); + + StormTopology stormTopology = builder.createTopology(); + + INimbus iNimbus = new INimbusTest(); + + Config conf = createGrasClusterConfig(strategyClass, 50, 250, 250, null, Collections.emptyMap()); + Map genericResourcesMap = new HashMap<>(); + genericResourcesMap.put("gpu.count", 2.0); + Map supMap = genSupervisors(4, 4, 200, 2000, genericResourcesMap); + + + conf.put(Config.TOPOLOGY_PRIORITY, 0); + conf.put(Config.TOPOLOGY_NAME, "testTopology"); + conf.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, Double.MAX_VALUE); + conf.put(Config.TOPOLOGY_SUBMITTER_USER, "user"); + conf.put(Config.TOPOLOGY_RAS_ORDER_EXECUTORS_BY_PROXIMITY_NEEDS, true); - assertEquals(expectedScheduling, foundScheduling); + TopologyDetails topo = new TopologyDetails("testTopology-id", conf, stormTopology, 0, + genExecsAndComps(StormCommon.systemTopology(conf, stormTopology)), currentTime, "user"); + + Topologies topologies = new Topologies(topo); + Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, conf); + + ResourceAwareScheduler rs = new ResourceAwareScheduler(); + + rs.prepare(conf, new StormMetricsRegistry()); + rs.schedule(topologies, cluster); + // Sorted execs: [[0, 0], [2, 2], [6, 6], [4, 4], [1, 1], [5, 5], [3, 3], [7, 7]] + // Ackers: [[7, 7]]] + + HashSet> expectedScheduling = new HashSet<>(); + expectedScheduling.add(new HashSet<>(Arrays.asList( + new ExecutorDetails(0, 0), //spout + new ExecutorDetails(2, 2), //bolt-1 + new ExecutorDetails(6, 6), //bolt-2 + new ExecutorDetails(7, 7)))); //acker + expectedScheduling.add(new HashSet<>(Arrays.asList( + new ExecutorDetails(4, 4), //bolt-3 + new ExecutorDetails(1, 1)))); //bolt-1 + expectedScheduling.add(new HashSet<>(Collections.singletonList(new ExecutorDetails(5, 5)))); //bolt-2 + expectedScheduling.add(new HashSet<>(Collections.singletonList(new ExecutorDetails(3, 3)))); //bolt-3 + HashSet> foundScheduling = new HashSet<>(); + SchedulerAssignment assignment = cluster.getAssignmentById("testTopology-id"); + for (Collection execs : assignment.getSlotToExecutors().values()) { + foundScheduling.add(new HashSet<>(execs)); + } + assertEquals(expectedScheduling, foundScheduling); + } } @Test public void testAntiAffinityWithMultipleTopologies() { - INimbus iNimbus = new INimbusTest(); - Map supMap = genSupervisorsWithRacks(1, 40, 66, 0, 0, 4700, 226200, new HashMap<>()); - HashMap extraResources = new HashMap<>(); - extraResources.put("my.gpu", 1.0); - supMap.putAll(genSupervisorsWithRacks(1, 40, 66, 1, 0, 4700, 226200, extraResources)); - - Config config = new Config(); - config.putAll(createGrasClusterConfig(88, 775, 25, null, null)); - - scheduler = new ResourceAwareScheduler(); - scheduler.prepare(config, new StormMetricsRegistry()); - - TopologyDetails tdSimple = genTopology("topology-simple", config, 1, - 5, 100, 300, 0, 0, "user", 8192); - - //Schedule the simple topology first - Topologies topologies = new Topologies(tdSimple); - Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config); - scheduler.schedule(topologies, cluster); - - TopologyBuilder builder = topologyBuilder(1, 5, 100, 300); - builder.setBolt("gpu-bolt", new TestBolt(), 40) - .addResource("my.gpu", 1.0) - .shuffleGrouping("spout-0"); - TopologyDetails tdGpu = topoToTopologyDetails("topology-gpu", config, 
builder.createTopology(), 0, 0,"user", 8192); - - //Now schedule GPU but with the simple topology in place. - topologies = new Topologies(tdSimple, tdGpu); - cluster = new Cluster(cluster, topologies); - scheduler.schedule(topologies, cluster); - - Map assignments = new TreeMap<>(cluster.getAssignments()); - assertEquals(2, assignments.size()); - - Map> topoPerRackCount = new HashMap<>(); - for (Entry entry: assignments.entrySet()) { - SchedulerAssignment sa = entry.getValue(); - Map slotsPerRack = new TreeMap<>(); - for (WorkerSlot slot : sa.getSlots()) { - String nodeId = slot.getNodeId(); - String rack = supervisorIdToRackName(nodeId); - slotsPerRack.computeIfAbsent(rack, (r) -> new AtomicLong(0)).incrementAndGet(); + for (Class strategyClass: strategyClasses) { + INimbus iNimbus = new INimbusTest(); + Map supMap = genSupervisorsWithRacks(1, 40, 66, 0, 0, 4700, 226200, new HashMap<>()); + HashMap extraResources = new HashMap<>(); + extraResources.put("my.gpu", 1.0); + supMap.putAll(genSupervisorsWithRacks(1, 40, 66, 1, 0, 4700, 226200, extraResources)); + + Config config = new Config(); + config.putAll(createGrasClusterConfig(strategyClass, 88, 775, 25, null, null)); + + scheduler = new ResourceAwareScheduler(); + scheduler.prepare(config, new StormMetricsRegistry()); + + TopologyDetails tdSimple = genTopology("topology-simple", config, 1, + 5, 100, 300, 0, 0, "user", 8192); + + //Schedule the simple topology first + Topologies topologies = new Topologies(tdSimple); + Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config); + scheduler.schedule(topologies, cluster); + + TopologyBuilder builder = topologyBuilder(1, 5, 100, 300); + builder.setBolt("gpu-bolt", new TestBolt(), 40) + .addResource("my.gpu", 1.0) + .shuffleGrouping("spout-0"); + TopologyDetails tdGpu = topoToTopologyDetails("topology-gpu", config, builder.createTopology(), 0, 0, "user", 8192); + + //Now schedule GPU but with the simple topology in place. + topologies = new Topologies(tdSimple, tdGpu); + cluster = new Cluster(cluster, topologies); + scheduler.schedule(topologies, cluster); + + Map assignments = new TreeMap<>(cluster.getAssignments()); + assertEquals(2, assignments.size()); + + Map> topoPerRackCount = new HashMap<>(); + for (Entry entry : assignments.entrySet()) { + SchedulerAssignment sa = entry.getValue(); + Map slotsPerRack = new TreeMap<>(); + for (WorkerSlot slot : sa.getSlots()) { + String nodeId = slot.getNodeId(); + String rack = supervisorIdToRackName(nodeId); + slotsPerRack.computeIfAbsent(rack, (r) -> new AtomicLong(0)).incrementAndGet(); + } + LOG.info("{} => {}", entry.getKey(), slotsPerRack); + topoPerRackCount.put(entry.getKey(), slotsPerRack); } - LOG.info("{} => {}", entry.getKey(), slotsPerRack); - topoPerRackCount.put(entry.getKey(), slotsPerRack); + + Map simpleCount = topoPerRackCount.get("topology-simple-0"); + assertNotNull(simpleCount); + //Because the simple topology was scheduled first we want to be sure that it didn't put anything on + // the GPU nodes. + assertEquals(1, simpleCount.size()); //Only 1 rack is in use + assertFalse(simpleCount.containsKey("r001")); //r001 is the second rack with GPUs + assertTrue(simpleCount.containsKey("r000")); //r000 is the first rack with no GPUs + + //We don't really care too much about the scheduling of topology-gpu-0, because it was scheduled. 
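+        // (Assuming each supervisor in the second rack exposes a single my.gpu unit, the 40 gpu-bolt executors
+        // need all 40 GPU supervisors, so the check above verifies the simple topology left that rack free.)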
} + } + + @Test + public void testScheduleLeftOverAckers() throws Exception { + for (Class strategyClass: strategyClasses) { + int spoutParallelism = 1; + TopologyBuilder builder = new TopologyBuilder(); + builder.setSpout("spout", new TestSpout(), spoutParallelism); + + String topoName = "testTopology"; + StormTopology stormTopology = builder.createTopology(); + + INimbus iNimbus = new INimbusTest(); + Config conf = createGrasClusterConfig(strategyClass, 50, 400, 0, null, Collections.emptyMap()); + + Map supMap = genSupervisors(1, 1, 100, 1100); + Map tmpSupMap = genSupervisors(2, 1, 100, 400); + supMap.put("r000s001", tmpSupMap.get("r000s001")); + LOG.info("{}", tmpSupMap.get("r000s001")); - Map simpleCount = topoPerRackCount.get("topology-simple-0"); - assertNotNull(simpleCount); - //Because the simple topology was scheduled first we want to be sure that it didn't put anything on - // the GPU nodes. - assertEquals(1, simpleCount.size()); //Only 1 rack is in use - assertFalse(simpleCount.containsKey("r001")); //r001 is the second rack with GPUs - assertTrue(simpleCount.containsKey("r000")); //r000 is the first rack with no GPUs + conf.put(Config.TOPOLOGY_PRIORITY, 0); + conf.put(Config.TOPOLOGY_NAME, topoName); + conf.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, 1000); + conf.put(Config.TOPOLOGY_SUBMITTER_USER, "user"); + conf.put(Config.TOPOLOGY_ACKER_EXECUTORS, 2); + conf.put(Config.TOPOLOGY_RAS_ACKER_EXECUTORS_PER_WORKER, 1); - //We don't really care too much about the scheduling of topology-gpu-0, because it was scheduled. + conf.put(Config.TOPOLOGY_ACKER_RESOURCES_ONHEAP_MEMORY_MB, 500); + conf.put(Config.TOPOLOGY_ACKER_CPU_PCORE_PERCENT, 0); + + TopologyDetails topo = new TopologyDetails("testTopology-id", conf, stormTopology, 0, + genExecsAndComps(StormCommon.systemTopology(conf, stormTopology)), currentTime, "user"); + + Topologies topologies = new Topologies(topo); + Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, conf); + + scheduler = new ResourceAwareScheduler(); + + scheduler.prepare(conf, new StormMetricsRegistry()); + scheduler.schedule(topologies, cluster); + + // First it tries to schedule spout [0, 0] with a bound acker [1, 1] on sup1 r000s000. + // However, sup2 r000s001 only has 400 MB of on-heap mem, which cannot fit the leftover acker [2, 2]. + // So it backtracks on [0, 0] and puts it on sup2 r000s001. + // Then it puts both ackers on sup1 r000s000 as leftover ackers.
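+            // (Worked numbers, assuming the 400 passed to createGrasClusterConfig is the per-component on-heap MB:
+            // sup1 offers 1100 MB and sup2 offers 400 MB. spout(400) + bound acker(500) = 900 MB would fit sup1,
+            // but the leftover acker needs 500 MB > sup2's 400 MB; after backtracking, the spout exactly fills
+            // sup2's 400 MB and the two ackers (2 * 500 = 1000 MB) fit within sup1's 1100 MB.)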
+ HashSet> expectedScheduling = new HashSet<>(); + expectedScheduling.add(new HashSet<>(Arrays.asList( + new ExecutorDetails(0, 0)))); // spout + expectedScheduling.add(new HashSet<>(Arrays.asList( + new ExecutorDetails(1, 1), // acker + new ExecutorDetails(2, 2)))); // acker + + HashSet> foundScheduling = new HashSet<>(); + SchedulerAssignment assignment = cluster.getAssignmentById("testTopology-id"); + for (Collection execs : assignment.getSlotToExecutors().values()) { + foundScheduling.add(new HashSet<>(execs)); + } + assertEquals(expectedScheduling, foundScheduling); + } } } diff --git a/storm-server/src/test/java/org/apache/storm/scheduler/resource/strategies/scheduling/TestLargeCluster.java b/storm-server/src/test/java/org/apache/storm/scheduler/resource/strategies/scheduling/TestLargeCluster.java index 9c31f5fe040..ea15b2f620f 100644 --- a/storm-server/src/test/java/org/apache/storm/scheduler/resource/strategies/scheduling/TestLargeCluster.java +++ b/storm-server/src/test/java/org/apache/storm/scheduler/resource/strategies/scheduling/TestLargeCluster.java @@ -69,6 +69,11 @@ @ExtendWith({NormalizedResourcesExtension.class}) public class TestLargeCluster { private static final Logger LOG = LoggerFactory.getLogger(TestLargeCluster.class); + private static final Class[] strategyClasses = { + DefaultResourceAwareStrategy.class, + RoundRobinResourceAwareStrategy.class, + GenericResourceAwareStrategy.class, + }; public enum TEST_CLUSTER_NAME { TEST_CLUSTER_01("largeCluster01"), @@ -383,65 +388,67 @@ private static Map createSupervisors( */ @Test public void testLargeCluster() throws Exception { - for (TEST_CLUSTER_NAME testClusterName: TEST_CLUSTER_NAME.values()) { - LOG.info("********************************************"); - LOG.info("testLargeCluster: Start Processing cluster {}", testClusterName.getClusterName()); - - String resourcePath = testClusterName.getResourcePath(); - Map supervisors = createSupervisors(testClusterName, 0); - - TopologyDetails[] topoDetailsArray = createTopoDetailsArray(resourcePath, false); - assertTrue(topoDetailsArray.length > 0, "No topologies found for cluster " + testClusterName.getClusterName()); - Topologies topologies = new Topologies(topoDetailsArray); - - Config confWithDefaultStrategy = new Config(); - confWithDefaultStrategy.putAll(topoDetailsArray[0].getConf()); - confWithDefaultStrategy.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, DefaultResourceAwareStrategy.class.getName()); - confWithDefaultStrategy.put( - Config.STORM_NETWORK_TOPOGRAPHY_PLUGIN, - TestUtilsForResourceAwareScheduler.GenSupervisorsDnsToSwitchMapping.class.getName()); - - INimbus iNimbus = new INimbusTest(); - Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supervisors, new HashMap<>(), - topologies, confWithDefaultStrategy); - - scheduler = new ResourceAwareScheduler(); + for (Class strategyClass: strategyClasses) { + for (TEST_CLUSTER_NAME testClusterName : TEST_CLUSTER_NAME.values()) { + LOG.info("********************************************"); + LOG.info("testLargeCluster: Start Processing cluster {} using {}", testClusterName.getClusterName(), strategyClass.getName()); + + String resourcePath = testClusterName.getResourcePath(); + Map supervisors = createSupervisors(testClusterName, 0); + + TopologyDetails[] topoDetailsArray = createTopoDetailsArray(resourcePath, false); + assertTrue(topoDetailsArray.length > 0, "No topologies found for cluster " + testClusterName.getClusterName()); + Topologies topologies = new Topologies(topoDetailsArray); +
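+                // (The conf keeps its historical name, but it now carries whichever strategyClass is under test
+                // rather than only the DefaultResourceAwareStrategy.)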
+ Config confWithDefaultStrategy = new Config(); + confWithDefaultStrategy.putAll(topoDetailsArray[0].getConf()); + confWithDefaultStrategy.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, strategyClass.getName()); + confWithDefaultStrategy.put( + Config.STORM_NETWORK_TOPOGRAPHY_PLUGIN, + TestUtilsForResourceAwareScheduler.GenSupervisorsDnsToSwitchMapping.class.getName()); + + INimbus iNimbus = new INimbusTest(); + Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supervisors, new HashMap<>(), + topologies, confWithDefaultStrategy); + + scheduler = new ResourceAwareScheduler(); + + List classesToDebug = Arrays.asList(DefaultResourceAwareStrategy.class, + GenericResourceAwareStrategy.class, ResourceAwareScheduler.class, + Cluster.class + ); + Level logLevel = Level.INFO; // switch to Level.DEBUG for verbose otherwise Level.INFO + classesToDebug.forEach(x -> Configurator.setLevel(x.getName(), logLevel)); + long startTime = System.currentTimeMillis(); + scheduler.prepare(confWithDefaultStrategy, new StormMetricsRegistry()); + scheduler.schedule(topologies, cluster); + long endTime = System.currentTimeMillis(); + LOG.info("Cluster={} Scheduling Time: {} topologies in {} seconds", + testClusterName.getClusterName(), topoDetailsArray.length, (endTime - startTime) / 1000.0); + + for (TopologyDetails td : topoDetailsArray) { + TestUtilsForResourceAwareScheduler.assertTopologiesFullyScheduled(cluster, strategyClass, td.getName()); + } - List classesToDebug = Arrays.asList(DefaultResourceAwareStrategy.class, - GenericResourceAwareStrategy.class, ResourceAwareScheduler.class, - Cluster.class - ); - Level logLevel = Level.INFO ; // switch to Level.DEBUG for verbose otherwise Level.INFO - classesToDebug.forEach(x -> Configurator.setLevel(x.getName(), logLevel)); - long startTime = System.currentTimeMillis(); - scheduler.prepare(confWithDefaultStrategy, new StormMetricsRegistry()); - scheduler.schedule(topologies, cluster); - long endTime = System.currentTimeMillis(); - LOG.info("Cluster={} Scheduling Time: {} topologies in {} seconds", - testClusterName.getClusterName(), topoDetailsArray.length, (endTime - startTime) / 1000.0); - - for (TopologyDetails td : topoDetailsArray) { - TestUtilsForResourceAwareScheduler.assertTopologiesFullyScheduled(cluster, td.getName()); - } + // Remove topology and reschedule it + for (int i = 0; i < topoDetailsArray.length; i++) { + startTime = System.currentTimeMillis(); + TopologyDetails topoDetails = topoDetailsArray[i]; + cluster.unassign(topoDetails.getId()); + LOG.info("Cluster={}, ({}) Removed topology {}", testClusterName.getClusterName(), i, topoDetails.getName()); + IScheduler rescheduler = new ResourceAwareScheduler(); + rescheduler.prepare(confWithDefaultStrategy, new StormMetricsRegistry()); + rescheduler.schedule(topologies, cluster); + TestUtilsForResourceAwareScheduler.assertTopologiesFullyScheduled(cluster, strategyClass, topoDetails.getName()); + endTime = System.currentTimeMillis(); + LOG.info("Cluster={}, ({}) Scheduling Time: Removed topology {} and rescheduled in {} seconds", + testClusterName.getClusterName(), i, topoDetails.getName(), (endTime - startTime) / 1000.0); + } + classesToDebug.forEach(x -> Configurator.setLevel(x.getName(), Level.INFO)); - // Remove topology and reschedule it - for (int i = 0 ; i < topoDetailsArray.length ; i++) { - startTime = System.currentTimeMillis(); - TopologyDetails topoDetails = topoDetailsArray[i]; - cluster.unassign(topoDetails.getId()); - LOG.info("Cluster={}, ({}) Removed 
topology {}", testClusterName.getClusterName(), i, topoDetails.getName()); - IScheduler rescheduler = new ResourceAwareScheduler(); - rescheduler.prepare(confWithDefaultStrategy, new StormMetricsRegistry()); - rescheduler.schedule(topologies, cluster); - TestUtilsForResourceAwareScheduler.assertTopologiesFullyScheduled(cluster, topoDetails.getName()); - endTime = System.currentTimeMillis(); - LOG.info("Cluster={}, ({}) Scheduling Time: Removed topology {} and rescheduled in {} seconds", - testClusterName.getClusterName(), i, topoDetails.getName(), (endTime - startTime) / 1000.0); + LOG.info("testLargeCluster: End Processing cluster {}", testClusterName.getClusterName()); + LOG.info("********************************************"); } - classesToDebug.forEach(x -> Configurator.setLevel(x.getName(), Level.INFO)); - - LOG.info("testLargeCluster: End Processing cluster {}", testClusterName.getClusterName()); - LOG.info("********************************************"); } } diff --git a/storm-server/src/test/java/org/apache/storm/scheduler/resource/strategies/scheduling/TestRoundRobinNodeSorterHostIsolation.java b/storm-server/src/test/java/org/apache/storm/scheduler/resource/strategies/scheduling/TestRoundRobinNodeSorterHostIsolation.java new file mode 100644 index 00000000000..3d436a63d3f --- /dev/null +++ b/storm-server/src/test/java/org/apache/storm/scheduler/resource/strategies/scheduling/TestRoundRobinNodeSorterHostIsolation.java @@ -0,0 +1,265 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.storm.scheduler.resource.strategies.scheduling; + +import com.google.common.collect.Sets; +import org.apache.storm.Config; +import org.apache.storm.metric.StormMetricsRegistry; +import org.apache.storm.networktopography.DNSToSwitchMapping; +import org.apache.storm.scheduler.Cluster; +import org.apache.storm.scheduler.INimbus; +import org.apache.storm.scheduler.IScheduler; +import org.apache.storm.scheduler.SchedulerAssignment; +import org.apache.storm.scheduler.SupervisorDetails; +import org.apache.storm.scheduler.Topologies; +import org.apache.storm.scheduler.TopologyDetails; +import org.apache.storm.scheduler.WorkerSlot; +import org.apache.storm.scheduler.resource.ResourceAwareScheduler; +import org.apache.storm.scheduler.resource.TestUtilsForResourceAwareScheduler; +import org.apache.storm.scheduler.resource.normalization.NormalizedResourcesExtension; +import org.apache.storm.scheduler.resource.normalization.ResourceMetrics; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import static org.apache.storm.scheduler.resource.TestUtilsForResourceAwareScheduler.INimbusTest; +import static org.apache.storm.scheduler.resource.TestUtilsForResourceAwareScheduler.createRoundRobinClusterConfig; +import static org.apache.storm.scheduler.resource.TestUtilsForResourceAwareScheduler.genSupervisorsWithRacksAndNuma; +import static org.apache.storm.scheduler.resource.TestUtilsForResourceAwareScheduler.genTopology; +import static org.apache.storm.scheduler.resource.TestUtilsForResourceAwareScheduler.supervisorIdToRackName; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; + +@ExtendWith({NormalizedResourcesExtension.class}) +public class TestRoundRobinNodeSorterHostIsolation { + private static final Logger LOG = LoggerFactory.getLogger(TestRoundRobinNodeSorterHostIsolation.class); + private static final int CURRENT_TIME = 1450418597; + private static final Class strategyClass = RoundRobinResourceAwareStrategy.class; + + private Config createClusterConfig(double compPcore, double compOnHeap, double compOffHeap, + Map> pools) { + Config config = TestUtilsForResourceAwareScheduler.createClusterConfig(compPcore, compOnHeap, compOffHeap, pools); + config.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, strategyClass.getName()); + return config; + } + + private static class TestDNSToSwitchMapping implements DNSToSwitchMapping { + private final Map hostToRackMap; + private final Map> rackToHosts; + + @SafeVarargs + public TestDNSToSwitchMapping(Map... 
racks) { + Set seenHosts = new HashSet<>(); + Map hostToRackMap = new HashMap<>(); + Map> rackToHosts = new HashMap<>(); + for (int rackNum = 0; rackNum < racks.length; rackNum++) { + String rack = String.format("rack-%03d", rackNum); + for (SupervisorDetails sup : racks[rackNum].values()) { + hostToRackMap.put(sup.getHost(), rack); + String host = sup.getHost(); + if (!seenHosts.contains(host)) { + rackToHosts.computeIfAbsent(rack, rid -> new ArrayList<>()).add(host); + seenHosts.add(host); + } + } + } + this.hostToRackMap = Collections.unmodifiableMap(hostToRackMap); + this.rackToHosts = Collections.unmodifiableMap(rackToHosts); + } + + /** + * Use the "rack-%03d" embedded in the name of the supervisor to determine the rack number. + * + * @param supervisorDetailsCollection + */ + public TestDNSToSwitchMapping(Collection supervisorDetailsCollection) { + Set seenHosts = new HashSet<>(); + Map hostToRackMap = new HashMap<>(); + Map> rackToHosts = new HashMap<>(); + + for (SupervisorDetails supervisorDetails: supervisorDetailsCollection) { + String rackId = supervisorIdToRackName(supervisorDetails.getId()); + hostToRackMap.put(supervisorDetails.getHost(), rackId); + String host = supervisorDetails.getHost(); + if (!seenHosts.contains(host)) { + rackToHosts.computeIfAbsent(rackId, rid -> new ArrayList<>()).add(host); + seenHosts.add(host); + } + } + this.hostToRackMap = Collections.unmodifiableMap(hostToRackMap); + this.rackToHosts = Collections.unmodifiableMap(rackToHosts); + } + + @Override + public Map resolve(List names) { + return hostToRackMap; + } + + public Map> getRackToHosts() { + return rackToHosts; + } + } + + /** + * Free one-fifth of WorkerSlots. + */ + private void freeSomeWorkerSlots(Cluster cluster) { + Map assignmentMap = cluster.getAssignments(); + for (SchedulerAssignment schedulerAssignment: assignmentMap.values()) { + int i = 0; + List slotsToKill = new ArrayList<>(); + for (WorkerSlot workerSlot: schedulerAssignment.getSlots()) { + i++; + if (i % 5 == 0) { + slotsToKill.add(workerSlot); + } + } + cluster.freeSlots(slotsToKill); + } + } + + /** + * Test whether the number of nodes is limited by {@link Config#TOPOLOGY_ISOLATED_MACHINES} by scheduling + * two topologies and verifying that the sets of nodes they occupy do not overlap.
+ */ + @Test + void testTopologyIsolation() { + INimbus iNimbus = new INimbusTest(); + double compPcore = 100; + double compOnHeap = 775; + double compOffHeap = 25; + int[] topoNumSpouts = {1,1}; + int[] topoNumBolts = {1,1}; + int[] topoSpoutParallelism = {100, 100}; + int[] topoBoltParallelism = {200, 200}; + final int numRacks = 3; + final int numSupersPerRack = 10; + final int numPortsPerSuper = 6; + final int numZonesPerHost = 1; + final double numaResourceMultiplier = 1.0; + int rackStartNum = 0; + int supStartNum = 0; + long compPerRack = (topoNumSpouts[0] * topoSpoutParallelism[0] + topoNumBolts[0] * topoBoltParallelism[0] + + topoNumSpouts[1] * topoSpoutParallelism[1]); // enough for topo1 but not topo1+topo2 + long compPerSuper = compPerRack / numSupersPerRack; + double cpuPerSuper = compPcore * compPerSuper; + double memPerSuper = (compOnHeap + compOffHeap) * compPerSuper; + double[] topoMaxHeapSize = {memPerSuper, memPerSuper}; + final String[] topoNames = {"topology1", "topology2"}; + int[] maxNodes = {15, 13}; + + Map supMap = genSupervisorsWithRacksAndNuma( + numRacks, numSupersPerRack, numZonesPerHost, numPortsPerSuper, rackStartNum, supStartNum, + cpuPerSuper, memPerSuper, Collections.emptyMap(), numaResourceMultiplier); + TestDNSToSwitchMapping testDNSToSwitchMapping = new TestDNSToSwitchMapping(supMap.values()); + + Config[] configs = new Config[topoNames.length]; + TopologyDetails[] topos = new TopologyDetails[topoNames.length]; + for (int i = 0 ; i < topoNames.length ; i++) { + configs[i] = new Config(); + configs[i].putAll(createRoundRobinClusterConfig(compPcore, compOnHeap, compOffHeap, null, null)); + configs[i].put(Config.TOPOLOGY_ISOLATED_MACHINES, maxNodes[i]); + topos[i] = genTopology(topoNames[i], configs[i], topoNumSpouts[i], + topoNumBolts[i], topoSpoutParallelism[i], topoBoltParallelism[i], 0, 0, "user", topoMaxHeapSize[i]); + } + TopologyDetails td1 = topos[0]; + TopologyDetails td2 = topos[1]; + + IScheduler scheduler = new ResourceAwareScheduler(); + scheduler.prepare(configs[0], new StormMetricsRegistry()); + + //Schedule the topo1 topology and ensure it uses a limited number of nodes + Topologies topologies = new Topologies(td1); + Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, configs[0]); + cluster.setNetworkTopography(testDNSToSwitchMapping.getRackToHosts()); + + scheduler.schedule(topologies, cluster); + Set assignedRacks = cluster.getAssignedRacks(topos[0].getId()); + assertEquals(2, assignedRacks.size(), "Racks for topology=" + td1.getId() + " is " + assignedRacks); + + //Now schedule topo2 with topo1 already in place.
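+        // (Why 2 racks: topo1 is capped at maxNodes[0] = 15 isolated machines while each rack has only
+        // numSupersPerRack = 10 supervisors, so if topo1 uses all 15 nodes they necessarily span two racks.)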
+ topologies = new Topologies(td1, td2); + cluster = new Cluster(cluster, topologies); + scheduler.schedule(topologies, cluster); + + assignedRacks = cluster.getAssignedRacks(td1.getId(), td2.getId()); + assertEquals(numRacks, assignedRacks.size(), "Racks for topologies=" + td1.getId() + "/" + td2.getId() + " is " + assignedRacks); + + SchedulerAssignment[] assignments = new SchedulerAssignment[topoNames.length]; + Collection[] assignmentNodes = new Collection[topoNames.length]; + for (int i = 0 ; i < topoNames.length ; i++) { + assignments[i] = cluster.getAssignmentById(topos[i].getId()); + if (assignments[i] == null) { + fail("Topology " + topoNames[i] + " cannot be scheduled"); + } + assignmentNodes[i] = assignments[i].getSlots().stream().map(WorkerSlot::getNodeId).collect(Collectors.toList()); + assertEquals(maxNodes[i], assignmentNodes[i].size(), "Max Nodes for " + topoNames[i] + " assignment"); + } + // confirm no overlap in nodes + Set nodes1 = new HashSet<>(assignmentNodes[0]); + Set nodes2 = new HashSet<>(assignmentNodes[1]); + Set dupNodes = Sets.intersection(nodes1, nodes2); + if (dupNodes.size() > 0) { + List lines = new ArrayList<>(); + lines.add("Topologies shared nodes when not expected to"); + lines.add("Duplicated nodes are " + String.join(",", dupNodes)); + fail(String.join("\n\t", lines)); + } + nodes2.removeAll(nodes1); + + // topo2 gets scheduled across two racks even if there is one rack with enough capacity + assignedRacks = cluster.getAssignedRacks(td2.getId()); + assertEquals(numRacks - 1, assignedRacks.size(), "Racks for topologies=" + td2.getId() + " is " + assignedRacks); + + // now unassign topo2, expect only two of three racks to be in use; free some slots and reschedule some topo1 executors + cluster.unassign(td2.getId()); + assignedRacks = cluster.getAssignedRacks(td2.getId()); + assertEquals(0, assignedRacks.size(), + "After unassigning topology " + td2.getId() + ", racks for topology=" + td2.getId() + " is " + assignedRacks); + assignedRacks = cluster.getAssignedRacks(td1.getId()); + assertEquals(numRacks - 1, assignedRacks.size(), + "After unassigning topology " + td2.getId() + ", racks for topology=" + td1.getId() + " is " + assignedRacks); + assertFalse(cluster.needsSchedulingRas(td1), + "Topology " + td1.getId() + " should be fully assigned before freeing slots"); + freeSomeWorkerSlots(cluster); + assertTrue(cluster.needsSchedulingRas(td1), + "Topology " + td1.getId() + " should need scheduling after freeing slots"); + + // then reschedule executors + scheduler.schedule(topologies, cluster); + + // only two of three racks should be in use still + assignedRacks = cluster.getAssignedRacks(td1.getId()); + assertEquals(numRacks - 1, assignedRacks.size(), + "After reassigning topology " + td2.getId() + ", racks for topology=" + td1.getId() + " is " + assignedRacks); + } +} \ No newline at end of file diff --git a/storm-server/src/test/java/org/apache/storm/scheduler/resource/strategies/scheduling/sorter/TestRoundRobinNodeSorterHostProximity.java b/storm-server/src/test/java/org/apache/storm/scheduler/resource/strategies/scheduling/sorter/TestRoundRobinNodeSorterHostProximity.java new file mode 100644 index 00000000000..c43ffa71ad9 --- /dev/null +++ b/storm-server/src/test/java/org/apache/storm/scheduler/resource/strategies/scheduling/sorter/TestRoundRobinNodeSorterHostProximity.java @@ -0,0 +1,944 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.storm.scheduler.resource.strategies.scheduling.sorter; + +import org.apache.storm.Config; +import org.apache.storm.metric.StormMetricsRegistry; +import org.apache.storm.networktopography.DNSToSwitchMapping; +import org.apache.storm.scheduler.Cluster; +import org.apache.storm.scheduler.ExecutorDetails; +import org.apache.storm.scheduler.INimbus; +import org.apache.storm.scheduler.IScheduler; +import org.apache.storm.scheduler.SchedulerAssignment; +import org.apache.storm.scheduler.SupervisorDetails; +import org.apache.storm.scheduler.Topologies; +import org.apache.storm.scheduler.TopologyDetails; +import org.apache.storm.scheduler.WorkerSlot; +import org.apache.storm.scheduler.resource.RasNodes; +import org.apache.storm.scheduler.resource.ResourceAwareScheduler; +import org.apache.storm.scheduler.resource.TestUtilsForResourceAwareScheduler; +import org.apache.storm.scheduler.resource.normalization.NormalizedResourceRequest; +import org.apache.storm.scheduler.resource.normalization.NormalizedResourcesExtension; +import org.apache.storm.scheduler.resource.normalization.ResourceMetrics; +import org.apache.storm.scheduler.resource.strategies.scheduling.BaseResourceAwareStrategy; +import org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesItem; +import org.apache.storm.scheduler.resource.strategies.scheduling.RoundRobinResourceAwareStrategy; +import org.apache.storm.topology.TopologyBuilder; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; + +import static org.apache.storm.scheduler.resource.TestUtilsForResourceAwareScheduler.INimbusTest; +import static org.apache.storm.scheduler.resource.TestUtilsForResourceAwareScheduler.TestBolt; +import static org.apache.storm.scheduler.resource.TestUtilsForResourceAwareScheduler.createRoundRobinClusterConfig; +import static org.apache.storm.scheduler.resource.TestUtilsForResourceAwareScheduler.genSupervisorsWithRacks; +import static org.apache.storm.scheduler.resource.TestUtilsForResourceAwareScheduler.genSupervisorsWithRacksAndNuma; +import static org.apache.storm.scheduler.resource.TestUtilsForResourceAwareScheduler.genTopology; +import static org.apache.storm.scheduler.resource.TestUtilsForResourceAwareScheduler.supervisorIdToRackName; +import static org.apache.storm.scheduler.resource.TestUtilsForResourceAwareScheduler.topoToTopologyDetails; +import static org.apache.storm.scheduler.resource.TestUtilsForResourceAwareScheduler.topologyBuilder; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static 
org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; + +@ExtendWith({NormalizedResourcesExtension.class}) +public class TestRoundRobinNodeSorterHostProximity { + private static final Logger LOG = LoggerFactory.getLogger(TestRoundRobinNodeSorterHostProximity.class); + private static final int CURRENT_TIME = 1450418597; + private static final Class strategyClass = RoundRobinResourceAwareStrategy.class; + + private Config createClusterConfig(double compPcore, double compOnHeap, double compOffHeap, + Map> pools) { + Config config = TestUtilsForResourceAwareScheduler.createClusterConfig(compPcore, compOnHeap, compOffHeap, pools); + config.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, strategyClass.getName()); + return config; + } + + private static class TestDNSToSwitchMapping implements DNSToSwitchMapping { + private final Map hostToRackMap; + private final Map> rackToHosts; + + @SafeVarargs + public TestDNSToSwitchMapping(Map... racks) { + Set seenHosts = new HashSet<>(); + Map hostToRackMap = new HashMap<>(); + Map> rackToHosts = new HashMap<>(); + for (int rackNum = 0; rackNum < racks.length; rackNum++) { + String rack = String.format("rack-%03d", rackNum); + for (SupervisorDetails sup : racks[rackNum].values()) { + hostToRackMap.put(sup.getHost(), rack); + String host = sup.getHost(); + if (!seenHosts.contains(host)) { + rackToHosts.computeIfAbsent(rack, rid -> new ArrayList<>()).add(host); + seenHosts.add(host); + } + } + } + this.hostToRackMap = Collections.unmodifiableMap(hostToRackMap); + this.rackToHosts = Collections.unmodifiableMap(rackToHosts); + } + + /** + * Use the "rack-%03d" embedded in the name of the supervisor to determine the rack number. + * + * @param supervisorDetailsCollection + */ + public TestDNSToSwitchMapping(Collection supervisorDetailsCollection) { + Set seenHosts = new HashSet<>(); + Map hostToRackMap = new HashMap<>(); + Map> rackToHosts = new HashMap<>(); + + for (SupervisorDetails supervisorDetails: supervisorDetailsCollection) { + String rackId = supervisorIdToRackName(supervisorDetails.getId()); + hostToRackMap.put(supervisorDetails.getHost(), rackId); + String host = supervisorDetails.getHost(); + if (!seenHosts.contains(host)) { + rackToHosts.computeIfAbsent(rackId, rid -> new ArrayList<>()).add(host); + seenHosts.add(host); + } + } + this.hostToRackMap = Collections.unmodifiableMap(hostToRackMap); + this.rackToHosts = Collections.unmodifiableMap(rackToHosts); + } + + @Override + public Map resolve(List names) { + return hostToRackMap; + } + + public Map> getRackToHosts() { + return rackToHosts; + } + } + + /** + * Test whether strategy will choose correct rack. 
+ */ + @Test + public void testMultipleRacksWithFavoritism() { + final Map supMap = new HashMap<>(); + final int numRacks = 1; + final int numSupersPerRack = 10; + final int numPortsPerSuper = 4; + final int numZonesPerHost = 2; + int rackStartNum = 0; + int supStartNum = 0; + final Map supMapRack0 = genSupervisorsWithRacksAndNuma( + numRacks, numSupersPerRack, numZonesPerHost, numPortsPerSuper, rackStartNum++, supStartNum, + 400, 8000, Collections.emptyMap(), 1.0); + + //generate another rack of supervisors with less resources + supStartNum += numSupersPerRack; + final Map supMapRack1 = genSupervisorsWithRacksAndNuma( + numRacks, numSupersPerRack, numZonesPerHost, numPortsPerSuper, rackStartNum++, supStartNum, + 200, 4000, Collections.emptyMap(), 1.0); + + //generate some supervisors that are depleted of one resource + supStartNum += numSupersPerRack; + final Map supMapRack2 = genSupervisorsWithRacksAndNuma( + numRacks, numSupersPerRack, numZonesPerHost, numPortsPerSuper, rackStartNum++, supStartNum, + 0, 8000, Collections.emptyMap(), 1.0); + + //generate some that has a lot of memory but little of cpu + supStartNum += numSupersPerRack; + final Map supMapRack3 = genSupervisorsWithRacksAndNuma( + numRacks, numSupersPerRack, numZonesPerHost, numPortsPerSuper, rackStartNum++, supStartNum, + 10, 8000 * 2 + 4000, Collections.emptyMap(), 1.0); + + //generate some that has a lot of cpu but little of memory + supStartNum += numSupersPerRack; + final Map supMapRack4 = genSupervisorsWithRacksAndNuma( + numRacks, numSupersPerRack, numZonesPerHost, numPortsPerSuper, rackStartNum++, supStartNum, + 400 + 200 + 10, 1000, Collections.emptyMap(), 1.0); + + supMap.putAll(supMapRack0); + supMap.putAll(supMapRack1); + supMap.putAll(supMapRack2); + supMap.putAll(supMapRack3); + supMap.putAll(supMapRack4); + + Config config = createClusterConfig(100, 500, 500, null); + config.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, Double.MAX_VALUE); + INimbus iNimbus = new INimbusTest(); + + //create test DNSToSwitchMapping plugin + TestDNSToSwitchMapping testDNSToSwitchMapping = + new TestDNSToSwitchMapping(supMapRack0, supMapRack1, supMapRack2, supMapRack3, supMapRack4); + + Config t1Conf = new Config(); + t1Conf.putAll(config); + final List t1FavoredHostNames = Arrays.asList("host-41", "host-42", "host-43"); + t1Conf.put(Config.TOPOLOGY_SCHEDULER_FAVORED_NODES, t1FavoredHostNames); + final List t1UnfavoredHostIds = Arrays.asList("host-1", "host-2", "host-3"); + t1Conf.put(Config.TOPOLOGY_SCHEDULER_UNFAVORED_NODES, t1UnfavoredHostIds); + //generate topologies + TopologyDetails topo1 = genTopology("topo-1", t1Conf, 8, 0, 2, 0, CURRENT_TIME - 2, 10, "user"); + + + Config t2Conf = new Config(); + t2Conf.putAll(config); + t2Conf.put(Config.TOPOLOGY_SCHEDULER_FAVORED_NODES, Arrays.asList("host-31", "host-32", "host-33")); + t2Conf.put(Config.TOPOLOGY_SCHEDULER_UNFAVORED_NODES, Arrays.asList("host-11", "host-12", "host-13")); + TopologyDetails topo2 = genTopology("topo-2", t2Conf, 8, 0, 2, 0, CURRENT_TIME - 2, 10, "user"); + + Topologies topologies = new Topologies(topo1, topo2); + Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config); + + List supHostnames = new LinkedList<>(); + for (SupervisorDetails sup : supMap.values()) { + supHostnames.add(sup.getHost()); + } + Map> rackToHosts = testDNSToSwitchMapping.getRackToHosts(); + cluster.setNetworkTopography(rackToHosts); + + NodeSorterHostProximity nodeSorter = new NodeSorterHostProximity(cluster, 
topo1, BaseResourceAwareStrategy.NodeSortType.COMMON); + nodeSorter.prepare(null); + List sortedRacks = StreamSupport.stream(nodeSorter.getSortedRacks().spliterator(), false) + .collect(Collectors.toList()); + String rackSummaries = sortedRacks.stream() + .map(x -> String.format("Rack %s -> scheduled-cnt %d, min-avail %f, avg-avail %f, cpu %f, mem %f", + x.id, nodeSorter.getScheduledExecCntByRackId().getOrDefault(x.id, new AtomicInteger(-1)).get(), + x.minResourcePercent, x.avgResourcePercent, + x.availableResources.getTotalCpu(), + x.availableResources.getTotalMemoryMb())) + .collect(Collectors.joining("\n\t")); + + Iterator it = sortedRacks.iterator(); + // rack-004 (a lot of cpu but little memory) comes first in this sorter's ordering + assertEquals("rack-004", it.next().id, "rack-004 should be ordered first\n\t" + rackSummaries); + // rack-000 has the most balanced set of resources + assertEquals("rack-000", it.next().id, "rack-000 should be ordered second\n\t" + rackSummaries); + // rack-003 has a lot of memory but little cpu + assertEquals("rack-003", it.next().id, "rack-003 should be ordered third\n\t" + rackSummaries); + // rack-001 has a balanced set of resources but less than rack-000 + assertEquals("rack-001", it.next().id, "rack-001 should be ordered fourth\n\t" + rackSummaries); + // rack-002 is last since it has no cpu resources + assertEquals("rack-002", it.next().id, "rack-002 should be ordered fifth\n\t" + rackSummaries); + } + + /** + * Test if hosts are presented together regardless of resource availability. + * Supervisors are created with multiple NUMA zones in such a manner that resources in the zones of the same host + * differ widely in resource availability. + */ + @Test + public void testMultipleRacksWithHostProximity() { + final Map supMap = new HashMap<>(); + final int numRacks = 1; + final int numSupersPerRack = 12; + final int numPortsPerSuper = 4; + final int numZonesPerHost = 3; + final double numaResourceMultiplier = 0.4; + int rackStartNum = 0; + int supStartNum = 0; + + final Map supMapRack0 = genSupervisorsWithRacksAndNuma( + numRacks, numSupersPerRack, numZonesPerHost, numPortsPerSuper, rackStartNum++, supStartNum, + 400, 8000, Collections.emptyMap(), numaResourceMultiplier); + + //generate another rack of supervisors with less resources + supStartNum += numSupersPerRack; + final Map supMapRack1 = genSupervisorsWithRacksAndNuma( + numRacks, numSupersPerRack, numZonesPerHost, numPortsPerSuper, rackStartNum++, supStartNum, + 200, 4000, Collections.emptyMap(), numaResourceMultiplier); + + //generate some supervisors that are depleted of one resource + supStartNum += numSupersPerRack; + final Map supMapRack2 = genSupervisorsWithRacksAndNuma( + numRacks, numSupersPerRack, numZonesPerHost, numPortsPerSuper, rackStartNum++, supStartNum, + 0, 8000, Collections.emptyMap(), numaResourceMultiplier); + + //generate some that have a lot of memory but little cpu + supStartNum += numSupersPerRack; + final Map supMapRack3 = genSupervisorsWithRacksAndNuma( + numRacks, numSupersPerRack, numZonesPerHost, numPortsPerSuper, rackStartNum++, supStartNum, + 10, 8000 * 2 + 4000, Collections.emptyMap(), numaResourceMultiplier); + + //generate some that have a lot of cpu but little memory + supStartNum += numSupersPerRack; + final Map supMapRack4 = genSupervisorsWithRacksAndNuma( + numRacks, numSupersPerRack, numZonesPerHost, numPortsPerSuper, rackStartNum++, supStartNum, + 400 + 200 + 10, 1000, Collections.emptyMap(), numaResourceMultiplier);
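+        // (An interpretation of this setup: with numZonesPerHost = 3 and numaResourceMultiplier = 0.4 the NUMA
+        // zones of one host expose unequal resources, and the ordering check below must still keep all zones of
+        // a host together.)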
+ + supMap.putAll(supMapRack0); + supMap.putAll(supMapRack1); + supMap.putAll(supMapRack2); + supMap.putAll(supMapRack3); + supMap.putAll(supMapRack4); + + Config config = createClusterConfig(100, 500, 500, null); + config.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, Double.MAX_VALUE); + INimbus iNimbus = new INimbusTest(); + + //create test DNSToSwitchMapping plugin + TestDNSToSwitchMapping testDNSToSwitchMapping = + new TestDNSToSwitchMapping(supMapRack0, supMapRack1, supMapRack2, supMapRack3, supMapRack4); + + Config t1Conf = new Config(); + t1Conf.putAll(config); + final List t1FavoredHostNames = Arrays.asList("host-41", "host-42", "host-43"); + t1Conf.put(Config.TOPOLOGY_SCHEDULER_FAVORED_NODES, t1FavoredHostNames); + final List t1UnfavoredHostIds = Arrays.asList("host-1", "host-2", "host-3"); + t1Conf.put(Config.TOPOLOGY_SCHEDULER_UNFAVORED_NODES, t1UnfavoredHostIds); + //generate topologies + TopologyDetails topo1 = genTopology("topo-1", t1Conf, 8, 0, 2, 0, CURRENT_TIME - 2, 10, "user"); + + + Config t2Conf = new Config(); + t2Conf.putAll(config); + t2Conf.put(Config.TOPOLOGY_SCHEDULER_FAVORED_NODES, Arrays.asList("host-31", "host-32", "host-33")); + t2Conf.put(Config.TOPOLOGY_SCHEDULER_UNFAVORED_NODES, Arrays.asList("host-11", "host-12", "host-13")); + TopologyDetails topo2 = genTopology("topo-2", t2Conf, 8, 0, 2, 0, CURRENT_TIME - 2, 10, "user"); + + Topologies topologies = new Topologies(topo1, topo2); + Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config); + + cluster.setNetworkTopography(testDNSToSwitchMapping.getRackToHosts()); + + INodeSorter nodeSorter = new NodeSorterHostProximity(cluster, topo1); + nodeSorter.prepare(null); + + Set seenHosts = new HashSet<>(); + String prevHost = null; + List errLines = new ArrayList(); + Map nodeToHost = new RasNodes(cluster).getNodeIdToHostname(); + for (String nodeId: nodeSorter.sortAllNodes()) { + String host = nodeToHost.getOrDefault(nodeId, "no-host-for-node-" + nodeId); + errLines.add(String.format("\tnodeId:%s, host:%s", nodeId, host)); + if (!host.equals(prevHost) && seenHosts.contains(host)) { + String err = String.format("Host %s for node %s is out of order:\n\t%s", host, nodeId, String.join("\n\t", errLines)); + fail(err); + } + seenHosts.add(host); + prevHost = host; + } + } + + /** + * Racks should be returned in order of decreasing capacity. 
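+     * Each rack below is generated with 100 less cpu than the one before it (600 down to 100),
+     * so the expected sort order is rack-000 first through rack-005 last.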
+ */ + @Test + public void testMultipleRacksOrderedByCapacity() { + final Map supMap = new HashMap<>(); + final int numRacks = 1; + final int numSupersPerRack = 10; + final int numPortsPerSuper = 4; + final int numZonesPerHost = 1; + final double numaResourceMultiplier = 1.0; + int rackStartNum = 0; + int supStartNum = 0; + + final Map supMapRack0 = genSupervisorsWithRacksAndNuma( + numRacks, numSupersPerRack, numZonesPerHost, numPortsPerSuper, rackStartNum++, supStartNum, + 600, 8000 - rackStartNum, Collections.emptyMap(), numaResourceMultiplier); + + supStartNum += numSupersPerRack; + final Map supMapRack1 = genSupervisorsWithRacksAndNuma( + numRacks, numSupersPerRack, numZonesPerHost, numPortsPerSuper, rackStartNum++, supStartNum, + 500, 8000 - rackStartNum, Collections.emptyMap(), numaResourceMultiplier); + + supStartNum += numSupersPerRack; + final Map supMapRack2 = genSupervisorsWithRacksAndNuma( + numRacks, numSupersPerRack, numZonesPerHost, numPortsPerSuper, rackStartNum++, supStartNum, + 400, 8000 - rackStartNum, Collections.emptyMap(), numaResourceMultiplier); + + supStartNum += numSupersPerRack; + final Map supMapRack3 = genSupervisorsWithRacksAndNuma( + numRacks, numSupersPerRack, numZonesPerHost, numPortsPerSuper, rackStartNum++, supStartNum, + 300, 8000 - rackStartNum, Collections.emptyMap(),numaResourceMultiplier); + + supStartNum += numSupersPerRack; + final Map supMapRack4 = genSupervisorsWithRacksAndNuma( + numRacks, numSupersPerRack, numZonesPerHost, numPortsPerSuper, rackStartNum++, supStartNum, + 200, 8000 - rackStartNum, Collections.emptyMap(), numaResourceMultiplier); + + // too small to hold topology + supStartNum += numSupersPerRack; + final Map supMapRack5 = genSupervisorsWithRacksAndNuma( + numRacks, numSupersPerRack, numZonesPerHost, numPortsPerSuper, rackStartNum++, supStartNum, + 100, 8000 - rackStartNum, Collections.singletonMap("gpu.count", 0.0), numaResourceMultiplier); + + supMap.putAll(supMapRack0); + supMap.putAll(supMapRack1); + supMap.putAll(supMapRack2); + supMap.putAll(supMapRack3); + supMap.putAll(supMapRack4); + supMap.putAll(supMapRack5); + + Config config = createClusterConfig(100, 500, 500, null); + config.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, Double.MAX_VALUE); + INimbus iNimbus = new INimbusTest(); + + //create test DNSToSwitchMapping plugin + TestDNSToSwitchMapping testDNSToSwitchMapping = + new TestDNSToSwitchMapping(supMapRack0, supMapRack1, supMapRack2, supMapRack3, supMapRack4, supMapRack5); + + //generate topologies + TopologyDetails topo1 = genTopology("topo-1", config, 8, 0, 2, 0, CURRENT_TIME - 2, 10, "user"); + TopologyDetails topo2 = genTopology("topo-2", config, 8, 0, 2, 0, CURRENT_TIME - 2, 10, "user"); + + Topologies topologies = new Topologies(topo1, topo2); + Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config); + + cluster.setNetworkTopography(testDNSToSwitchMapping.getRackToHosts()); + + NodeSorterHostProximity nodeSorter = new NodeSorterHostProximity(cluster, topo1); + nodeSorter.prepare(null); + List sortedRacks = StreamSupport.stream(nodeSorter.getSortedRacks().spliterator(), false) + .collect(Collectors.toList()); + String rackSummaries = sortedRacks + .stream() + .map(x -> String.format("Rack %s -> scheduled-cnt %d, min-avail %f, avg-avail %f, cpu %f, mem %f", + x.id, nodeSorter.getScheduledExecCntByRackId().getOrDefault(x.id, new AtomicInteger(-1)).get(), + x.minResourcePercent, x.avgResourcePercent, + 
x.availableResources.getTotalCpu(),
+                        x.availableResources.getTotalMemoryMb()))
+                .collect(Collectors.joining("\n\t"));
+        NormalizedResourceRequest topoResourceRequest = topo1.getApproximateTotalResources();
+        String topoRequest = String.format("Topo %s, approx-requested-resources %s", topo1.getId(), topoResourceRequest.toString());
+        Iterator<ObjectResourcesItem> it = sortedRacks.iterator();
+        assertEquals("rack-000", it.next().id, topoRequest + "\n\t" + rackSummaries + "\nrack-000 should be ordered first since it has the largest capacity");
+        assertEquals("rack-001", it.next().id, topoRequest + "\n\t" + rackSummaries + "\nrack-001 should be ordered second since it is smaller than rack-000");
+        assertEquals("rack-002", it.next().id, topoRequest + "\n\t" + rackSummaries + "\nrack-002 should be ordered third since it is smaller than rack-001");
+        assertEquals("rack-003", it.next().id, topoRequest + "\n\t" + rackSummaries + "\nrack-003 should be ordered fourth since it is smaller than rack-002");
+        assertEquals("rack-004", it.next().id, topoRequest + "\n\t" + rackSummaries + "\nrack-004 should be ordered fifth since it is smaller than rack-003");
+        assertEquals("rack-005", it.next().id, topoRequest + "\n\t" + rackSummaries + "\nrack-005 should be ordered last since it has the smallest capacity");
+    }
+
+    /**
+     * Schedule two topologies, one with special resources and one without.
+     * There are enough special resources to hold one topology with the special resource ("my.gpu").
+     * When using round robin scheduling, only one topology will be scheduled.
+     */
+    @Test
+    public void testAntiAffinityWithMultipleTopologies() {
+        INimbus iNimbus = new INimbusTest();
+        Map<String, SupervisorDetails> supMap = genSupervisorsWithRacks(1, 40, 66, 0, 0, 4700, 226200, new HashMap<>());
+        HashMap<String, Double> extraResources = new HashMap<>();
+        extraResources.put("my.gpu", 1.0);
+        supMap.putAll(genSupervisorsWithRacks(1, 40, 66, 1, 0, 4700, 226200, extraResources));
+
+        Config config = new Config();
+        config.putAll(createRoundRobinClusterConfig(88, 775, 25, null, null));
+
+        IScheduler scheduler = new ResourceAwareScheduler();
+        scheduler.prepare(config, new StormMetricsRegistry());
+
+        TopologyDetails tdSimple = genTopology("topology-simple", config, 1,
+                5, 100, 300, 0, 0, "user", 8192);
+
+        //Schedule the simple topology first
+        Topologies topologies = new Topologies(tdSimple);
+        Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config);
+
+        {
+            NodeSorterHostProximity nodeSorter = new NodeSorterHostProximity(cluster, tdSimple);
+            for (ExecutorDetails exec : tdSimple.getExecutors()) {
+                nodeSorter.prepare(exec);
+                List<ObjectResourcesItem> sortedRacks = StreamSupport
+                        .stream(nodeSorter.getSortedRacks().spliterator(), false)
+                        .collect(Collectors.toList());
+                String rackSummaries = sortedRacks.stream()
+                        .map(x -> String.format("Rack %s -> scheduled-cnt %d, min-avail %f, avg-avail %f, cpu %f, mem %f",
+                                x.id, nodeSorter.getScheduledExecCntByRackId().getOrDefault(x.id, new AtomicInteger(-1)).get(),
+                                x.minResourcePercent, x.avgResourcePercent,
+                                x.availableResources.getTotalCpu(),
+                                x.availableResources.getTotalMemoryMb()))
+                        .collect(Collectors.joining("\n\t"));
+                NormalizedResourceRequest topoResourceRequest = tdSimple.getApproximateTotalResources();
+                String topoRequest = String.format("Topo %s, approx-requested-resources %s", tdSimple.getId(), topoResourceRequest.toString());
+                assertEquals(2, sortedRacks.size(), rackSummaries + "\n# of racks sorted");
sorted"); + assertEquals("rack-000", sortedRacks.get(0).id, rackSummaries + "\nFirst rack sorted"); + assertEquals("rack-001", sortedRacks.get(1).id, rackSummaries + "\nSecond rack sorted"); + } + } + + scheduler.schedule(topologies, cluster); + Map assignments = new TreeMap<>(cluster.getAssignments()); + assertEquals(1, assignments.size()); + + TopologyBuilder builder = topologyBuilder(1, 5, 100, 300); + builder.setBolt("gpu-bolt", new TestBolt(), 40) + .addResource("my.gpu", 1.0) + .shuffleGrouping("spout-0"); + TopologyDetails tdGpu = topoToTopologyDetails("topology-gpu", config, builder.createTopology(), 0, 0,"user", 8192); + + //Now schedule GPU but with the simple topology in place. + topologies = new Topologies(tdSimple, tdGpu); + cluster = new Cluster(cluster, topologies); + { + NodeSorterHostProximity nodeSorter = new NodeSorterHostProximity(cluster, tdGpu); + for (ExecutorDetails exec : tdGpu.getExecutors()) { + String comp = tdGpu.getComponentFromExecutor(exec); + nodeSorter.prepare(exec); + List sortedRacks = StreamSupport + .stream(nodeSorter.getSortedRacks().spliterator(), false).collect(Collectors.toList()); + String rackSummaries = sortedRacks.stream() + .map(x -> String.format("Rack %s -> scheduled-cnt %d, min-avail %f, avg-avail %f, cpu %f, mem %f", + x.id, nodeSorter.getScheduledExecCntByRackId().getOrDefault(x.id, new AtomicInteger(-1)).get(), + x.minResourcePercent, x.avgResourcePercent, + x.availableResources.getTotalCpu(), + x.availableResources.getTotalMemoryMb())) + .collect(Collectors.joining("\n\t")); + NormalizedResourceRequest topoResourceRequest = tdSimple.getApproximateTotalResources(); + String topoRequest = String.format("Topo %s, approx-requested-resources %s", tdSimple.getId(), topoResourceRequest.toString()); + assertEquals(2, sortedRacks.size(), rackSummaries + "\n# of racks sorted"); + if (comp.equals("gpu-bolt")) { + assertEquals("rack-001", sortedRacks.get(0).id, rackSummaries + "\nFirst rack sorted for " + comp); + assertEquals("rack-000", sortedRacks.get(1).id, rackSummaries + "\nSecond rack sorted for " + comp); + } else { + assertEquals("rack-000", sortedRacks.get(0).id, rackSummaries + "\nFirst rack sorted for " + comp); + assertEquals("rack-001", sortedRacks.get(1).id, rackSummaries + "\nSecond rack sorted for " + comp); + } + } + } + + scheduler.schedule(topologies, cluster); + + assignments = new TreeMap<>(cluster.getAssignments()); + assertEquals(1, assignments.size()); // second topology is not expected to be assigned + + Map> topoPerRackCount = new HashMap<>(); + for (Map.Entry entry: assignments.entrySet()) { + SchedulerAssignment sa = entry.getValue(); + Map slotsPerRack = new TreeMap<>(); + for (WorkerSlot slot : sa.getSlots()) { + String nodeId = slot.getNodeId(); + String rack = supervisorIdToRackName(nodeId); + slotsPerRack.computeIfAbsent(rack, (r) -> new AtomicLong(0)).incrementAndGet(); + } + LOG.info("{} => {}", entry.getKey(), slotsPerRack); + topoPerRackCount.put(entry.getKey(), slotsPerRack); + } + + Map simpleCount = topoPerRackCount.get("topology-simple-0"); + assertNotNull(simpleCount); + //Because the simple topology was scheduled first we want to be sure that it didn't put anything on + // the GPU nodes. 
+        assertEquals(2, simpleCount.size()); //Both racks are in use
+        assertTrue(simpleCount.containsKey("r001")); //r001 is the second rack with GPUs
+        assertTrue(simpleCount.containsKey("r000")); //r000 is the first rack with no GPUs
+
+        //topology-gpu-0 was not scheduled (asserted above), so there is nothing further to check for it.
+    }
+
+    /**
+     * Free one-fifth of WorkerSlots.
+     */
+    private void freeSomeWorkerSlots(Cluster cluster) {
+        Map<String, SchedulerAssignment> assignmentMap = cluster.getAssignments();
+        for (SchedulerAssignment schedulerAssignment : assignmentMap.values()) {
+            int i = 0;
+            List<WorkerSlot> slotsToKill = new ArrayList<>();
+            for (WorkerSlot workerSlot : schedulerAssignment.getSlots()) {
+                i++;
+                if (i % 5 == 0) {
+                    slotsToKill.add(workerSlot);
+                }
+            }
+            cluster.freeSlots(slotsToKill);
+        }
+    }
+
+    /**
+     * Verify that the topology is scheduled across all available racks instead of
+     * filling the first rack and spilling onto the next.
+     */
+    @Test
+    public void testDistributeOverRacks() {
+        INimbus iNimbus = new INimbusTest();
+        double compPcore = 100;
+        double compOnHeap = 775;
+        double compOffHeap = 25;
+        int topo1NumSpouts = 1;
+        int topo1NumBolts = 5;
+        int topo1SpoutParallelism = 100;
+        int topo1BoltParallelism = 200;
+        final int numRacks = 3;
+        final int numSupersPerRack = 10;
+        final int numPortsPerSuper = 6;
+        final int numZonesPerHost = 1;
+        final double numaResourceMultiplier = 1.0;
+        int rackStartNum = 0;
+        int supStartNum = 0;
+        long compPerRack = (topo1NumSpouts * topo1SpoutParallelism + topo1NumBolts * topo1BoltParallelism) * 4/5; // one rack alone is not enough for topo1
+        long compPerSuper = compPerRack / numSupersPerRack;
+        double cpuPerSuper = compPcore * compPerSuper;
+        double memPerSuper = (compOnHeap + compOffHeap) * compPerSuper;
+        double topo1MaxHeapSize = memPerSuper;
+        final String topoName1 = "topology1";
+
+        Map<String, SupervisorDetails> supMap = genSupervisorsWithRacksAndNuma(
+                numRacks, numSupersPerRack, numZonesPerHost, numPortsPerSuper, rackStartNum, supStartNum,
+                cpuPerSuper, memPerSuper, Collections.emptyMap(), numaResourceMultiplier);
+        TestDNSToSwitchMapping testDNSToSwitchMapping = new TestDNSToSwitchMapping(supMap.values());
+
+        Config config = new Config();
+        config.putAll(createRoundRobinClusterConfig(compPcore, compOnHeap, compOffHeap, null, null));
+
+        IScheduler scheduler = new ResourceAwareScheduler();
+        scheduler.prepare(config, new StormMetricsRegistry());
+
+        TopologyDetails td1 = genTopology(topoName1, config, topo1NumSpouts,
+                topo1NumBolts, topo1SpoutParallelism, topo1BoltParallelism, 0, 0, "user", topo1MaxHeapSize);
+
+        //Schedule topo1 and ensure it is spread across all racks
+        Topologies topologies = new Topologies(td1);
+        Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config);
+        cluster.setNetworkTopography(testDNSToSwitchMapping.getRackToHosts());
+
+        scheduler.schedule(topologies, cluster);
+        Set<String> assignedRacks = cluster.getAssignedRacks(td1.getId());
+        assertEquals(numRacks, assignedRacks.size(), "Racks for topology=" + td1.getId() + " is " + assignedRacks);
+    }
+
+    /**
+     * Under round robin scheduling, racks are selected with equal likelihood, rather than
+     * preferring racks with low resources that already run components of the same topology.
+     * <ul>
+     * <li>Schedule topo1, which spreads across all racks</li>
+     * <li>Schedule topo2 - it cannot be scheduled since topo1 occupies all slots</li>
+     * <li>Unassign topo2, free some worker slots, and reschedule</li>
+     * <li>topo1 should again utilize all racks</li>
+     * </ul>
+     */
+    @Test
+    public void testDistributeAcrossRacks() {
+        INimbus iNimbus = new INimbusTest();
+        double compPcore = 100;
+        double compOnHeap = 775;
+        double compOffHeap = 25;
+        int topo1NumSpouts = 1;
+        int topo1NumBolts = 5;
+        int topo1SpoutParallelism = 100;
+        int topo1BoltParallelism = 200;
+        int topo2NumSpouts = 1;
+        int topo2NumBolts = 5;
+        int topo2SpoutParallelism = 10;
+        int topo2BoltParallelism = 20;
+        final int numRacks = 3;
+        final int numSupersPerRack = 10;
+        final int numPortsPerSuper = 6;
+        final int numZonesPerHost = 1;
+        final double numaResourceMultiplier = 1.0;
+        int rackStartNum = 0;
+        int supStartNum = 0;
+        long compPerRack = (topo1NumSpouts * topo1SpoutParallelism + topo1NumBolts * topo1BoltParallelism
+                + topo2NumSpouts * topo2SpoutParallelism); // enough for topo1 but not topo1+topo2
+        long compPerSuper = compPerRack / numSupersPerRack;
+        double cpuPerSuper = compPcore * compPerSuper;
+        double memPerSuper = (compOnHeap + compOffHeap) * compPerSuper;
+        double topo1MaxHeapSize = memPerSuper;
+        double topo2MaxHeapSize = memPerSuper;
+        final String topoName1 = "topology1";
+        final String topoName2 = "topology2";
+
+        Map<String, SupervisorDetails> supMap = genSupervisorsWithRacksAndNuma(
+                numRacks, numSupersPerRack, numZonesPerHost, numPortsPerSuper, rackStartNum, supStartNum,
+                cpuPerSuper, memPerSuper, Collections.emptyMap(), numaResourceMultiplier);
+        TestDNSToSwitchMapping testDNSToSwitchMapping = new TestDNSToSwitchMapping(supMap.values());
+
+        Config config = new Config();
+        config.putAll(createRoundRobinClusterConfig(compPcore, compOnHeap, compOffHeap, null, null));
+
+        IScheduler scheduler = new ResourceAwareScheduler();
+        scheduler.prepare(config, new StormMetricsRegistry());
+
+        TopologyDetails td1 = genTopology(topoName1, config, topo1NumSpouts,
+                topo1NumBolts, topo1SpoutParallelism, topo1BoltParallelism, 0, 0, "user", topo1MaxHeapSize);
+
+        //Schedule topo1 and ensure it is spread across all racks
+        Topologies topologies = new Topologies(td1);
+        Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config);
+        cluster.setNetworkTopography(testDNSToSwitchMapping.getRackToHosts());
+
+        scheduler.schedule(topologies, cluster);
+        Set<String> assignedRacks = cluster.getAssignedRacks(td1.getId());
+        assertEquals(numRacks, assignedRacks.size(), "Racks for topology=" + td1.getId() + " is " + assignedRacks);
+
+        TopologyBuilder builder = topologyBuilder(topo2NumSpouts, topo2NumBolts, topo2SpoutParallelism, topo2BoltParallelism);
+        TopologyDetails td2 = topoToTopologyDetails(topoName2, config, builder.createTopology(), 0, 0, "user", topo2MaxHeapSize);
+
+        //Now schedule topo2 with topo1 already in place.
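+        // Note: the Cluster copy constructor below carries over topo1's existing assignments
+        // while making topo2 visible to the scheduler.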
+        topologies = new Topologies(td1, td2);
+        cluster = new Cluster(cluster, topologies);
+        scheduler.schedule(topologies, cluster);
+
+        assignedRacks = cluster.getAssignedRacks(td1.getId(), td2.getId());
+        assertEquals(numRacks, assignedRacks.size(), "Racks for topologies=" + td1.getId() + "/" + td2.getId() + " is " + assignedRacks);
+
+        // topo2 will not get scheduled since topo1 occupies all racks
+        assignedRacks = cluster.getAssignedRacks(td2.getId());
+        assertEquals(0, assignedRacks.size(), "Racks for topology=" + td2.getId() + " is " + assignedRacks);
+
+        // now unassign topo2, expect all racks to remain in use by topo1; then free some slots and reschedule some topo1 executors
+        cluster.unassign(td2.getId());
+        assignedRacks = cluster.getAssignedRacks(td2.getId());
+        assertEquals(0, assignedRacks.size(),
+                "After unassigning topology " + td2.getId() + ", racks for topology=" + td2.getId() + " is " + assignedRacks);
+        assignedRacks = cluster.getAssignedRacks(td1.getId());
+        assertEquals(numRacks, assignedRacks.size(),
+                "After unassigning topology " + td2.getId() + ", racks for topology=" + td1.getId() + " is " + assignedRacks);
+        assertFalse(cluster.needsSchedulingRas(td1),
+                "Topology " + td1.getId() + " should be fully assigned before freeing slots");
+        freeSomeWorkerSlots(cluster);
+        assertTrue(cluster.needsSchedulingRas(td1),
+                "Topology " + td1.getId() + " should need scheduling after freeing slots");
+
+        // then reschedule the freed executors
+        scheduler.schedule(topologies, cluster);
+
+        // all racks should be in use by topology1
+        assignedRacks = cluster.getAssignedRacks(td1.getId());
+        assertEquals(numRacks, assignedRacks.size(),
+                "After reassigning topology " + td1.getId() + ", racks for topology=" + td1.getId() + " is " + assignedRacks);
+    }
+
+    /**
+     * Assign and then clear out a rack-to-hosts mapping in cluster.networkTopography.
+     * Expected behavior is that:
+     * <ul>
+     * <li>the rack without hosts does not show up in {@link NodeSorterHostProximity#getSortedRacks()}</li>
+     * <li>all the supervisor nodes still get returned in {@link NodeSorterHostProximity#sortAllNodes()}</li>
+     * <li>supervisors on the cleared rack show up under {@link DNSToSwitchMapping#DEFAULT_RACK}</li>
+     * </ul>
+     *
+     * Force an unusual condition, where one of the racks is still passed to LazyNodeSortingIterator with
+     * an empty list, and then ensure that the code is resilient.
    + */ + @Test + void testWithImpairedClusterNetworkTopography() { + INimbus iNimbus = new INimbusTest(); + double compPcore = 100; + double compOnHeap = 775; + double compOffHeap = 25; + int topo1NumSpouts = 1; + int topo1NumBolts = 5; + int topo1SpoutParallelism = 100; + int topo1BoltParallelism = 200; + final int numSupersPerRack = 10; + final int numPortsPerSuper = 66; + long compPerRack = (topo1NumSpouts * topo1SpoutParallelism + topo1NumBolts * topo1BoltParallelism + 10); + long compPerSuper = compPerRack / numSupersPerRack; + double cpuPerSuper = compPcore * compPerSuper; + double memPerSuper = (compOnHeap + compOffHeap) * compPerSuper; + double topo1MaxHeapSize = memPerSuper; + final String topoName1 = "topology1"; + int numRacks = 3; + + Map supMap = genSupervisorsWithRacks(numRacks, numSupersPerRack, numPortsPerSuper, + 0, 0, cpuPerSuper, memPerSuper, new HashMap<>()); + TestDNSToSwitchMapping testDNSToSwitchMapping = new TestDNSToSwitchMapping(supMap.values()); + + Config config = new Config(); + config.putAll(createRoundRobinClusterConfig(compPcore, compOnHeap, compOffHeap, null, null)); + + IScheduler scheduler = new ResourceAwareScheduler(); + scheduler.prepare(config, new StormMetricsRegistry()); + + TopologyDetails td1 = genTopology(topoName1, config, topo1NumSpouts, + topo1NumBolts, topo1SpoutParallelism, topo1BoltParallelism, 0, 0, "user", topo1MaxHeapSize); + + Topologies topologies = new Topologies(td1); + Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config); + cluster.setNetworkTopography(testDNSToSwitchMapping.getRackToHosts()); + + Map> networkTopography = cluster.getNetworkTopography(); + assertEquals(numRacks, networkTopography.size(), "Expecting " + numRacks + " racks found " + networkTopography.size()); + assertTrue(networkTopography.size() >= 3, "Expecting racks count to be >= 3, found " + networkTopography.size()); + + // Impair cluster.networkTopography and set one rack to have zero hosts, getSortedRacks should exclude this rack. 
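+        // (impairClusterRack below only clears the rack's host list; the supervisor map is left intact)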
+        // Keep the supervisorDetails unchanged - confirm that these nodes are not lost even with incomplete networkTopography
+        String rackIdToZero = networkTopography.keySet().stream().findFirst().get();
+        impairClusterRack(cluster, rackIdToZero, true, false);
+
+        NodeSorterHostProximity nodeSorterHostProximity = new NodeSorterHostProximity(cluster, td1);
+        nodeSorterHostProximity.getSortedRacks().forEach(x -> assertNotEquals(x.id, rackIdToZero));
+
+        // confirm that the above action has not lost the hosts and that they appear under the DEFAULT rack
+        {
+            Set<String> seenRacks = new HashSet<>();
+            nodeSorterHostProximity.getSortedRacks().forEach(x -> seenRacks.add(x.id));
+            assertEquals(numRacks, seenRacks.size(), "Expecting rack count to still be " + numRacks);
+            assertTrue(seenRacks.contains(DNSToSwitchMapping.DEFAULT_RACK),
+                    "Expecting to see default-rack=" + DNSToSwitchMapping.DEFAULT_RACK + " in sortedRacks");
+        }
+
+        // now check that no node/supervisor is missing when sorting all nodes
+        Set<String> expectedNodes = supMap.keySet();
+        Set<String> seenNodes = new HashSet<>();
+        nodeSorterHostProximity.prepare(null);
+        nodeSorterHostProximity.sortAllNodes().forEach(n -> seenNodes.add(n));
+        assertEquals(expectedNodes, seenNodes, "Expecting to see all supervisors");
+
+        // Now fully impair the cluster - confirm no default rack
+        {
+            cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config);
+            cluster.setNetworkTopography(new TestDNSToSwitchMapping(supMap.values()).getRackToHosts());
+            impairClusterRack(cluster, rackIdToZero, true, true);
+            Set<String> seenRacks = new HashSet<>();
+            NodeSorterHostProximity nodeSorterHostProximity2 = new NodeSorterHostProximity(cluster, td1);
+            nodeSorterHostProximity2.getSortedRacks().forEach(x -> seenRacks.add(x.id));
+            Map<String, Set<String>> rackIdToHosts = nodeSorterHostProximity2.getRackIdToHosts();
+            String dumpOfRacks = rackIdToHosts.entrySet().stream()
+                    .map(x -> String.format("rack %s -> hosts [%s]", x.getKey(), String.join(",", x.getValue())))
+                    .collect(Collectors.joining("\n\t"));
+            assertEquals(numRacks - 1, seenRacks.size(),
+                    "Expecting rack cnt to be " + (numRacks - 1) + " but found " + seenRacks.size() + "\n\t" + dumpOfRacks);
+            assertFalse(seenRacks.contains(DNSToSwitchMapping.DEFAULT_RACK),
+                    "Found default-rack=" + DNSToSwitchMapping.DEFAULT_RACK + " in \n\t" + dumpOfRacks);
+        }
+    }
+
+    /**
+     * Blacklist all nodes for a rack before sorting nodes.
+     * Confirm that {@link NodeSorterHostProximity#sortAllNodes()} still works.
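+     * The test first blacklists a partial set of hosts, and then every host on one rack.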
+ * + */ + @Test + void testWithBlackListedHosts() { + INimbus iNimbus = new INimbusTest(); + double compPcore = 100; + double compOnHeap = 775; + double compOffHeap = 25; + int topo1NumSpouts = 1; + int topo1NumBolts = 5; + int topo1SpoutParallelism = 100; + int topo1BoltParallelism = 200; + final int numSupersPerRack = 10; + final int numPortsPerSuper = 66; + long compPerRack = (topo1NumSpouts * topo1SpoutParallelism + topo1NumBolts * topo1BoltParallelism + 10); + long compPerSuper = compPerRack / numSupersPerRack; + double cpuPerSuper = compPcore * compPerSuper; + double memPerSuper = (compOnHeap + compOffHeap) * compPerSuper; + double topo1MaxHeapSize = memPerSuper; + final String topoName1 = "topology1"; + int numRacks = 3; + + Map supMap = genSupervisorsWithRacks(numRacks, numSupersPerRack, numPortsPerSuper, + 0, 0, cpuPerSuper, memPerSuper, new HashMap<>()); + TestDNSToSwitchMapping testDNSToSwitchMapping = new TestDNSToSwitchMapping(supMap.values()); + + Config config = new Config(); + config.putAll(createRoundRobinClusterConfig(compPcore, compOnHeap, compOffHeap, null, null)); + + IScheduler scheduler = new ResourceAwareScheduler(); + scheduler.prepare(config, new StormMetricsRegistry()); + + TopologyDetails td1 = genTopology(topoName1, config, topo1NumSpouts, + topo1NumBolts, topo1SpoutParallelism, topo1BoltParallelism, 0, 0, "user", topo1MaxHeapSize); + + Topologies topologies = new Topologies(td1); + Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config); + cluster.setNetworkTopography(testDNSToSwitchMapping.getRackToHosts()); + + Map> networkTopography = cluster.getNetworkTopography(); + assertEquals(numRacks, networkTopography.size(), "Expecting " + numRacks + " racks found " + networkTopography.size()); + assertTrue(networkTopography.size() >= 3, "Expecting racks count to be >= 3, found " + networkTopography.size()); + + Set blackListedHosts = new HashSet<>(); + List supArray = new ArrayList<>(supMap.values()); + for (int i = 0 ; i < numSupersPerRack ; i++) { + blackListedHosts.add(supArray.get(i).getHost()); + } + blacklistHostsAndSortNodes(blackListedHosts, supMap.values(), cluster, td1); + + String rackToClear = cluster.getNetworkTopography().keySet().stream().findFirst().get(); + blackListedHosts = new HashSet<>(cluster.getNetworkTopography().get(rackToClear)); + blacklistHostsAndSortNodes(blackListedHosts, supMap.values(), cluster, td1); + } + + // Impair cluster by blacklisting some hosts + private void blacklistHostsAndSortNodes( + Set blackListedHosts, Collection sups, Cluster cluster, TopologyDetails td1) { + LOG.info("blackListedHosts={}", blackListedHosts); + cluster.setBlacklistedHosts(blackListedHosts); + + NodeSorterHostProximity nodeSorterHostProximity = new NodeSorterHostProximity(cluster, td1); + // confirm that the above action loses hosts + { + Set allHosts = sups.stream().map(x -> x.getHost()).collect(Collectors.toSet()); + Set seenRacks = new HashSet<>(); + nodeSorterHostProximity.getSortedRacks().forEach(x -> seenRacks.add(x.id)); + Set seenHosts = new HashSet<>(); + nodeSorterHostProximity.getRackIdToHosts().forEach((k,v) -> seenHosts.addAll(v)); + allHosts.removeAll(seenHosts); + assertEquals(allHosts, blackListedHosts, "Expecting only blacklisted hosts removed"); + } + + // now check if sortAllNodes still works + Set expectedNodes = sups.stream() + .filter(x -> !blackListedHosts.contains(x.getHost())) + .map(x ->x.getId()) + .collect(Collectors.toSet()); + Set seenNodes = 
new HashSet<>();
+        nodeSorterHostProximity.prepare(null);
+        nodeSorterHostProximity.sortAllNodes().forEach(n -> seenNodes.add(n));
+        assertEquals(expectedNodes, seenNodes, "Expecting to see all non-blacklisted supervisors");
+    }
+
+    /**
+     * Impair the cluster for a specified rackId, by one or both of:
+     * <ul>
+     * <li>making the host list zero length</li>
+     * <li>removing supervisors for the hosts on the rack</li>
+     * </ul>
+     *
+     * @param cluster cluster to impair
+     * @param rackId rackId to clear
+     * @param clearNetworkTopography if true, then clear (but not remove) the host list for the rack.
+     * @param clearSupervisorMap if true, then remove supervisors for the rack.
+     */
+    private void impairClusterRack(Cluster cluster, String rackId, boolean clearNetworkTopography, boolean clearSupervisorMap) {
+        Set<String> hostIds = new HashSet<>(cluster.getNetworkTopography().computeIfAbsent(rackId, k -> new ArrayList<>()));
+        if (clearNetworkTopography) {
+            cluster.getNetworkTopography().computeIfAbsent(rackId, k -> new ArrayList<>()).clear();
+        }
+        if (clearSupervisorMap) {
+            Set<String> supToRemove = new HashSet<>();
+            for (String hostId : hostIds) {
+                cluster.getSupervisorsByHost(hostId).forEach(s -> supToRemove.add(s.getId()));
+            }
+            Map<String, SupervisorDetails> supervisorDetailsMap = cluster.getSupervisors();
+            for (String supId : supToRemove) {
+                supervisorDetailsMap.remove(supId);
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/storm-shaded-deps/pom.xml b/storm-shaded-deps/pom.xml
index c5130e4ee01..23847c4af65 100644
--- a/storm-shaded-deps/pom.xml
+++ b/storm-shaded-deps/pom.xml
@@ -23,7 +23,7 @@
     <parent>
         <artifactId>storm</artifactId>
        <groupId>org.apache.storm</groupId>
-        <version>2.5.0-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
        <relativePath>../pom.xml</relativePath>
    </parent>
diff --git a/storm-submit-tools/pom.xml b/storm-submit-tools/pom.xml
index b63948444c1..2344a2d86b9 100644
--- a/storm-submit-tools/pom.xml
+++ b/storm-submit-tools/pom.xml
@@ -17,7 +17,7 @@
    <parent>
        <artifactId>storm</artifactId>
        <groupId>org.apache.storm</groupId>
-        <version>2.5.0-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
        <relativePath>../pom.xml</relativePath>
    </parent>
    <modelVersion>4.0.0</modelVersion>
diff --git a/storm-webapp/pom.xml b/storm-webapp/pom.xml
index 0cfdc7aa210..9e5ed421d79 100644
--- a/storm-webapp/pom.xml
+++ b/storm-webapp/pom.xml
@@ -21,7 +21,7 @@
    <parent>
        <artifactId>storm</artifactId>
        <groupId>org.apache.storm</groupId>
-        <version>2.5.0-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
        <relativePath>../pom.xml</relativePath>
    </parent>
diff --git a/storm-webapp/src/main/java/org/apache/storm/daemon/logviewer/utils/DirectoryCleaner.java b/storm-webapp/src/main/java/org/apache/storm/daemon/logviewer/utils/DirectoryCleaner.java
index f4bfcd7c2f5..8d1da3509b6 100644
--- a/storm-webapp/src/main/java/org/apache/storm/daemon/logviewer/utils/DirectoryCleaner.java
+++ b/storm-webapp/src/main/java/org/apache/storm/daemon/logviewer/utils/DirectoryCleaner.java
@@ -173,6 +173,7 @@ public DeletionMeta deleteOldestWhileTooLarge(List<Path> dirs,
                LOG.warn("No more files eligible to be deleted this round, but {} is over {} quota by {} MB",
                        forPerDir ? "worker directory: " + dirs.get(0).toAbsolutePath().normalize() : "log root directory",
                        forPerDir ? "per-worker" : "global", toDeleteSize * 1e-6);
+                break; // No entries left to delete
            }
        }
        return new DeletionMeta(deletedSize, deletedFiles);