diff --git a/README.md b/README.md
index 5632e7b7..4e8b1021 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 This is a demo of GeoTrellis functionality. Demo consists of two parts: tile ingest process and demo server to query ingested data.
 
-To run ingest, use `./ingest.sh`, to run server, use `./sbt run`. Web map would be available here `http://locahost:8777/`.
+To run the ingest, use `./ingest.sh`; to run the server, use `./run-server.sh`. The web map will be available at `http://localhost:8777/`.
 
 ## Short description
 
@@ -54,18 +54,13 @@ If the `mask` option is set to a polygon, `{zoom}/{x}/{y}` tiles masked by polygon
 
 This service takes layers, weights and a polygon. It will compute a weighted summary of the area under the polygon.
 
-## Runing demo using [GeoDocker cluster](https://github.com/geotrellis/geodocker-cluster)
+## Running the demo using [GeoDocker cluster](https://github.com/geodocker/geodocker)
 
-To compile and run this demo, we prepared a development environment.
-
-* Clone GeoDocker cluster repository:
-  ```bash
-  git clone https://github.com/geotrellis/geodocker-cluster ./
-  ```
+To compile and run this demo, we prepared an [environment](https://github.com/geodocker/geodocker). To run the cluster, we use a slightly modified [docker-compose.yml](docker-compose.yml) file:
 
 * To run cluster:
   ```bash
-  cd ./geodocker-cluster/nodes; ./start-cluster.sh -n=2 # n >= 1, nodes amount
+  docker-compose up
   ```
 
   To check that cluster is operating normally check pages availability:
@@ -78,14 +73,59 @@ To compile and run this demo, we prepared a development environment.
   ```bash
   docker ps -a | grep geodocker
   ```
-  Runing containers have names `master1`, `slave1`, ..., `slaveN`, `N = n - 1`.
-
-* Install and run this demo on cluster
-  ```bash
-  cd ./geodocker-cluster/install/geotrellis
-  ./install.sh
-  ./ingest.sh # to ingest
-  ./run.sh # to run server on a cluster
-  ```
-  This demo would be installed into `/data` directory, inside the container.
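+
+  To probe the published pages from the host, something like the following works (an illustrative sketch; the ports are the ones published in [docker-compose.yml](docker-compose.yml)):
+
+  ```bash
+  curl -s -o /dev/null -w 'HDFS name node:   %{http_code}\n' http://localhost:50070
+  curl -s -o /dev/null -w 'Accumulo monitor: %{http_code}\n' http://localhost:50095
+  curl -s -o /dev/null -w 'Spark master UI:  %{http_code}\n' http://localhost:8080
+  ```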
+  More information is available in the [GeoDocker cluster](https://github.com/geodocker/geodocker) repo.
+
+* Install and run this demo using [GeoDocker cluster](https://github.com/geodocker/geodocker)
+
+  * Modify [application.conf](geotrellis/src/main/resources/application.conf) (a working config example for the GeoDocker cluster):
+
+    ```conf
+    geotrellis {
+      port = 8777
+      server.static-path = "../static"
+      catalog = "data/catalog.json"
+      hostname = "spark-master"
+      backend = "accumulo"
+    }
+
+    accumulo {
+      instance = "accumulo"
+      user = "root"
+      password = "GisPwd"
+      zookeepers = "zookeeper"
+    }
+    ```
+
+  * Modify [backend-profiles.json](geotrellis/conf/backend-profiles.json) (a working config example for the GeoDocker cluster):
+
+    ```json
+    {
+      "name": "accumulo-local",
+      "type": "accumulo",
+      "zookeepers": "zookeeper",
+      "instance": "accumulo",
+      "user": "root",
+      "password": "GisPwd"
+    }
+    ```
+
+  * Copy everything into the Spark master container:
+
+    ```bash
+    cd ./geodocker
+    docker exec geotrellischattademo_spark-master_1 mkdir -p /data/target/scala-2.10/
+    docker cp target/scala-2.10/GeoTrellis-Tutorial-Project-assembly-0.1-SNAPSHOT.jar geotrellischattademo_spark-master_1:/data/target/scala-2.10/GeoTrellis-Tutorial-Project-assembly-0.1-SNAPSHOT.jar
+    docker cp data/arg_wm/ geotrellischattademo_spark-master_1:/data/
+    docker cp conf geotrellischattademo_spark-master_1:/data/
+    docker cp ingest.sh geotrellischattademo_spark-master_1:/data/
+    docker cp run-server.sh geotrellischattademo_spark-master_1:/data/
+    ```
+
+    ```bash
+    docker exec -it geotrellischattademo_spark-master_1 bash
+    cd /data/; ./ingest.sh     # to ingest data into Accumulo
+    cd /data/; ./run-server.sh # to run the server
+    ```
+
+    This demo is installed into the `/data` directory inside the Spark master container.
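+
+  * (Optional) Verify the ingest by listing the Accumulo tables and looking for the `chattanooga` table from [output.json](geotrellis/conf/output.json). This is a sketch; the container name is assumed to follow the same compose naming pattern as above:
+
+    ```bash
+    docker exec -it geotrellischattademo_accumulo-master_1 \
+      accumulo shell -u root -p GisPwd -e "tables"
+    ```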
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 00000000..0c55afc8
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,74 @@
+version: '2'
+services:
+  hdfs-name:
+    image: quay.io/geodocker/hdfs:latest
+    command: name
+    environment:
+      HADOOP_MASTER_ADDRESS: hdfs-name
+    ports:
+      - 50070:50070
+    # volumes:
+    #   - '/data/gt/hdfs:/data/hdfs'
+  hdfs-data:
+    image: quay.io/geodocker/hdfs:latest
+    command: data
+    environment:
+      HADOOP_MASTER_ADDRESS: hdfs-name
+    depends_on:
+      - hdfs-name
+    # volumes:
+    #   - '/data/gt/hdfs:/data/hdfs'
+  zookeeper:
+    image: quay.io/geodocker/zookeeper:latest
+    ports:
+      - 2181:2181
+    # volumes:
+    #   - '/data/gt/hdfs:/data/hdfs'
+  accumulo-master:
+    image: quay.io/geodocker/accumulo:latest
+    command: master --auto-init
+    environment:
+      HADOOP_MASTER_ADDRESS: hdfs-name
+      ZOOKEEPERS: zookeeper
+      ACCUMULO_PASSWORD: GisPwd
+    depends_on:
+      - zookeeper
+  accumulo-monitor:
+    image: quay.io/geodocker/accumulo:latest
+    command: monitor
+    environment:
+      HADOOP_MASTER_ADDRESS: hdfs-name
+      ZOOKEEPERS: zookeeper
+    ports:
+      - 50095:50095
+    depends_on:
+      - zookeeper
+      - accumulo-master
+  accumulo-tserver:
+    image: quay.io/geodocker/accumulo:latest
+    command: tserver
+    environment:
+      HADOOP_MASTER_ADDRESS: hdfs-name
+      ZOOKEEPERS: zookeeper
+    depends_on:
+      - zookeeper
+      - accumulo-master
+  spark-master:
+    image: quay.io/geodocker/spark:latest
+    command: master
+    environment:
+      HADOOP_MASTER_ADDRESS: hdfs-name
+    ports:
+      - 4040:4040
+      - 8080:8080
+      - 8777:8777
+    depends_on:
+      - hdfs-data
+  spark-worker:
+    image: quay.io/geodocker/spark:latest
+    command: worker
+    environment:
+      HADOOP_MASTER_ADDRESS: hdfs-name
+      SPARK_MASTER: spark-master
+    depends_on:
+      - spark-master
\ No newline at end of file
diff --git a/geotrellis/build.sbt b/geotrellis/build.sbt
index db0f460a..91571e61 100644
--- a/geotrellis/build.sbt
+++ b/geotrellis/build.sbt
@@ -21,21 +21,23 @@ pomIncludeRepository := { _ => false }
 
 resolvers += Resolver.bintrayRepo("azavea", "geotrellis")
 
-val gtVersion = "0.10.0-RC4"
+val gtVersion = "1.0.0-SNAPSHOT"
 
 val geotrellis = Seq(
   "com.azavea.geotrellis" %% "geotrellis-accumulo" % gtVersion,
+  "com.azavea.geotrellis" %% "geotrellis-hbase" % gtVersion,
+  "com.azavea.geotrellis" %% "geotrellis-cassandra" % gtVersion,
   "com.azavea.geotrellis" %% "geotrellis-s3" % gtVersion,
   "com.azavea.geotrellis" %% "geotrellis-spark" % gtVersion,
   "com.azavea.geotrellis" %% "geotrellis-spark-etl" % gtVersion
 )
 
-libraryDependencies ++= Seq(
-  "org.apache.spark" %% "spark-core" % "1.5.2",
-  "io.spray" %% "spray-routing" % "1.3.3",
-  "io.spray" %% "spray-can" % "1.3.3",
-  "org.apache.hadoop" % "hadoop-client" % "2.7.1"
-) ++ geotrellis
+libraryDependencies ++= (Seq(
+  "org.apache.spark" %% "spark-core" % "1.5.2",
+  "io.spray" %% "spray-routing" % "1.3.3",
+  "io.spray" %% "spray-can" % "1.3.3",
+  "org.apache.hadoop" % "hadoop-client" % "2.7.1"
+) ++ geotrellis).map(_.exclude("com.google.guava", "guava")) ++ Seq("com.google.guava" % "guava" % "16.0.1")
 
 ivyScala := ivyScala.value map { _.copy(overrideScalaVersion = true) }
diff --git a/geotrellis/conf/backend-profiles.json b/geotrellis/conf/backend-profiles.json
new file mode 100644
index 00000000..7b91b1db
--- /dev/null
+++ b/geotrellis/conf/backend-profiles.json
@@ -0,0 +1,24 @@
+{
+  "backend-profiles": [
+    {
+      "name": "accumulo-local",
+      "type": "accumulo",
+      "zookeepers": "localhost",
+      "instance": "gis",
+      "user": "root",
+      "password": "secret"
+    },
+    {
+      "name": "cassandra-local",
+      "type": "cassandra",
"allowRemoteDCsForLocalConsistencyLevel": false, + "localDc": "datacenter1", + "usedHostsPerRemoteDc": 0, + "hosts": "localhost", + "replicationStrategy": "SimpleStrategy", + "replicationFactor": 1, + "user": "", + "password": "" + } + ] +} diff --git a/geotrellis/conf/input.json b/geotrellis/conf/input.json new file mode 100644 index 00000000..8244249c --- /dev/null +++ b/geotrellis/conf/input.json @@ -0,0 +1,110 @@ +[ + { + "name": "DevelopedLand", + "format": "geotiff", + "backend": { + "type": "hadoop", + "path": "file:///data/arg_wm/DevelopedLand.tiff" + }, + "cache": "NONE" + }, + { + "name": "ExampleOfWeightedSumOutput", + "format": "geotiff", + "backend": { + "type": "hadoop", + "path": "file:///data/arg_wm/ExampleOfWeightedSumOutput.tiff" + }, + "cache": "NONE" + }, + { + "name": "FarmlandOrForestedLandsWithPrimeAgriculturalSoils", + "format": "geotiff", + "backend": { + "type": "hadoop", + "path": "file:///data/arg_wm/FarmlandOrForestedLandsWithPrimeAgriculturalSoils.tiff" + }, + "cache": "NONE" + }, + { + "name": "FarmlandWithoutPrimeAgriculturalSoils", + "format": "geotiff", + "backend": { + "type": "hadoop", + "path": "file:///data/arg_wm/FarmlandWithoutPrimeAgriculturalSoils.tiff" + }, + "cache": "NONE" + }, + { + "name": "ForestedLands", + "format": "geotiff", + "backend": { + "type": "hadoop", + "path": "file:///data/arg_wm/ForestedLands.tiff" + }, + "cache": "NONE" + }, + { + "name": "ImperviousSurfacesBarrenLandsOpenWater", + "format": "geotiff", + "backend": { + "type": "hadoop", + "path": "file:///data/arg_wm/ImperviousSurfacesBarrenLandsOpenWater.tiff" + }, + "cache": "NONE" + }, + { + "name": "NonWorkingProtectedOrPublicLands", + "format": "geotiff", + "backend": { + "type": "hadoop", + "path": "file:///data/arg_wm/NonWorkingProtectedOrPublicLands.tiff" + }, + "cache": "NONE" + }, + { + "name": "PrimeAgriculturalSoilsNotForestedOrFarmland", + "format": "geotiff", + "backend": { + "type": "hadoop", + "path": "file:///data/arg_wm/PrimeAgriculturalSoilsNotForestedOrFarmland.tiff" + }, + "cache": "NONE" + }, + { + "name": "PrivatelyOwnedWorkingLandsWithEasements", + "format": "geotiff", + "backend": { + "type": "hadoop", + "path": "file:///data/arg_wm/PrivatelyOwnedWorkingLandsWithEasements.tiff" + }, + "cache": "NONE" + }, + { + "name": "PublicallyOwnedWorkingLands", + "format": "geotiff", + "backend": { + "type": "hadoop", + "path": "file:///data/arg_wm/PublicallyOwnedWorkingLands.tiff" + }, + "cache": "NONE" + }, + { + "name": "Wetlands", + "format": "geotiff", + "backend": { + "type": "hadoop", + "path": "file:///data/arg_wm/Wetlands.tiff" + }, + "cache": "NONE" + }, + { + "name": "mask", + "format": "geotiff", + "backend": { + "type": "hadoop", + "path": "file:///data/arg_wm/mask.tiff" + }, + "cache": "NONE" + } +] diff --git a/geotrellis/conf/output.json b/geotrellis/conf/output.json new file mode 100644 index 00000000..91db1a52 --- /dev/null +++ b/geotrellis/conf/output.json @@ -0,0 +1,20 @@ +{ + "backend": { + "type": "accumulo", + "path": "chattanooga", + "profile": "accumulo-local" + }, + "reprojectMethod": "buffered", + "cellSize": { + "width": 256.0, + "height": 256.0 + }, + "tileSize": 256, + "pyramid": true, + "resampleMethod": "nearest-neighbor", + "keyIndexMethod": { + "type": "zorder" + }, + "layoutScheme": "zoomed", + "crs": "EPSG:3857" +} diff --git a/geotrellis/ingest.sh b/geotrellis/ingest.sh index 52d60ac3..318207b3 100755 --- a/geotrellis/ingest.sh +++ b/geotrellis/ingest.sh @@ -1,62 +1,19 @@ #!/usr/bin/env bash -realpath () -{ - f=$@; - if [ -d 
"$f" ]; then - base=""; - dir="$f"; - else - base="/$(basename "$f")"; - dir=$(dirname "$f"); - fi; - dir=$(cd "$dir" && /bin/pwd); - echo "$dir$base" -} - -# Ingest tiled GeoTiff into Accumulo - -# Geotrellis (gt-admin) ingest jar +# GeoTrellis ingest jar export JAR="target/scala-2.10/GeoTrellis-Tutorial-Project-assembly-0.1-SNAPSHOT.jar" -# Directory with the input tiled GeoTiff's -LAYERS="./data/arg_wm" - -# Table to store tiles -TABLE="chattanooga" - -# Destination spatial reference system -CRS="EPSG:3857" - -LAYOUT_SCHEME="tms" - -# Accumulo conf -INSTANCE="gis" -USER="root" -PASSWORD="secret" -ZOOKEEPER="localhost" - # Remove some bad signatures from the assembled JAR zip -d $JAR META-INF/ECLIPSEF.RSA > /dev/null zip -d $JAR META-INF/ECLIPSEF.SF > /dev/null -# Go through all layers and run the spark submit job -for LAYER in $(ls $LAYERS) -do - - LAYERNAME=${LAYER%.*} - INPUT=file:$(realpath $LAYERS/$LAYER) - - echo "spark-submit \ - --class geotrellis.chatta.ChattaIngest --driver-memory=2G $JAR \ - --input hadoop --format geotiff --cache NONE -I path=$INPUT \ - --output accumulo -O instance=$INSTANCE table=$TABLE user=$USER password=$PASSWORD zookeeper=$ZOOKEEPER \ - --layer $LAYERNAME --pyramid --crs $CRS --layoutScheme $LAYOUT_SCHEME" - - spark-submit \ - --class geotrellis.chatta.ChattaIngest --driver-memory=2G $JAR \ - --input hadoop --format geotiff --cache NONE -I path=$INPUT \ - --output accumulo -O instance=$INSTANCE table=$TABLE user=$USER password=$PASSWORD zookeeper=$ZOOKEEPER \ - --layer $LAYERNAME --pyramid --crs $CRS --layoutScheme $LAYOUT_SCHEME +echo "--class geotrellis.chatta.ChattaIngest --driver-memory=2G target/scala-2.10/GeoTrellis-Tutorial-Project-assembly-0.1-SNAPSHOT.jar \ + --input file:///${PWD}/conf/input.json \ + --output file://${PWD}/conf/output.json \ + --backend-profiles file://${PWD}/conf/backend-profiles.json" -done +spark-submit \ + --class geotrellis.chatta.ChattaIngest --driver-memory=2G target/scala-2.10/GeoTrellis-Tutorial-Project-assembly-0.1-SNAPSHOT.jar \ + --input "file:///${PWD}/conf/input.json" \ + --output "file://${PWD}/conf/output.json" \ + --backend-profiles "file://${PWD}/conf/backend-profiles.json" diff --git a/geotrellis/project/plugins.sbt b/geotrellis/project/plugins.sbt index 774953a0..cdb2aeba 100644 --- a/geotrellis/project/plugins.sbt +++ b/geotrellis/project/plugins.sbt @@ -1,3 +1,5 @@ addSbtPlugin("me.lessis" % "bintray-sbt" % "0.3.0") addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.1") + +addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.8.2") diff --git a/geotrellis/run-server.sh b/geotrellis/run-server.sh new file mode 100755 index 00000000..0f0aeddd --- /dev/null +++ b/geotrellis/run-server.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +# GeoTrellis ingest jar +export JAR="target/scala-2.10/GeoTrellis-Tutorial-Project-assembly-0.1-SNAPSHOT.jar" + +# Remove some bad signatures from the assembled JAR +zip -d $JAR META-INF/ECLIPSEF.RSA > /dev/null +zip -d $JAR META-INF/ECLIPSEF.SF > /dev/null + +echo "--class geotrellis.chatta.Main --driver-memory=2G target/scala-2.10/GeoTrellis-Tutorial-Project-assembly-0.1-SNAPSHOT.jar" + +spark-submit --class geotrellis.chatta.Main --driver-memory=2G target/scala-2.10/GeoTrellis-Tutorial-Project-assembly-0.1-SNAPSHOT.jar diff --git a/geotrellis/src/main/resources/application.conf b/geotrellis/src/main/resources/application.conf index 5806d9b8..a3358540 100644 --- a/geotrellis/src/main/resources/application.conf +++ b/geotrellis/src/main/resources/application.conf @@ -1,9 
-geotrellis.port = 8777
-geotrellis.server.static-path = "../static"
-geotrellis.catalog = "data/catalog.json"
-geotrellis.hostname = "localhost"
-zookeeper.address = "localhost"
-accumulo.instance = "gis"
-accumulo.user = "root"
-accumulo.password = "secret"
-spark.master = "local[*]"
\ No newline at end of file
+geotrellis {
+  port = 8777
+  server.static-path = "../static"
+  catalog = "data/catalog.json"
+  hostname = "localhost"
+  backend = "accumulo"
+}
+
+accumulo {
+  instance = "gis"
+  user = "root"
+  password = "secret"
+  zookeepers = "localhost"
+}
+
+hbase {
+  zookeepers = "localhost"
+  master = "localhost"
+}
+
+cassandra {
+  hosts = ["localhost"]
+  user = ""
+  password = ""
+  catalog = "metadata"
+  keyspace = "geotrellis"
+  replicationStrategy = "SimpleStrategy"
+  replicationFactor = 1
+  localDc = "datacenter1"
+  usedHostsPerRemoteDc = 0
+  allowRemoteDCsForLocalConsistencyLevel = false
+}
+
+s3 {
+  bucket = "geotrellis-test"
+  prefix = "chatta-demo"
+}
+
+hadoop.path = "/chatta-demo"
+file.path = "/tmp/chatta-demo"
+spark.master = "local[*]"
diff --git a/geotrellis/src/main/scala/geotrellis/chatta/ChattaIngest.scala b/geotrellis/src/main/scala/geotrellis/chatta/ChattaIngest.scala
index dcdfd273..5fc77329 100644
--- a/geotrellis/src/main/scala/geotrellis/chatta/ChattaIngest.scala
+++ b/geotrellis/src/main/scala/geotrellis/chatta/ChattaIngest.scala
@@ -4,16 +4,16 @@ import geotrellis.raster.Tile
 import geotrellis.spark.SpatialKey
 import geotrellis.spark.etl.Etl
 import geotrellis.spark._
-import geotrellis.spark.io._
-import geotrellis.spark.io.accumulo._
-import geotrellis.spark.io.index.ZCurveKeyIndexMethod
 import geotrellis.spark.util.SparkUtils
-import geotrellis.spark.ingest._
 import geotrellis.vector.ProjectedExtent
+
 import org.apache.spark.SparkConf
 
 object ChattaIngest extends App {
   implicit val sc = SparkUtils.createSparkContext("GeoTrellis ETL SinglebandIngest", new SparkConf(true))
 
-  Etl.ingest[ProjectedExtent, SpatialKey, Tile](args, ZCurveKeyIndexMethod)
-  sc.stop()
+  try {
+    Etl.ingest[ProjectedExtent, SpatialKey, Tile](args)
+  } finally {
+    sc.stop()
+  }
 }
diff --git a/geotrellis/src/main/scala/geotrellis/chatta/ChattaServiceActor.scala b/geotrellis/src/main/scala/geotrellis/chatta/ChattaServiceActor.scala
index e3564342..68eae3d9 100644
--- a/geotrellis/src/main/scala/geotrellis/chatta/ChattaServiceActor.scala
+++ b/geotrellis/src/main/scala/geotrellis/chatta/ChattaServiceActor.scala
@@ -9,13 +9,12 @@ import geotrellis.services._
 import geotrellis.spark._
 import geotrellis.spark.io.AttributeStore.Fields
 import geotrellis.spark.io._
-import geotrellis.spark.io.accumulo._
+import geotrellis.spark.io.cassandra._
 import geotrellis.vector.io.json.Implicits._
 import geotrellis.vector.Polygon
 import geotrellis.vector.reproject._
 
 import akka.actor._
-import org.apache.accumulo.core.client.security.tokens.PasswordToken
 import com.typesafe.config.Config
 import org.apache.spark.{SparkConf, SparkContext}
 import spray.http._
@@ -23,13 +22,14 @@ import spray.httpx.SprayJsonSupport._
 import spray.json._
 import spray.routing._
 
+import scala.collection.JavaConversions._
+
 class ChattaServiceActor(override val staticPath: String, config: Config) extends Actor with ChattaService {
 
-  val conf = AvroRegistrator(new SparkConf()
-    .setMaster(config.getString("spark.master"))
-    .setAppName("ChattaDemo")
-    .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
-    .set("spark.kryo.registrator", "geotrellis.spark.io.kryo.KryoRegistrator")
-    .setJars(SparkContext.jarOfObject(this).toList)
+  val conf = AvroRegistrator(
+    new SparkConf()
+      .setAppName("ChattaDemo")
+      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
+      .set("spark.kryo.registrator", "geotrellis.spark.io.kryo.KryoRegistrator")
   )
 
   implicit val sparkContext = new SparkContext(conf)
@@ -37,22 +37,15 @@ class ChattaServiceActor(override val staticPath: String, config: Config) extend
   override def actorRefFactory = context
   override def receive = runRoute(serviceRoute)
 
-  override val accumulo = AccumuloInstance(
-    config.getString("accumulo.instance"),
-    config.getString("zookeeper.address"),
-    config.getString("accumulo.user"),
-    new PasswordToken(config.getString("accumulo.password"))
-  )
+  lazy val (reader, tileReader, attributeStore) = initBackend(config)
 }
 
 trait ChattaService extends HttpService with LazyLogging {
   implicit val sparkContext: SparkContext
-
   implicit val executionContext = actorRefFactory.dispatcher
 
-  val accumulo: AccumuloInstance
-  lazy val reader = AccumuloLayerReader(accumulo)
-  lazy val tileReader = AccumuloValueReader(accumulo)
-  lazy val attributeStore = AccumuloAttributeStore(accumulo.connector)
+  val reader: FilteringLayerReader[LayerId]
+  val tileReader: ValueReader[LayerId]
+  val attributeStore: AttributeStore
 
   val staticPath: String
   val baseZoomLevel = 9
@@ -61,7 +54,7 @@ trait ChattaService extends HttpService with LazyLogging {
     LayerId(layer, baseZoomLevel)
 
   def getMetaData(id: LayerId): TileLayerMetadata[SpatialKey] =
-    attributeStore.read[TileLayerMetadata[SpatialKey]](id, Fields.metadata)
+    attributeStore.readMetadata[TileLayerMetadata[SpatialKey]](id)
 
   def serviceRoute = get {
     pathPrefix("gt") {
diff --git a/geotrellis/src/main/scala/geotrellis/chatta/LazyLogging.scala b/geotrellis/src/main/scala/geotrellis/chatta/LazyLogging.scala
index f7431863..c73b45d8 100644
--- a/geotrellis/src/main/scala/geotrellis/chatta/LazyLogging.scala
+++ b/geotrellis/src/main/scala/geotrellis/chatta/LazyLogging.scala
@@ -1,6 +1,7 @@
 package geotrellis.chatta
 
 import org.apache.log4j.{PatternLayout, WriterAppender, Logger}
+
 import scala.collection.mutable
 import java.io.StringWriter
diff --git a/geotrellis/src/main/scala/geotrellis/chatta/ModelSpark.scala b/geotrellis/src/main/scala/geotrellis/chatta/ModelSpark.scala
index c73b0f43..21817d73 100644
--- a/geotrellis/src/main/scala/geotrellis/chatta/ModelSpark.scala
+++ b/geotrellis/src/main/scala/geotrellis/chatta/ModelSpark.scala
@@ -3,8 +3,6 @@ package geotrellis.chatta
 import geotrellis.raster._
 import geotrellis.spark._
 import geotrellis.spark.io._
-import geotrellis.spark.io.accumulo._
-import geotrellis.spark.ingest._
 import geotrellis.vector._
 
 import org.apache.spark.rdd.RDD
@@ -34,7 +32,7 @@ object LayerRatio {
 
 object ModelSpark {
   def weightedOverlay(layers: Iterable[String], weights: Iterable[Int], zoom: Int, rasterExtent: RasterExtent)
-                     (reader: AccumuloLayerReader): RDD[(SpatialKey, Tile)] = {
+                     (reader: FilteringLayerReader[LayerId]): RDD[(SpatialKey, Tile)] = {
     val layerIds = layers.map(LayerId(_, zoom))
     val maskId = LayerId("mask", zoom)
 
@@ -53,7 +51,7 @@ object ModelSpark {
   }
 
   def summary(layers: Iterable[String], weights: Iterable[Int], zoom: Int, polygon: Polygon)
-             (reader: AccumuloLayerReader): SummaryResult = {
+             (reader: FilteringLayerReader[LayerId]): SummaryResult = {
     val layerIds = layers.map(LayerId(_, zoom))
 
     val layerRatios =
diff --git a/geotrellis/src/main/scala/geotrellis/chatta/package.scala b/geotrellis/src/main/scala/geotrellis/chatta/package.scala
new file mode 100644
index 00000000..cc759173
--- /dev/null
+++ b/geotrellis/src/main/scala/geotrellis/chatta/package.scala
@@ -0,0 +1,68 @@
+package geotrellis
+
+import geotrellis.spark.LayerId
+import geotrellis.spark.io._
+import geotrellis.spark.io.s3._
+import geotrellis.spark.io.accumulo._
+import geotrellis.spark.io.hbase._
+import geotrellis.spark.io.cassandra._
+import geotrellis.spark.io.hadoop._
+import geotrellis.spark.io.file._
+
+import org.apache.accumulo.core.client.security.tokens.PasswordToken
+import com.typesafe.config.Config
+import org.apache.spark.SparkContext
+
+import scala.collection.JavaConversions._
+
+package object chatta {
+  def initBackend(config: Config)(implicit cs: SparkContext): (FilteringLayerReader[LayerId], ValueReader[LayerId], AttributeStore) = {
+    config.getString("geotrellis.backend") match {
+      case "s3" => {
+        val (bucket, prefix) = config.getString("s3.bucket") -> config.getString("s3.prefix")
+        val attributeStore = S3AttributeStore(bucket, prefix)
+
+        (S3LayerReader(attributeStore), S3ValueReader(bucket, prefix), attributeStore)
+      }
+      case "accumulo" => {
+        val instance = AccumuloInstance(
+          config.getString("accumulo.instance"),
+          config.getString("accumulo.zookeepers"),
+          config.getString("accumulo.user"),
+          new PasswordToken(config.getString("accumulo.password"))
+        )
+
+        (AccumuloLayerReader(instance), AccumuloValueReader(instance), AccumuloAttributeStore(instance))
+      }
+      case "hbase" => {
+        val instance = HBaseInstance(
+          config.getString("hbase.zookeepers").split(","),
+          config.getString("hbase.master")
+        )
+
+        (HBaseLayerReader(instance), HBaseValueReader(instance), HBaseAttributeStore(instance))
+      }
+      case "cassandra" => {
+        val instance = BaseCassandraInstance(
+          config.getStringList("cassandra.hosts").toList,
+          config.getString("cassandra.user"),
+          config.getString("cassandra.password"),
+          config.getString("cassandra.replicationStrategy"),
+          config.getInt("cassandra.replicationFactor")
+        )
+
+        (CassandraLayerReader(instance), CassandraValueReader(instance), CassandraAttributeStore(instance))
+      }
+      case "hadoop" => {
+        val path = config.getString("hadoop.path")
+        (HadoopLayerReader(path), HadoopValueReader(path), HadoopAttributeStore(path))
+      }
+      case "file" => {
+        val path = config.getString("file.path")
+        (FileLayerReader(path), FileValueReader(path), FileAttributeStore(path))
+      }
+      case s => throw new Exception(s"unsupported backend: $s")
+    }
+  }
+
+}
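For reference, a minimal sketch of how `initBackend` is meant to be consumed, mirroring what `ChattaServiceActor` does; the object name, the `mask`/zoom-9 layer id (taken from `input.json` and the trait's `baseZoomLevel`), and the spatial key are illustrative assumptions:

```scala
import com.typesafe.config.ConfigFactory
import geotrellis.chatta._
import geotrellis.raster.Tile
import geotrellis.spark._
import geotrellis.spark.util.SparkUtils
import org.apache.spark.SparkConf

object BackendSmokeTest extends App {
  // application.conf supplies geotrellis.backend plus the matching backend section
  val config = ConfigFactory.load()
  implicit val sc = SparkUtils.createSparkContext("BackendSmokeTest", new SparkConf(true))

  try {
    val (_, tileReader, attributeStore) = initBackend(config)
    // list the layers the ingest wrote into the attribute store
    attributeStore.layerIds.foreach(println)
    // read one tile through the backend-agnostic ValueReader
    // (the key is illustrative; a key with no tile raises a tile-not-found error)
    val tile: Tile = tileReader.reader[SpatialKey, Tile](LayerId("mask", 9)).read(SpatialKey(0, 0))
    println(s"mask tile: ${tile.cols} x ${tile.rows}")
  } finally sc.stop()
}
```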