From ec2d6f172623194b5fc9ead843e083ac84781531 Mon Sep 17 00:00:00 2001
From: SemanticBeeng
Date: Sun, 9 Feb 2020 08:41:08 -0800
Subject: [PATCH] `#todo` fix compile errors after dependency upgrades

The build currently fails with:

[error] /development/projects/05_ds/spark/bdg-sequila/src/main/scala/org/biodatageeks/sequila/datasources/BAM/AlignmentRelation.scala:102:16: not found: value spark_bam
[error] import spark_bam._, hammerlab.path._
[error]        ^
[error] /development/projects/05_ds/spark/bdg-sequila/src/main/scala/org/biodatageeks/sequila/datasources/BAM/AlignmentRelation.scala:102:29: not found: value hammerlab
[error] import spark_bam._, hammerlab.path._
[error]                     ^
[error] /development/projects/05_ds/spark/bdg-sequila/src/main/scala/org/biodatageeks/sequila/datasources/BAM/AlignmentRelation.scala:103:23: not found: value Path
[error]     val bamPath = Path(resolvedPath)
[error]                   ^
[error] /development/projects/05_ds/spark/bdg-sequila/src/main/scala/org/biodatageeks/sequila/datasources/BAM/AlignmentRelation.scala:106:12: value loadReads is not a member of org.apache.spark.SparkContext
[error] possible cause: maybe a semicolon is missing before `value loadReads'?
[error]       .loadReads(bamPath)
[error]        ^
[error] /development/projects/05_ds/spark/bdg-sequila/src/main/scala/org/biodatageeks/sequila/datasources/VCF/VCFRelation.scala:3:11: object projectglow is not a member of package io
[error] import io.projectglow.Glow
[error]           ^
[error] /development/projects/05_ds/spark/bdg-sequila/src/main/scala/org/biodatageeks/sequila/datasources/VCF/VCFRelation.scala:35:7: not found: value Glow
[error]       Glow.transform(m.toLowerCase(), inputDf, Map("reference_genome_path" -> ref_genome_path.get))
[error]       ^
[error] 6 errors found

See the fix for `AliasViewChild` as per https://github.com/apache/spark/pull/22713.
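With spark-bam dropped from build.sbt, `AlignmentRelation` needs another way to
load reads. A minimal sketch, assuming disq's `HtsjdkReadsRddStorage` API (disq
0.3.3 is already a dependency); `spark` and `resolvedPath` stand for the session
and path already in scope at the call site:

    // Hypothetical replacement for spark-bam's sc.loadReads(bamPath):
    // read BAM/CRAM via disq into an RDD of htsjdk SAMRecords.
    import htsjdk.samtools.SAMRecord
    import org.apache.spark.api.java.JavaSparkContext
    import org.apache.spark.rdd.RDD
    import org.disq_bio.disq.HtsjdkReadsRddStorage

    val jsc = new JavaSparkContext(spark.sparkContext)
    val readsRdd = HtsjdkReadsRddStorage.makeDefault(jsc).read(resolvedPath)
    val records: RDD[SAMRecord] = readsRdd.getReads.rdd

ADAM's `sc.loadAlignments` (already used in Main.scala below) covers the same
formats if an `AlignmentDataset` fits the relation better.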
"org.apache.derby" % "derbyclient" % "10.14.2.0" +//libraryDependencies += "org.eclipse.jetty" % "jetty-servlet" % "9.3.24.v20180605" //#todo needed? +//libraryDependencies += "org.apache.derby" % "derbyclient" % "10.14.2.0" //#todo needed? - -libraryDependencies += "org.biodatageeks" % "bdg-performance_2.11" % "0.2-SNAPSHOT" excludeAll (ExclusionRule("org.apache.hadoop")) +//libraryDependencies += "org.biodatageeks" % "bdg-performance_2.11" % "0.2-SNAPSHOT" excludeAll (ExclusionRule("org.apache.hadoop")) +libraryDependencies += "org.bdgenomics.bdg-formats" % "bdg-formats" % "0.15.0" //#todo confirm libraryDependencies += "org.disq-bio" % "disq" % "0.3.3" -libraryDependencies += "io.projectglow" % "glow_2.11" % "0.2.0" +//libraryDependencies += "io.projectglow" % "glow_2.11" % "0.2.0" #todo find replacement avroSpecificSourceDirectories in Compile += (sourceDirectory in Compile).value / "avro/input" avroSpecificSourceDirectories in Test += (sourceDirectory in Test).value / "avro" diff --git a/src/main/scala/org/biodatageeks/sequila/rangejoins/common/Main.scala b/src/main/scala/org/biodatageeks/sequila/rangejoins/common/Main.scala index d2cdbcb6..d8e36e92 100644 --- a/src/main/scala/org/biodatageeks/sequila/rangejoins/common/Main.scala +++ b/src/main/scala/org/biodatageeks/sequila/rangejoins/common/Main.scala @@ -25,10 +25,13 @@ import org.apache.spark.sql.types.{LongType, StructField, StructType} import org.apache.spark.rdd.RDD import org.apache.spark.sql.{Row, SparkSession} import org.bdgenomics.adam.rdd.ADAMContext._ -import org.bdgenomics.adam.rdd.read.AlignmentRecordRDD -import org.bdgenomics.formats.avro.AlignmentRecord +import org.bdgenomics.adam.rdd.feature.FeatureDataset +import org.bdgenomics.adam.rdd.read.AlignmentDataset +import org.bdgenomics.formats.avro.Alignment +//import org.bdgenomics.adam.rdd.read.AlignmentRecordRDD +//import org.bdgenomics.formats.avro.AlignmentRecord import org.bdgenomics.formats.avro.Feature -import org.bdgenomics.adam.rdd.feature.FeatureRDD +//import org.bdgenomics.adam.rdd.feature.FeatureRDD import org.apache.spark.sql.types._ import scala.util.Random @@ -105,11 +108,11 @@ object Main { Random.setSeed(4242) - var features: FeatureRDD = sc.loadFeatures(featuresFilePath) - var alignments: AlignmentRecordRDD = sc.loadAlignments(alignmentsFilePath) + var features: FeatureDataset = sc.loadFeatures(featuresFilePath) + var alignments: AlignmentDataset = sc.loadAlignments(alignmentsFilePath) var featuresRdd: RDD[Feature] = features.rdd - var alignmentsRdd: RDD[AlignmentRecord] = alignments.rdd + var alignmentsRdd: RDD[Alignment] = alignments.rdd //get only interesting columns val fRdd = featuresRdd.map(rec => Row(rec.getStart().toInt, rec.getEnd().toInt)); diff --git a/src/main/scala/org/biodatageeks/sequila/utvf/SeQuiLaAnalyzer.scala b/src/main/scala/org/biodatageeks/sequila/utvf/SeQuiLaAnalyzer.scala index 99aa0d49..9d8404f7 100644 --- a/src/main/scala/org/biodatageeks/sequila/utvf/SeQuiLaAnalyzer.scala +++ b/src/main/scala/org/biodatageeks/sequila/utvf/SeQuiLaAnalyzer.scala @@ -66,7 +66,7 @@ class SeQuiLaAnalyzer(catalog: SessionCatalog, conf: SQLConf) extends Analyzer(c Batch("Post-Hoc Resolution", Once, postHocResolutionRules: _*), Batch("SeQuiLa", Once,sequilaOptmazationRules: _*), //SeQuilaOptimization rules Batch("View", Once, - AliasViewChild(conf)), + /*AliasViewChild*/EliminateView), Batch("Nondeterministic", Once, PullOutNondeterministic), Batch("UDF", Once, diff --git a/src/main/scala/org/biodatageeks/sequila/utvf/SequilaSession.scala 
diff --git a/src/main/scala/org/biodatageeks/sequila/utvf/SeQuiLaAnalyzer.scala b/src/main/scala/org/biodatageeks/sequila/utvf/SeQuiLaAnalyzer.scala
index 99aa0d49..9d8404f7 100644
--- a/src/main/scala/org/biodatageeks/sequila/utvf/SeQuiLaAnalyzer.scala
+++ b/src/main/scala/org/biodatageeks/sequila/utvf/SeQuiLaAnalyzer.scala
@@ -66,7 +66,7 @@
     Batch("Post-Hoc Resolution", Once, postHocResolutionRules: _*),
     Batch("SeQuiLa", Once,sequilaOptmazationRules: _*), //SeQuilaOptimization rules
     Batch("View", Once,
-      AliasViewChild(conf)),
+      /*AliasViewChild*/EliminateView),
     Batch("Nondeterministic", Once,
       PullOutNondeterministic),
     Batch("UDF", Once,

diff --git a/src/main/scala/org/biodatageeks/sequila/utvf/SequilaSession.scala b/src/main/scala/org/biodatageeks/sequila/utvf/SequilaSession.scala
index ea59750c..d46b8a67 100644
--- a/src/main/scala/org/biodatageeks/sequila/utvf/SequilaSession.scala
+++ b/src/main/scala/org/biodatageeks/sequila/utvf/SequilaSession.scala
@@ -3,7 +3,7 @@ package org.apache.spark.sql
 import org.apache.spark.sql.SparkSession.Builder
 import org.apache.spark.{SparkConf, SparkContext}
 
-import org.apache.spark.sql.catalyst.analysis.{AliasViewChild, Analyzer, CleanupAliases, EliminateUnions, ResolveCreateNamedStruct, ResolveHints, ResolveInlineTables, ResolveTableValuedFunctions, ResolveTimeZone, SeQuiLaAnalyzer, SubstituteUnresolvedOrdinals, TimeWindowing, TypeCoercion, UpdateOuterReferences}
+import org.apache.spark.sql.catalyst.analysis.{/*AliasViewChild, */Analyzer, CleanupAliases, EliminateUnions, ResolveCreateNamedStruct, ResolveHints, ResolveInlineTables, ResolveTableValuedFunctions, ResolveTimeZone, SeQuiLaAnalyzer, SubstituteUnresolvedOrdinals, TimeWindowing, TypeCoercion, UpdateOuterReferences}
 import org.apache.spark.sql.catalyst.catalog.SessionCatalog
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.catalyst.rules.Rule
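Still open: the `#todo find replacement` for Glow in build.sbt, which is what
breaks VCFRelation.scala. For the read side, a sketch assuming disq also ships
a `HtsjdkVariantsRddStorage` (an assumption to verify against disq 0.3.3); the
normalization that `Glow.transform` performed still needs a separate
replacement, and `jsc` and `vcfPath` are placeholders:

    import htsjdk.variant.variantcontext.VariantContext
    import org.apache.spark.rdd.RDD
    import org.disq_bio.disq.HtsjdkVariantsRddStorage

    // Hypothetical Glow-free VCF read: htsjdk VariantContexts via disq.
    val variantsRdd = HtsjdkVariantsRddStorage.makeDefault(jsc).read(vcfPath)
    val variants: RDD[VariantContext] = variantsRdd.getVariants.rdd

ADAM's `sc.loadVcf(vcfPath)` is another option if its VariantContext wrapper
fits the relation's schema.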