#todo fix
[error] /development/projects/05_ds/spark/bdg-sequila/src/main/scala/org/biodatageeks/sequila/datasources/BAM/AlignmentRelation.scala:102:16: not found: value spark_bam
[error]         import spark_bam._, hammerlab.path._
[error]                ^
[error] /development/projects/05_ds/spark/bdg-sequila/src/main/scala/org/biodatageeks/sequila/datasources/BAM/AlignmentRelation.scala:102:29: not found: value hammerlab
[error]         import spark_bam._, hammerlab.path._
[error]                             ^
[error] /development/projects/05_ds/spark/bdg-sequila/src/main/scala/org/biodatageeks/sequila/datasources/BAM/AlignmentRelation.scala:103:23: not found: value Path
[error]         val bamPath = Path(resolvedPath)
[error]                       ^
[error] /development/projects/05_ds/spark/bdg-sequila/src/main/scala/org/biodatageeks/sequila/datasources/BAM/AlignmentRelation.scala:106:12: value loadReads is not a member of org.apache.spark.SparkContext
[error] possible cause: maybe a semicolon is missing before `value loadReads'?
[error]           .loadReads(bamPath)
[error]            ^
[error] /development/projects/05_ds/spark/bdg-sequila/src/main/scala/org/biodatageeks/sequila/datasources/VCF/VCFRelation.scala:3:11: object projectglow is not a member of package io
[error] import io.projectglow.Glow
[error]           ^
[error] /development/projects/05_ds/spark/bdg-sequila/src/main/scala/org/biodatageeks/sequila/datasources/VCF/VCFRelation.scala:35:7: not found: value Glow
[error]       Glow.transform(m.toLowerCase(), inputDf, Map("reference_genome_path" -> ref_genome_path.get))
[error]       ^
[error] 6 errors found
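
The last two errors come from VCFRelation.scala's use of Glow, whose glow_2.11 artifact is commented out in build.sbt below (marked `#todo find replacement`). A minimal sketch of the call shape once a compatible Glow artifact is restored; the transformer name and file paths here are illustrative assumptions, not taken from the repo:

```scala
import io.projectglow.Glow
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("glow-sketch").getOrCreate()

// Glow registers a "vcf" datasource once its jar is on the classpath.
val inputDf = spark.read.format("vcf").load("/path/to/input.vcf") // placeholder path

// Same shape as the failing call in VCFRelation.scala:35;
// "normalize_variants" is one of Glow's built-in transformers,
// used here only as an example.
val normalized = Glow.transform(
  "normalize_variants",
  inputDf,
  Map("reference_genome_path" -> "/path/to/reference.fa")) // placeholder path
```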

See fix for `AliasViewChild` as per apache/spark#22713
SemanticBeeng committed Feb 9, 2020
1 parent 9dd7476 commit ec2d6f1
Showing 5 changed files with 20 additions and 16 deletions.
.gitignore: 2 additions & 1 deletion
@@ -7,7 +7,8 @@ target/

 build/
 project/.boot/
-
+project/.sbtboot
+project/.ivy
 
 .DS_Store


build.sbt: 7 additions & 7 deletions
@@ -44,7 +44,7 @@ libraryDependencies += "org.hammerlab.bdg-utils" %% "cli" % "0.3.0"
libraryDependencies += "com.github.samtools" % "htsjdk" % "2.19.0"


libraryDependencies += "ch.cern.sparkmeasure" %% "spark-measure" % "0.13" excludeAll (ExclusionRule("org.apache.hadoop"))
libraryDependencies += "ch.cern.sparkmeasure" %% "spark-measure" % "0.15" excludeAll (ExclusionRule("org.apache.hadoop"))

libraryDependencies += "org.broadinstitute" % "gatk-native-bindings" % "1.0.0" excludeAll (ExclusionRule("org.apache.hadoop"))
libraryDependencies += "org.apache.logging.log4j" % "log4j-core" % "2.11.0"
@@ -53,20 +53,20 @@ libraryDependencies += "org.apache.logging.log4j" % "log4j-api" % "2.11.0"
libraryDependencies += "com.intel.gkl" % "gkl" % "0.8.5-1-darwin-SNAPSHOT"
libraryDependencies += "com.intel.gkl" % "gkl" % "0.8.5-1-linux-SNAPSHOT"

libraryDependencies += "org.hammerlab.bam" % "load_2_11" % "1.2.0-M1"
//libraryDependencies += "org.hammerlab.bam" % "load_2_11" % "1.2.0-M1" #todo find replacement

libraryDependencies += "de.ruedigermoeller" % "fst" % "2.57"
libraryDependencies += "org.apache.commons" % "commons-lang3" % "3.7"
libraryDependencies += "org.eclipse.jetty" % "jetty-servlet" % "9.3.24.v20180605"
libraryDependencies += "org.apache.derby" % "derbyclient" % "10.14.2.0"
//libraryDependencies += "org.eclipse.jetty" % "jetty-servlet" % "9.3.24.v20180605" //#todo needed?
//libraryDependencies += "org.apache.derby" % "derbyclient" % "10.14.2.0" //#todo needed?


libraryDependencies += "org.biodatageeks" % "bdg-performance_2.11" % "0.2-SNAPSHOT" excludeAll (ExclusionRule("org.apache.hadoop"))
//libraryDependencies += "org.biodatageeks" % "bdg-performance_2.11" % "0.2-SNAPSHOT" excludeAll (ExclusionRule("org.apache.hadoop"))
libraryDependencies += "org.bdgenomics.bdg-formats" % "bdg-formats" % "0.15.0" //#todo confirm

libraryDependencies += "org.disq-bio" % "disq" % "0.3.3"


libraryDependencies += "io.projectglow" % "glow_2.11" % "0.2.0"
//libraryDependencies += "io.projectglow" % "glow_2.11" % "0.2.0" #todo find replacement

avroSpecificSourceDirectories in Compile += (sourceDirectory in Compile).value / "avro/input"
avroSpecificSourceDirectories in Test += (sourceDirectory in Test).value / "avro"
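
The commented-out org.hammerlab.bam dependency is what provided `spark_bam._` and `sc.loadReads` in AlignmentRelation.scala (the first group of errors in the commit message); no replacement is chosen yet (`#todo find replacement`). One candidate, sketched here purely as an assumption, is ADAM's loadAlignments, which the code base already uses (see the ADAMContext import in the next file) and which also reads BAM through the SparkContext:

```scala
import org.apache.spark.SparkContext
import org.bdgenomics.adam.rdd.ADAMContext._

// Hypothetical stand-in for the spark-bam code in AlignmentRelation.scala:
//   import spark_bam._, hammerlab.path._
//   sc.loadReads(Path(resolvedPath))
// ADAMContext implicitly adds loadAlignments to SparkContext; it handles
// BAM/SAM/CRAM and returns a dataset whose .rdd exposes the reads.
def loadBamReads(sc: SparkContext, resolvedPath: String) =
  sc.loadAlignments(resolvedPath).rdd
```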

@@ -25,10 +25,13 @@ import org.apache.spark.sql.types.{LongType, StructField, StructType}
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.{Row, SparkSession}
 import org.bdgenomics.adam.rdd.ADAMContext._
-import org.bdgenomics.adam.rdd.read.AlignmentRecordRDD
-import org.bdgenomics.formats.avro.AlignmentRecord
+import org.bdgenomics.adam.rdd.feature.FeatureDataset
+import org.bdgenomics.adam.rdd.read.AlignmentDataset
+import org.bdgenomics.formats.avro.Alignment
+//import org.bdgenomics.adam.rdd.read.AlignmentRecordRDD
+//import org.bdgenomics.formats.avro.AlignmentRecord
 import org.bdgenomics.formats.avro.Feature
-import org.bdgenomics.adam.rdd.feature.FeatureRDD
+//import org.bdgenomics.adam.rdd.feature.FeatureRDD
 import org.apache.spark.sql.types._
 
 import scala.util.Random
@@ -105,11 +108,11 @@ object Main {
 Random.setSeed(4242)
 
 
-var features: FeatureRDD = sc.loadFeatures(featuresFilePath)
-var alignments: AlignmentRecordRDD = sc.loadAlignments(alignmentsFilePath)
+var features: FeatureDataset = sc.loadFeatures(featuresFilePath)
+var alignments: AlignmentDataset = sc.loadAlignments(alignmentsFilePath)
 
 var featuresRdd: RDD[Feature] = features.rdd
-var alignmentsRdd: RDD[AlignmentRecord] = alignments.rdd
+var alignmentsRdd: RDD[Alignment] = alignments.rdd
 //get only interesting columns
 
 val fRdd = featuresRdd.map(rec => Row(rec.getStart().toInt, rec.getEnd().toInt));
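
Both hunks in this file are the mechanical part of the ADAM upgrade; the renames used in this diff are AlignmentRecordRDD to AlignmentDataset, FeatureRDD to FeatureDataset, and AlignmentRecord to Alignment. A minimal standalone sketch of the updated load path (file paths are placeholders):

```scala
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.adam.rdd.feature.FeatureDataset
import org.bdgenomics.adam.rdd.read.AlignmentDataset
import org.bdgenomics.formats.avro.{Alignment, Feature}

// Loads features and alignments with the post-rename ADAM API and
// unwraps them to plain RDDs, as Main does above.
def load(sc: SparkContext): (RDD[Feature], RDD[Alignment]) = {
  val features: FeatureDataset = sc.loadFeatures("/path/to/features.bed")
  val alignments: AlignmentDataset = sc.loadAlignments("/path/to/reads.bam")
  (features.rdd, alignments.rdd)
}
```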

@@ -66,7 +66,7 @@ class SeQuiLaAnalyzer(catalog: SessionCatalog, conf: SQLConf) extends Analyzer(c
Batch("Post-Hoc Resolution", Once, postHocResolutionRules: _*),
Batch("SeQuiLa", Once,sequilaOptmazationRules: _*), //SeQuilaOptimization rules
Batch("View", Once,
AliasViewChild(conf)),
/*AliasViewChild*/EliminateView),
Batch("Nondeterministic", Once,
PullOutNondeterministic),
Batch("UDF", Once,
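
The `AliasViewChild(conf)` rule is no longer available in the Spark version this commit targets (see apache/spark#22713, referenced in the commit message), so the diff swaps in `EliminateView`. A self-contained sketch of the same batch shape, assuming Catalyst's analysis package exposes `EliminateView` as a parameterless rule object:

```scala
import org.apache.spark.sql.catalyst.analysis.EliminateView
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.RuleExecutor

// Illustration only, not the project's analyzer: a rule executor whose
// View batch mirrors the change above (EliminateView replacing
// AliasViewChild(conf)).
object ViewBatchSketch extends RuleExecutor[LogicalPlan] {
  override protected def batches: Seq[Batch] =
    Batch("View", Once, EliminateView) :: Nil
}
```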

@@ -3,7 +3,7 @@ package org.apache.spark.sql

 import org.apache.spark.sql.SparkSession.Builder
 import org.apache.spark.{SparkConf, SparkContext}
-import org.apache.spark.sql.catalyst.analysis.{AliasViewChild, Analyzer, CleanupAliases, EliminateUnions, ResolveCreateNamedStruct, ResolveHints, ResolveInlineTables, ResolveTableValuedFunctions, ResolveTimeZone, SeQuiLaAnalyzer, SubstituteUnresolvedOrdinals, TimeWindowing, TypeCoercion, UpdateOuterReferences}
+import org.apache.spark.sql.catalyst.analysis.{/*AliasViewChild, */Analyzer, CleanupAliases, EliminateUnions, ResolveCreateNamedStruct, ResolveHints, ResolveInlineTables, ResolveTableValuedFunctions, ResolveTimeZone, SeQuiLaAnalyzer, SubstituteUnresolvedOrdinals, TimeWindowing, TypeCoercion, UpdateOuterReferences}
 import org.apache.spark.sql.catalyst.catalog.SessionCatalog
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.catalyst.rules.Rule
