Skip to content

Commit

Permalink
[SPARK-22896] Improvement in String interpolation
Browse files Browse the repository at this point in the history
## What changes were proposed in this pull request?

* String interpolation in the ML pipeline examples has been corrected to follow standard Scala conventions.

## How was this patch tested?
* Manually tested.

Author: chetkhatri <[email protected]>

Closes #20070 from chetkhatri/mllib-chetan-contrib.
  • Loading branch information
chetkhatri authored and srowen committed Jan 3, 2018
1 parent a66fe36 commit 9a2b65a
Show file tree
Hide file tree
Showing 49 changed files with 94 additions and 96 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ public static void main(String[] args) {
.setNumBuckets(3);

Dataset<Row> result = discretizer.fit(df).transform(df);
result.show();
result.show(false);
// $example off$
spark.stop();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,6 @@ object SimpleSkewedGroupByTest {
pairs1.count

println(s"RESULT: ${pairs1.groupByKey(numReducers).count}")
// Print how many keys each reducer got (for debugging)
// println("RESULT: " + pairs1.groupByKey(numReducers)
// .map{case (k,v) => (k, v.size)}
// .collectAsMap)

spark.stop()
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,11 @@ object Analytics extends Logging {
// TriangleCount requires the graph to be partitioned
.partitionBy(partitionStrategy.getOrElse(RandomVertexCut)).cache()
val triangles = TriangleCount.run(graph)
println("Triangles: " + triangles.vertices.map {
val triangleTypes = triangles.vertices.map {
case (vid, data) => data.toLong
}.reduce(_ + _) / 3)
}.reduce(_ + _) / 3

println(s"Triangles: ${triangleTypes}")
sc.stop()

case _ =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ object SynthBenchmark {
arg =>
arg.dropWhile(_ == '-').split('=') match {
case Array(opt, v) => (opt -> v)
case _ => throw new IllegalArgumentException("Invalid argument: " + arg)
case _ => throw new IllegalArgumentException(s"Invalid argument: $arg")
}
}

Expand All @@ -76,7 +76,7 @@ object SynthBenchmark {
case ("sigma", v) => sigma = v.toDouble
case ("degFile", v) => degFile = v
case ("seed", v) => seed = v.toInt
case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt)
case (opt, _) => throw new IllegalArgumentException(s"Invalid option: $opt")
}

val conf = new SparkConf()
Expand All @@ -86,7 +86,7 @@ object SynthBenchmark {
val sc = new SparkContext(conf)

// Create the graph
println(s"Creating graph...")
println("Creating graph...")
val unpartitionedGraph = GraphGenerators.logNormalGraph(sc, numVertices,
numEPart.getOrElse(sc.defaultParallelism), mu, sigma, seed)
// Repartition the graph
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,9 @@ object ChiSquareTestExample {

val df = data.toDF("label", "features")
val chi = ChiSquareTest.test(df, "features", "label").head
println("pValues = " + chi.getAs[Vector](0))
println("degreesOfFreedom = " + chi.getSeq[Int](1).mkString("[", ",", "]"))
println("statistics = " + chi.getAs[Vector](2))
println(s"pValues = ${chi.getAs[Vector](0)}")
println(s"degreesOfFreedom ${chi.getSeq[Int](1).mkString("[", ",", "]")}")
println(s"statistics ${chi.getAs[Vector](2)}")
// $example off$

spark.stop()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,10 @@ object CorrelationExample {

val df = data.map(Tuple1.apply).toDF("features")
val Row(coeff1: Matrix) = Correlation.corr(df, "features").head
println("Pearson correlation matrix:\n" + coeff1.toString)
println(s"Pearson correlation matrix:\n $coeff1")

val Row(coeff2: Matrix) = Correlation.corr(df, "features", "spearman").head
println("Spearman correlation matrix:\n" + coeff2.toString)
println(s"Spearman correlation matrix:\n $coeff2")
// $example off$

spark.stop()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ object DataFrameExample {
val parser = new OptionParser[Params]("DataFrameExample") {
head("DataFrameExample: an example app using DataFrame for ML.")
opt[String]("input")
.text(s"input path to dataframe")
.text("input path to dataframe")
.action((x, c) => c.copy(input = x))
checkConfig { params =>
success
Expand Down Expand Up @@ -93,7 +93,7 @@ object DataFrameExample {
// Load the records back.
println(s"Loading Parquet file with UDT from $outputDir.")
val newDF = spark.read.parquet(outputDir)
println(s"Schema from Parquet:")
println("Schema from Parquet:")
newDF.printSchema()

spark.stop()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,10 @@ object DecisionTreeClassificationExample {
.setPredictionCol("prediction")
.setMetricName("accuracy")
val accuracy = evaluator.evaluate(predictions)
println("Test Error = " + (1.0 - accuracy))
println(s"Test Error = ${(1.0 - accuracy)}")

val treeModel = model.stages(2).asInstanceOf[DecisionTreeClassificationModel]
println("Learned classification tree model:\n" + treeModel.toDebugString)
println(s"Learned classification tree model:\n ${treeModel.toDebugString}")
// $example off$

spark.stop()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,10 @@ object DecisionTreeRegressionExample {
.setPredictionCol("prediction")
.setMetricName("rmse")
val rmse = evaluator.evaluate(predictions)
println("Root Mean Squared Error (RMSE) on test data = " + rmse)
println(s"Root Mean Squared Error (RMSE) on test data = $rmse")

val treeModel = model.stages(1).asInstanceOf[DecisionTreeRegressionModel]
println("Learned regression tree model:\n" + treeModel.toDebugString)
println(s"Learned regression tree model:\n ${treeModel.toDebugString}")
// $example off$

spark.stop()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ object DeveloperApiExample {
// Create a LogisticRegression instance. This instance is an Estimator.
val lr = new MyLogisticRegression()
// Print out the parameters, documentation, and any default values.
println("MyLogisticRegression parameters:\n" + lr.explainParams() + "\n")
println(s"MyLogisticRegression parameters:\n ${lr.explainParams()}")

// We may set parameters using setter methods.
lr.setMaxIter(10)
Expand Down Expand Up @@ -169,10 +169,10 @@ private class MyLogisticRegressionModel(
Vectors.dense(-margin, margin)
}

/** Number of classes the label can take. 2 indicates binary classification. */
// Number of classes the label can take. 2 indicates binary classification.
override val numClasses: Int = 2

/** Number of features the model was trained on. */
// Number of features the model was trained on.
override val numFeatures: Int = coefficients.size

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ object EstimatorTransformerParamExample {
// Create a LogisticRegression instance. This instance is an Estimator.
val lr = new LogisticRegression()
// Print out the parameters, documentation, and any default values.
println("LogisticRegression parameters:\n" + lr.explainParams() + "\n")
println(s"LogisticRegression parameters:\n ${lr.explainParams()}\n")

// We may set parameters using setter methods.
lr.setMaxIter(10)
Expand All @@ -58,7 +58,7 @@ object EstimatorTransformerParamExample {
// we can view the parameters it used during fit().
// This prints the parameter (name: value) pairs, where names are unique IDs for this
// LogisticRegression instance.
println("Model 1 was fit using parameters: " + model1.parent.extractParamMap)
println(s"Model 1 was fit using parameters: ${model1.parent.extractParamMap}")

// We may alternatively specify parameters using a ParamMap,
// which supports several methods for specifying parameters.
Expand All @@ -73,7 +73,7 @@ object EstimatorTransformerParamExample {
// Now learn a new model using the paramMapCombined parameters.
// paramMapCombined overrides all parameters set earlier via lr.set* methods.
val model2 = lr.fit(training, paramMapCombined)
println("Model 2 was fit using parameters: " + model2.parent.extractParamMap)
println(s"Model 2 was fit using parameters: ${model2.parent.extractParamMap}")

// Prepare test data.
val test = spark.createDataFrame(Seq(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,10 @@ object GradientBoostedTreeClassifierExample {
.setPredictionCol("prediction")
.setMetricName("accuracy")
val accuracy = evaluator.evaluate(predictions)
println("Test Error = " + (1.0 - accuracy))
println(s"Test Error = ${1.0 - accuracy}")

val gbtModel = model.stages(2).asInstanceOf[GBTClassificationModel]
println("Learned classification GBT model:\n" + gbtModel.toDebugString)
println(s"Learned classification GBT model:\n ${gbtModel.toDebugString}")
// $example off$

spark.stop()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,10 @@ object GradientBoostedTreeRegressorExample {
.setPredictionCol("prediction")
.setMetricName("rmse")
val rmse = evaluator.evaluate(predictions)
println("Root Mean Squared Error (RMSE) on test data = " + rmse)
println(s"Root Mean Squared Error (RMSE) on test data = $rmse")

val gbtModel = model.stages(1).asInstanceOf[GBTRegressionModel]
println("Learned regression GBT model:\n" + gbtModel.toDebugString)
println(s"Learned regression GBT model:\n ${gbtModel.toDebugString}")
// $example off$

spark.stop()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ object MulticlassLogisticRegressionWithElasticNetExample {

// Print the coefficients and intercept for multinomial logistic regression
println(s"Coefficients: \n${lrModel.coefficientMatrix}")
println(s"Intercepts: ${lrModel.interceptVector}")
println(s"Intercepts: \n${lrModel.interceptVector}")
// $example off$

spark.stop()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ object MultilayerPerceptronClassifierExample {
val evaluator = new MulticlassClassificationEvaluator()
.setMetricName("accuracy")

println("Test set accuracy = " + evaluator.evaluate(predictionAndLabels))
println(s"Test set accuracy = ${evaluator.evaluate(predictionAndLabels)}")
// $example off$

spark.stop()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ object NaiveBayesExample {
.setPredictionCol("prediction")
.setMetricName("accuracy")
val accuracy = evaluator.evaluate(predictions)
println("Test set accuracy = " + accuracy)
println(s"Test set accuracy = $accuracy")
// $example off$

spark.stop()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ object QuantileDiscretizerExample {
// Output of QuantileDiscretizer for such small datasets can depend on the number of
// partitions. Here we force a single partition to ensure consistent results.
// Note this is not necessary for normal use cases
.repartition(1)
.repartition(1)

// $example on$
val discretizer = new QuantileDiscretizer()
Expand All @@ -45,7 +45,7 @@ object QuantileDiscretizerExample {
.setNumBuckets(3)

val result = discretizer.fit(df).transform(df)
result.show()
result.show(false)
// $example off$

spark.stop()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,10 @@ object RandomForestClassifierExample {
.setPredictionCol("prediction")
.setMetricName("accuracy")
val accuracy = evaluator.evaluate(predictions)
println("Test Error = " + (1.0 - accuracy))
println(s"Test Error = ${(1.0 - accuracy)}")

val rfModel = model.stages(2).asInstanceOf[RandomForestClassificationModel]
println("Learned classification forest model:\n" + rfModel.toDebugString)
println(s"Learned classification forest model:\n ${rfModel.toDebugString}")
// $example off$

spark.stop()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,10 @@ object RandomForestRegressorExample {
.setPredictionCol("prediction")
.setMetricName("rmse")
val rmse = evaluator.evaluate(predictions)
println("Root Mean Squared Error (RMSE) on test data = " + rmse)
println(s"Root Mean Squared Error (RMSE) on test data = $rmse")

val rfModel = model.stages(1).asInstanceOf[RandomForestRegressionModel]
println("Learned regression forest model:\n" + rfModel.toDebugString)
println(s"Learned regression forest model:\n ${rfModel.toDebugString}")
// $example off$

spark.stop()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ object VectorIndexerExample {
val indexerModel = indexer.fit(data)

val categoricalFeatures: Set[Int] = indexerModel.categoryMaps.keys.toSet
println(s"Chose ${categoricalFeatures.size} categorical features: " +
categoricalFeatures.mkString(", "))
println(s"Chose ${categoricalFeatures.size} " +
s"categorical features: ${categoricalFeatures.mkString(", ")}")

// Create new column "indexed" with categorical values transformed to indices
val indexedData = indexerModel.transform(data)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,8 @@ object AssociationRulesExample {
val results = ar.run(freqItemsets)

results.collect().foreach { rule =>
println("[" + rule.antecedent.mkString(",")
+ "=>"
+ rule.consequent.mkString(",") + "]," + rule.confidence)
println(s"[${rule.antecedent.mkString(",")}=>${rule.consequent.mkString(",")} ]" +
s" ${rule.confidence}")
}
// $example off$

Expand All @@ -53,3 +52,4 @@ object AssociationRulesExample {

}
// scalastyle:on println

Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ object BinaryClassificationMetricsExample {

// AUPRC
val auPRC = metrics.areaUnderPR
println("Area under precision-recall curve = " + auPRC)
println(s"Area under precision-recall curve = $auPRC")

// Compute thresholds used in ROC and PR curves
val thresholds = precision.map(_._1)
Expand All @@ -96,7 +96,7 @@ object BinaryClassificationMetricsExample {

// AUROC
val auROC = metrics.areaUnderROC
println("Area under ROC = " + auROC)
println(s"Area under ROC = $auROC")
// $example off$
sc.stop()
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ object DecisionTreeClassificationExample {
(point.label, prediction)
}
val testErr = labelAndPreds.filter(r => r._1 != r._2).count().toDouble / testData.count()
println("Test Error = " + testErr)
println("Learned classification tree model:\n" + model.toDebugString)
println(s"Test Error = $testErr")
println(s"Learned classification tree model:\n ${model.toDebugString}")

// Save and load model
model.save(sc, "target/tmp/myDecisionTreeClassificationModel")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ object DecisionTreeRegressionExample {
(point.label, prediction)
}
val testMSE = labelsAndPredictions.map{ case (v, p) => math.pow(v - p, 2) }.mean()
println("Test Mean Squared Error = " + testMSE)
println("Learned regression tree model:\n" + model.toDebugString)
println(s"Test Mean Squared Error = $testMSE")
println(s"Learned regression tree model:\n ${model.toDebugString}")

// Save and load model
model.save(sc, "target/tmp/myDecisionTreeRegressionModel")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ object FPGrowthExample {
println(s"Number of frequent itemsets: ${model.freqItemsets.count()}")

model.freqItemsets.collect().foreach { itemset =>
println(itemset.items.mkString("[", ",", "]") + ", " + itemset.freq)
println(s"${itemset.items.mkString("[", ",", "]")}, ${itemset.freq}")
}

sc.stop()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ object GradientBoostingClassificationExample {
(point.label, prediction)
}
val testErr = labelAndPreds.filter(r => r._1 != r._2).count.toDouble / testData.count()
println("Test Error = " + testErr)
println("Learned classification GBT model:\n" + model.toDebugString)
println(s"Test Error = $testErr")
println(s"Learned classification GBT model:\n ${model.toDebugString}")

// Save and load model
model.save(sc, "target/tmp/myGradientBoostingClassificationModel")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ object GradientBoostingRegressionExample {
(point.label, prediction)
}
val testMSE = labelsAndPredictions.map{ case(v, p) => math.pow((v - p), 2)}.mean()
println("Test Mean Squared Error = " + testMSE)
println("Learned regression GBT model:\n" + model.toDebugString)
println(s"Test Mean Squared Error = $testMSE")
println(s"Learned regression GBT model:\n ${model.toDebugString}")

// Save and load model
model.save(sc, "target/tmp/myGradientBoostingRegressionModel")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ object HypothesisTestingExample {
// against the label.
val featureTestResults: Array[ChiSqTestResult] = Statistics.chiSqTest(obs)
featureTestResults.zipWithIndex.foreach { case (k, v) =>
println("Column " + (v + 1).toString + ":")
println(s"Column ${(v + 1)} :")
println(k)
} // summary of the test
// $example off$
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ object IsotonicRegressionExample {

// Calculate mean squared error between predicted and real labels.
val meanSquaredError = predictionAndLabel.map { case (p, l) => math.pow((p - l), 2) }.mean()
println("Mean Squared Error = " + meanSquaredError)
println(s"Mean Squared Error = $meanSquaredError")

// Save and load model
model.save(sc, "target/tmp/myIsotonicRegressionModel")
Expand Down
Loading

0 comments on commit 9a2b65a

Please sign in to comment.