From 27987b5e3cc7ffe6cbeddb8606c0dba2b4019a5f Mon Sep 17 00:00:00 2001 From: Jerry Li Date: Thu, 19 Jul 2012 10:43:14 -0700 Subject: [PATCH] Added WhaleReport and spec WhaleReport finds all traces w/ 500 Internal Server Errors and finds all spans in those traces with retries and/or timeouts Author: @jerryli9876 Fixes #71 URL: https://github.com/twitter/zipkin/pull/71 --- .../zipkin/hadoop/DependencyTree.scala | 4 +- .../zipkin/hadoop/ExpensiveEndpoints.scala | 2 +- .../zipkin/hadoop/MemcacheRequest.scala | 2 +- .../zipkin/hadoop/MostCommonCalls.scala | 42 ----------- .../zipkin/hadoop/PopularAnnotations.scala | 2 +- .../twitter/zipkin/hadoop/PopularKeys.scala | 2 +- .../zipkin/hadoop/ServerResponsetime.scala | 2 +- .../com/twitter/zipkin/hadoop/Timeouts.scala | 2 +- .../twitter/zipkin/hadoop/WhaleReport.scala | 73 +++++++++++++++++++ .../twitter/zipkin/hadoop/WorstRuntimes.scala | 2 +- zipkin-hadoop/src/scripts/run.sh | 1 - .../zipkin/hadoop/DependencyTreeSpec.scala | 2 +- .../hadoop/ExpensiveEndpointsSpec.scala | 2 +- .../zipkin/hadoop/MemcacheRequestSpec.scala | 2 +- .../hadoop/PopularAnnotationsSpec.scala | 2 +- .../zipkin/hadoop/PopularKeysSpec.scala | 2 +- .../hadoop/ServerResponsetimeSpec.scala | 3 +- .../twitter/zipkin/hadoop/TimeoutsSpec.scala | 2 +- .../com/twitter/zipkin/hadoop/UtilSpec.scala | 4 +- ...eCallsSpec.scala => WhaleReportSpec.scala} | 58 +++++++++------ .../zipkin/hadoop/WorstRuntimesSpec.scala | 2 +- 21 files changed, 126 insertions(+), 87 deletions(-) delete mode 100644 zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/MostCommonCalls.scala create mode 100644 zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/WhaleReport.scala rename zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/{CommonServiceCallsSpec.scala => WhaleReportSpec.scala} (51%) diff --git a/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/DependencyTree.scala b/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/DependencyTree.scala index 9ca9358df07..1ad23d9de41 100644 --- a/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/DependencyTree.scala +++ b/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/DependencyTree.scala @@ -19,14 +19,13 @@ package com.twitter.zipkin.hadoop import com.twitter.scalding._ import cascading.pipe.joiner._ import com.twitter.zipkin.gen.{SpanServiceName, BinaryAnnotation, Span, Annotation} -import sources.{PrepTsvSource, PreprocessedSpanSourceTest, PreprocessedSpanSource, Util} +import com.twitter.zipkin.hadoop.sources.{PrepTsvSource, PreprocessedSpanSourceTest, PreprocessedSpanSource, Util} /** * Find out how often services call each other throughout the entire system */ class DependencyTree(args: Args) extends Job(args) with DefaultDateRangeJob { - val spanInfo = PreprocessedSpanSource() .read .filter(0) { s : SpanServiceName => s.isSetParent_id() } @@ -40,5 +39,6 @@ class DependencyTree(args: Args) extends Job(args) with DefaultDateRangeJob { val spanInfoWithParent = spanInfo .joinWithSmaller('parent_id -> 'id_1, idName, joiner = new LeftJoin) .groupBy('service, 'name_1){ _.size('count) } + .groupBy('service){ _.sortBy('count) } .write(Tsv(args("output"))) } diff --git a/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/ExpensiveEndpoints.scala b/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/ExpensiveEndpoints.scala index e9668fb81cd..261cf3e9fd4 100644 --- a/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/ExpensiveEndpoints.scala +++ b/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/ExpensiveEndpoints.scala @@ -19,7 +19,7 @@ package com.twitter.zipkin.hadoop import com.twitter.zipkin.gen.{Constants, SpanServiceName, Annotation} import cascading.pipe.joiner.LeftJoin import com.twitter.scalding.{Tsv, DefaultDateRangeJob, Job, Args} -import sources.{PrepTsvSource, Util, PreprocessedSpanSource} +import com.twitter.zipkin.hadoop.sources.{PrepTsvSource, Util, PreprocessedSpanSource} /** * Per service call (i.e. pair of services), finds the average run time (in microseconds) of that service call diff --git a/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/MemcacheRequest.scala b/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/MemcacheRequest.scala index adea943f01d..5d3d79d4090 100644 --- a/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/MemcacheRequest.scala +++ b/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/MemcacheRequest.scala @@ -20,7 +20,7 @@ import com.twitter.scalding._ import java.nio.ByteBuffer import java.util.Arrays import com.twitter.zipkin.gen.{BinaryAnnotation, Span, Constants, Annotation} -import sources.{PrepNoNamesSpanSource, Util} +import com.twitter.zipkin.hadoop.sources.{PrepNoNamesSpanSource, Util} /** * Find out how often each service does memcache accesses diff --git a/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/MostCommonCalls.scala b/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/MostCommonCalls.scala deleted file mode 100644 index 69dd8182c0d..00000000000 --- a/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/MostCommonCalls.scala +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright 2012 Twitter Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.twitter.zipkin.hadoop - -import com.twitter.scalding._ -import cascading.pipe.joiner.LeftJoin -import com.twitter.zipkin.gen.{SpanServiceName} -import sources.{PrepTsvSource, PreprocessedSpanSource, Util} - -/** - * For each service finds the services that it most commonly calls - */ - -class MostCommonCalls(args : Args) extends Job(args) with DefaultDateRangeJob { - val spanInfo = PreprocessedSpanSource() - .read - .mapTo(0 -> ('id, 'parent_id, 'service)) - { s: SpanServiceName => (s.id, s.parent_id, s.service_name) } - - val idName = PrepTsvSource() - .read - - val result = spanInfo - .joinWithSmaller('parent_id -> 'id_1, idName, joiner = new LeftJoin) - .groupBy('service, 'name_1){ _.size('count) } - .groupBy('service){ _.sortBy('count) } - .write(Tsv(args("output"))) -} diff --git a/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/PopularAnnotations.scala b/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/PopularAnnotations.scala index fbdd9a027fa..2a015cc7bb7 100644 --- a/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/PopularAnnotations.scala +++ b/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/PopularAnnotations.scala @@ -18,7 +18,7 @@ package com.twitter.zipkin.hadoop import com.twitter.scalding._ -import sources.PreprocessedSpanSource +import com.twitter.zipkin.hadoop.sources.PreprocessedSpanSource import com.twitter.zipkin.gen.{SpanServiceName, Annotation} /** diff --git a/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/PopularKeys.scala b/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/PopularKeys.scala index b982112eaa2..8d1f66f9104 100644 --- a/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/PopularKeys.scala +++ b/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/PopularKeys.scala @@ -18,7 +18,7 @@ package com.twitter.zipkin.hadoop import com.twitter.scalding._ -import sources.{PreprocessedSpanSource, Util} +import com.twitter.zipkin.hadoop.sources.{PreprocessedSpanSource, Util} import com.twitter.zipkin.gen.{SpanServiceName, BinaryAnnotation, Span} /** diff --git a/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/ServerResponsetime.scala b/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/ServerResponsetime.scala index 5fa0833526e..176b8b45cc2 100644 --- a/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/ServerResponsetime.scala +++ b/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/ServerResponsetime.scala @@ -17,7 +17,7 @@ package com.twitter.zipkin.hadoop import com.twitter.scalding._ -import sources.SpanSource +import com.twitter.zipkin.hadoop.sources.SpanSource import com.twitter.zipkin.gen.{Span, Constants, Annotation} import scala.collection.JavaConverters._ import java.nio.ByteBuffer diff --git a/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/Timeouts.scala b/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/Timeouts.scala index 0790d157911..a447568414f 100644 --- a/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/Timeouts.scala +++ b/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/Timeouts.scala @@ -19,7 +19,7 @@ package com.twitter.zipkin.hadoop import com.twitter.scalding._ import cascading.pipe.joiner.LeftJoin import com.twitter.zipkin.gen.{SpanServiceName, Annotation} -import sources.{PrepTsvSource, PreprocessedSpanSource, Util} +import com.twitter.zipkin.hadoop.sources.{PrepTsvSource, PreprocessedSpanSource, Util} /** * Find which services timeout the most diff --git a/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/WhaleReport.scala b/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/WhaleReport.scala new file mode 100644 index 00000000000..043ad020747 --- /dev/null +++ b/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/WhaleReport.scala @@ -0,0 +1,73 @@ +/* + * Copyright 2012 Twitter Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.twitter.zipkin.hadoop + +import com.twitter.zipkin.gen.{BinaryAnnotation, Constants, SpanServiceName, Annotation} +import com.twitter.scalding.{Tsv, DefaultDateRangeJob, Job, Args} +import com.twitter.zipkin.hadoop.sources.{Util, PreprocessedSpanSource} +import java.nio.ByteBuffer + + +/** + * Finds traces that have 500 Internal Service Errors and finds the spans in those traces that have retries or timeouts + */ + +class WhaleReport(args: Args) extends Job(args) with DefaultDateRangeJob { + + val ERRORS = List("finagle.timeout", "finagle.retry") + + val spanInfo = PreprocessedSpanSource() + .read + .mapTo(0 ->('trace_id, 'id, 'service, 'annotations, 'binary_annotations)) + { s: SpanServiceName => (s.trace_id, s.id, s.service_name, s.annotations.toList, s.binary_annotations.toList) + } + + val errorTraces = spanInfo + .project('trace_id, 'binary_annotations) + .filter('binary_annotations) { + bal: List[BinaryAnnotation] => + bal.exists({ ba: BinaryAnnotation => { + ba != null && ba.value != null && cleanString(ba.value) == WhaleReport.ERROR_MESSAGE + } + }) + } + .project('trace_id) + .rename('trace_id -> 'trace_id_1) + + val filtered = spanInfo + .flatMap('annotations -> 'error) { al : List[Annotation] => { al.find { a : Annotation => ERRORS.contains(a.value) } } } + .joinWithSmaller('trace_id -> 'trace_id_1, errorTraces) + .discard('trace_id_1) + .groupBy('trace_id) { _.toList[String]('service -> 'serviceList) } + .write(Tsv(args("output"))) + + // When converting from ByteBuffer to String some null values seem to be passed along, so we clean them + private def cleanString(bb : ByteBuffer) : String = { + val chars = (new String(Util.getArrayFromBuffer(bb))).toCharArray + var result = "" + for (char <- chars) { + if (char.asInstanceOf[Int] != 0) { + result += char + } + } + result + } +} + +object WhaleReport { + val ERROR_MESSAGE = "500 Internal Server Error" +} \ No newline at end of file diff --git a/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/WorstRuntimes.scala b/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/WorstRuntimes.scala index 3d7bc538e3d..81ed127f27d 100644 --- a/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/WorstRuntimes.scala +++ b/zipkin-hadoop/src/main/scala/com/twitter/zipkin/hadoop/WorstRuntimes.scala @@ -18,7 +18,7 @@ package com.twitter.zipkin.hadoop import com.twitter.scalding._ import com.twitter.zipkin.gen.{Span, Constants, Annotation} -import sources.{PrepNoNamesSpanSource} +import com.twitter.zipkin.hadoop.sources.{PrepNoNamesSpanSource} /** * Obtain the IDs and the durations of the one hundred service calls which take the longest per service diff --git a/zipkin-hadoop/src/scripts/run.sh b/zipkin-hadoop/src/scripts/run.sh index 52cc424654b..070cc113a4b 100755 --- a/zipkin-hadoop/src/scripts/run.sh +++ b/zipkin-hadoop/src/scripts/run.sh @@ -66,6 +66,5 @@ $DIR/run_job.sh -j PopularKeys -d $ENDTIME -o $OUTPUT/PopularKeys & $DIR/run_job.sh -j PopularAnnotations -d $ENDTIME -o $OUTPUT/PopularAnnotations & $DIR/run_job.sh -j FindIDtoName -p -d $ENDTIME $DIR/run_job.sh -j DependencyTree -d $ENDTIME -o $OUTPUT/DependencyTree & -$DIR/run_job.sh -j MostCommonCalls -d $ENDTIME -o $OUTPUT/MostCommonCalls & $DIR/run_job.sh -j Timeouts -s "--error_type finagle.timeout" -o $OUTPUT/Timeouts -d $ENDTIME & $DIR/run_job.sh -j Timeouts -s "--error_type finagle.retry" -o $OUTPUT/Retries -d $ENDTIME & diff --git a/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/DependencyTreeSpec.scala b/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/DependencyTreeSpec.scala index ff27f01f215..1eac6801361 100644 --- a/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/DependencyTreeSpec.scala +++ b/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/DependencyTreeSpec.scala @@ -22,7 +22,7 @@ import com.twitter.scalding._ import gen.AnnotationType import scala.collection.JavaConverters._ import collection.mutable.HashMap -import sources.{PrepTsvSource, PreprocessedSpanSource, Util} +import com.twitter.zipkin.hadoop.sources.{PrepTsvSource, PreprocessedSpanSource, Util} /** * Tests that DependencyTree finds all service calls and how often per pair diff --git a/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/ExpensiveEndpointsSpec.scala b/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/ExpensiveEndpointsSpec.scala index 6d294db2734..fa4dab0e9bc 100644 --- a/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/ExpensiveEndpointsSpec.scala +++ b/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/ExpensiveEndpointsSpec.scala @@ -29,7 +29,7 @@ import com.twitter.scalding.RichDate import com.twitter.zipkin.gen.AnnotationType import com.twitter.scalding.JobTest import com.twitter.scalding.Tsv -import sources.{PrepTsvSource, PreprocessedSpanSource, Util} +import com.twitter.zipkin.hadoop.sources.{PrepTsvSource, PreprocessedSpanSource, Util} /** * Tests that ExpensiveEndpointSpec finds the average run time of each service diff --git a/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/MemcacheRequestSpec.scala b/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/MemcacheRequestSpec.scala index 84e53bb7e31..5c8fcb6e3d2 100644 --- a/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/MemcacheRequestSpec.scala +++ b/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/MemcacheRequestSpec.scala @@ -20,7 +20,7 @@ import org.specs.Specification import com.twitter.zipkin.gen import com.twitter.scalding._ import gen.AnnotationType -import sources.{PrepNoNamesSpanSource, Util} +import com.twitter.zipkin.hadoop.sources.{PrepNoNamesSpanSource, Util} import scala.collection.JavaConverters._ import collection.mutable.HashMap import java.nio.ByteBuffer diff --git a/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/PopularAnnotationsSpec.scala b/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/PopularAnnotationsSpec.scala index 2ee1c15fa1b..48cd66f1a08 100644 --- a/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/PopularAnnotationsSpec.scala +++ b/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/PopularAnnotationsSpec.scala @@ -20,7 +20,7 @@ import org.specs.Specification import com.twitter.zipkin.gen import com.twitter.scalding._ import gen.AnnotationType -import sources.{PreprocessedSpanSource, Util} +import com.twitter.zipkin.hadoop.sources.{PreprocessedSpanSource, Util} import scala.collection.JavaConverters._ import scala.collection.mutable._ diff --git a/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/PopularKeysSpec.scala b/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/PopularKeysSpec.scala index 04a325e2e69..0156240cf12 100644 --- a/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/PopularKeysSpec.scala +++ b/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/PopularKeysSpec.scala @@ -20,7 +20,7 @@ import org.specs.Specification import com.twitter.zipkin.gen import com.twitter.scalding._ import gen.AnnotationType -import sources.{PreprocessedSpanSource, Util} +import com.twitter.zipkin.hadoop.sources.{PreprocessedSpanSource, Util} import scala.collection.JavaConverters._ import scala.collection.mutable._ diff --git a/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/ServerResponsetimeSpec.scala b/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/ServerResponsetimeSpec.scala index db5e9a70b5a..106793d5aba 100644 --- a/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/ServerResponsetimeSpec.scala +++ b/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/ServerResponsetimeSpec.scala @@ -4,8 +4,7 @@ package com.twitter.zipkin.hadoop import org.specs.Specification import com.twitter.zipkin.gen import com.twitter.scalding._ -import gen.AnnotationType -import sources.{SpanSource, Util} +import com.twitter.zipkin.hadoop.sources.{SpanSource, Util} import scala.collection.JavaConverters._ class ServerResponsetimeSpec extends Specification with TupleConversions { diff --git a/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/TimeoutsSpec.scala b/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/TimeoutsSpec.scala index 3103371b00a..5c1f66e1761 100644 --- a/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/TimeoutsSpec.scala +++ b/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/TimeoutsSpec.scala @@ -23,7 +23,7 @@ import com.twitter.scalding._ import gen.AnnotationType import scala.collection.JavaConverters._ import scala.collection.mutable._ -import sources.{PrepTsvSource, PreprocessedSpanSource, Util} +import com.twitter.zipkin.hadoop.sources.{PrepTsvSource, PreprocessedSpanSource, Util} /** * Tests that Timeouts finds the service calls where timeouts occur and how often diff --git a/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/UtilSpec.scala b/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/UtilSpec.scala index d2e77f4b00a..2bd27e488fd 100644 --- a/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/UtilSpec.scala +++ b/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/UtilSpec.scala @@ -1,9 +1,9 @@ package com.twitter.zipkin.hadoop import org.specs.Specification -import sources.Util +import com.twitter.zipkin.hadoop.sources.Util import com.twitter.zipkin.gen -import gen.{AnnotationType, Annotation} +import com.twitter.zipkin.gen.{AnnotationType, Annotation} import scala.collection.JavaConverters._ class UtilSpec extends Specification { diff --git a/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/CommonServiceCallsSpec.scala b/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/WhaleReportSpec.scala similarity index 51% rename from zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/CommonServiceCallsSpec.scala rename to zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/WhaleReportSpec.scala index d4d86be1c7e..ca1bdca2c61 100644 --- a/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/CommonServiceCallsSpec.scala +++ b/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/WhaleReportSpec.scala @@ -21,53 +21,63 @@ import com.twitter.zipkin.gen import com.twitter.scalding._ import gen.AnnotationType import scala.collection.JavaConverters._ -import collection.mutable.HashMap -import sources.{PrepTsvSource, PreprocessedSpanSource, Util} +import collection.mutable.{HashMap, HashSet} +import com.twitter.zipkin.hadoop.sources.{PrepTsvSource, PreprocessedSpanSource, Util} +import java.nio.ByteBuffer +import java.util.Arrays /** -* Tests that MostCommonCalls finds the most commonly called services per service -*/ + * Tests that WhaleReport finds traces with 500 Internal Service Errors and finds the spans in those traces with finagle.retry or finagle.timeouts. + */ -class CommonServiceCallsSpec extends Specification with TupleConversions { +class WhaleReportSpec extends Specification with TupleConversions { noDetailedDiffs() implicit val dateRange = DateRange(RichDate(123), RichDate(321)) + val buf = ByteBuffer.allocate(100); + + // Create a character ByteBuffer + val cbuf = buf.asCharBuffer(); + + // Write a string + cbuf.put(WhaleReport.ERROR_MESSAGE); + val endpoint = new gen.Endpoint(123, 666, "service") val endpoint1 = new gen.Endpoint(123, 666, "service1") val endpoint2 = new gen.Endpoint(123, 666, "service2") val span = new gen.SpanServiceName(12345, "methodcall", 666, - List(new gen.Annotation(1000, "cs").setHost(endpoint), new gen.Annotation(2000, "sr").setHost(endpoint), new gen.Annotation(3000, "ss").setHost(endpoint), new gen.Annotation(4000, "cr").setHost(endpoint)).asJava, - List[gen.BinaryAnnotation]().asJava, "service") - val span1 = new gen.SpanServiceName(123456, "methodcall", 666, + List(new gen.Annotation(1000, "finagle.timeout").setHost(endpoint), new gen.Annotation(1001, "sr").setHost(endpoint), new gen.Annotation(1002, "ss").setHost(endpoint), new gen.Annotation(1003, "cr").setHost(endpoint)).asJava, + List[gen.BinaryAnnotation]( new gen.BinaryAnnotation("http.responsecode", buf, AnnotationType.BOOL ) ).asJava, "service") + val span1 = new gen.SpanServiceName(12345, "methodcall", 666, List(new gen.Annotation(1000, "cs").setHost(endpoint2), new gen.Annotation(2000, "sr").setHost(endpoint2), new gen.Annotation(4000, "ss").setHost(endpoint2), new gen.Annotation(5000, "cr").setHost(endpoint2)).asJava, List(new gen.BinaryAnnotation("bye", null, AnnotationType.BOOL)).asJava, "service2") - val span2 = new gen.SpanServiceName(1234567, "methodcall", 666, - List(new gen.Annotation(1000, "cs").setHost(endpoint2), new gen.Annotation(3000, "cr").setHost(endpoint2)).asJava, + val span2 = new gen.SpanServiceName(12345, "methodcall", 666, + List(new gen.Annotation(1000, "finagle.retry").setHost(endpoint2), new gen.Annotation(3000, "cr").setHost(endpoint2)).asJava, List(new gen.BinaryAnnotation("bye", null, AnnotationType.BOOL)).asJava, "service2") - val spans = (Util.repeatSpan(span, 30, 32, 1) ++ Util.repeatSpan(span1, 50, 100, 32)) + val spans = (Util.repeatSpan(span, 0, 32, 1) ++ Util.repeatSpan(span1, 0, 100, 32) ++ Util.repeatSpan(span2, 0, 200, 100)) - "MostCommonCalls" should { - "Return the most common service calls" in { - JobTest("com.twitter.zipkin.hadoop.MostCommonCalls"). + "WhaleReport" should { + "Return fail whales!" in { + JobTest("com.twitter.zipkin.hadoop.WhaleReport"). arg("input", "inputFile"). arg("output", "outputFile"). arg("date", "2012-01-01T01:00"). source(PreprocessedSpanSource(), spans). source(PrepTsvSource(), Util.getSpanIDtoNames(spans)). - sink[(String, String, Long)](Tsv("outputFile")) { - val result = new HashMap[String, Long]() - result("service, null") = 0 - result("service2, null") = 0 - result("service2, service1") = 0 + sink[(Long, List[String])](Tsv("outputFile")) { + var result = new HashSet[String]() + var actual = new HashSet[String]() + result += "service" + result += "service2" outputBuffer => outputBuffer foreach { e => - result(e._1 + ", " + e._2) = e._3 + e._1 mustEqual 12345 + for (name <- e._2) + actual += name } - result("service, null") mustEqual 31 - result("service2, null") mustEqual 20 - result("service2, service") mustEqual 31 - } + actual mustEqual result }.run.finish } } +} diff --git a/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/WorstRuntimesSpec.scala b/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/WorstRuntimesSpec.scala index eb1155690da..e87a4196cfb 100644 --- a/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/WorstRuntimesSpec.scala +++ b/zipkin-hadoop/src/test/scala/com/twitter/zipkin/hadoop/WorstRuntimesSpec.scala @@ -22,7 +22,7 @@ import com.twitter.scalding._ import scala.collection.JavaConverters._ import collection.mutable.HashMap import com.twitter.zipkin.gen.AnnotationType -import sources.{Util, PrepNoNamesSpanSource} +import com.twitter.zipkin.hadoop.sources.{Util, PrepNoNamesSpanSource} /** * Tests that WorstRuntimes finds the spans which take the longest to run