Skip to content
This repository has been archived by the owner on Jul 10, 2024. It is now read-only.

Report packet-capture statistics by host #179

Merged
merged 12 commits into from
Nov 11, 2022
111 changes: 105 additions & 6 deletions apidump/summary.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,13 @@ import (
"fmt"
"sort"

"github.com/akitasoftware/akita-libs/client_telemetry"
"github.com/akitasoftware/go-utils/math"
"github.com/spf13/viper"

"github.com/akitasoftware/akita-cli/pcap"
"github.com/akitasoftware/akita-cli/printer"
"github.com/akitasoftware/akita-cli/trace"
"github.com/spf13/viper"
)

// Captures apidump progress.
Expand Down Expand Up @@ -71,16 +74,33 @@ func (s *Summary) PrintPacketCounts() {
// TODO: it would be nice to show hostnames, if we have them, to more clearly
// identify the traffic.
thatplguy marked this conversation as resolved.
Show resolved Hide resolved
func (s *Summary) PrintPacketCountHighlights() {
top := s.FilterSummary.Summary(20)
summaryLimit := 20
top := s.FilterSummary.Summary(summaryLimit)

totalTraffic := top.Total.TCPPackets
if totalTraffic == 0 {
// PrintWarnings already covers this case
return
}

// If we hit the limit of the number of ports we tracked, mention so.
// This should (hopefully) be unlikely.
if top.ByPortOverflow != nil {
printer.Stderr.Infof(
"More than %d ports with traffic. Showing the top %d of the first %d.\n",
top.ByPortOverflowLimit, math.Min(summaryLimit, top.ByPortOverflowLimit), top.ByPortOverflowLimit,
)
}

s.printPortHighlights(top)
s.printHostHighlights(top)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we clarify to the user that these highlights do not describe disjoint sets of traffic?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added section labels to that effect in dd88251.

}

func (s *Summary) printPortHighlights(top *client_telemetry.PacketCountSummary) {
totalTraffic := top.Total.TCPPackets

// Sort by TCP traffic volume and list in descending order.
// This is already sorted in topNByTcpPacketCount but that ordering
// This is already sorted in TopN but that ordering
// doesn't seem accessible here.
ports := make([]int, 0, len(top.TopByPort))
for p := range top.TopByPort {
Expand All @@ -90,18 +110,18 @@ func (s *Summary) PrintPacketCountHighlights() {
return top.TopByPort[ports[i]].TCPPackets > top.TopByPort[ports[j]].TCPPackets
})

totalListed := 0
totalListedForPorts := 0
for i, p := range ports {
thisPort := top.TopByPort[p]
pct := thisPort.TCPPackets * 100 / totalTraffic
totalListed += thisPort.TCPPackets
totalListedForPorts += thisPort.TCPPackets

// Stop when the running total would be >100%. (Each packet is counted both
// in the source port and in the destination port; we want to avoid
// showing a bunch of ephemeral ports even if they're all above the threshold.)
//
// Before that limit is hit, list at least two sources, but stop when less than 3% of traffic.
if (totalListed > totalTraffic) || (pct < 3 && i >= 2) {
if (totalListedForPorts > totalTraffic) || (pct < 3 && i >= 2) {
break
}

Expand Down Expand Up @@ -148,6 +168,85 @@ func (s *Summary) PrintPacketCountHighlights() {
}
}

// printHostHighlights prints one highlight line per host in top.TopByHost via
// printer.Stderr, covering the busiest hosts first. For each selected host it
// reports, in order of preference: the HTTP request and TLS handshake counts
// (plus a note when both are present), the pre-filter HTTP request count from
// s.PrefilterSummary when nothing survived filtering, the TLS handshake count
// alone, or a message that the traffic could not be parsed.
//
// XXX(cns): Not all metrics can be associated with a host. We currently have
// HTTP requests and TLS handshakes.
func (s *Summary) printHostHighlights(top *client_telemetry.PacketCountSummary) {
// Sort by HTTP traffic volume, then TLS handshake counts, both descending.
// We do not have TCP packet counts for hosts.
hosts := make([]string, 0, len(top.TopByHost))
// Sum of HTTP requests, HTTP responses, and TLS hellos over every host; used
// below as the denominator for the cumulative percentage.
totalCountForHosts := 0
for h, c := range top.TopByHost {
hosts = append(hosts, h)
totalCountForHosts += c.HTTPRequests + c.HTTPResponses + c.TLSHello
}
// Lexicographic comparison on (HTTPRequests, HTTPResponses, TLSHello), each
// descending. Hosts that tie on all three keep whatever relative order
// sort.Slice leaves them in (it is not a stable sort).
sort.Slice(hosts, func(i, j int) bool {
left := top.TopByHost[hosts[i]]
right := top.TopByHost[hosts[j]]

if left.HTTPRequests != right.HTTPRequests {
return left.HTTPRequests > right.HTTPRequests
} else if left.HTTPResponses != right.HTTPResponses {
return left.HTTPResponses > right.HTTPResponses
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we sort by the sum instead?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense. Changed in bf4d2b3.

} else {
return left.TLSHello > right.TLSHello
}
})

// Walk the sorted hosts, accumulating their share of the total count, and
// stop at the first host (after the first two, which are always selected)
// whose inclusion would push the cumulative share above 97%. This avoids
// a long tail of hosts with very few TLS handshakes.
//
// NOTE(review): the host that crosses the 97% mark is itself excluded, so the
// selected hosts can cover noticeably less than 97% of the data (e.g. shares
// of 50/30/20 select only the first two hosts, 80%). If the intent is "the
// first N hosts capturing at least 97%", the cutoff should be tested against
// the share accumulated before the current host is added.
printUpTo := 0
longestHostLength := 0
countSoFar := 0
for i, h := range hosts {
thisHost := top.TopByHost[h]
countSoFar += thisHost.HTTPRequests + thisHost.HTTPResponses + thisHost.TLSHello
// Integer arithmetic: the percentage is truncated toward zero.
// NOTE(review): this divides by zero (run-time panic) if every host's
// counters are zero; whether such entries can appear in TopByHost is not
// visible from here -- confirm or guard.
pctSoFar := countSoFar * 100 / totalCountForHosts

if 97 < pctSoFar && i >= 2 {
break
}

// Host i is selected; remember the widest name so labels can be padded to a
// common width below.
printUpTo = i + 1
longestHostLength = math.Max(longestHostLength, len(h))
}

for _, h := range hosts[:printUpTo] {
thisHost := top.TopByHost[h]
// Left-justify the host name, padded to the longest selected name, so the
// messages line up in columns.
label := fmt.Sprintf("Host %-*s", longestHostLength, h)

// If we saw any HTTP traffic, report that. But, if there's a high
// percentage of TLS handshakes, note that too. Hosts don't have
// counts for unparsed packets.
// (As written, the "mix" note is printed whenever at least one TLS
// handshake was seen alongside HTTP traffic; no percentage is computed.)
if thisHost.HTTPRequests+thisHost.HTTPResponses > 0 {
printer.Stderr.Infof("%s %d HTTP requests, %d TLS handshakes.\n",
label, thisHost.HTTPRequests, thisHost.TLSHello)
if thisHost.TLSHello > 0 {
printer.Stderr.Infof("%s appears to contain a mix of encrypted and unencrypted traffic.\n", label)
}
continue
}

// If we saw HTTP traffic but it was filtered, give the pre-filter statistics.
preFilter := s.PrefilterSummary.TotalOnHost(h)
if preFilter.HTTPRequests+preFilter.HTTPResponses > 0 {
printer.Stderr.Infof("%s no HTTP requests satisfied all the filters you gave, but %d HTTP requests were seen before your path and host filters were applied.\n",
label, preFilter.HTTPRequests)
continue
}

// If we saw TLS, report the presence of encrypted traffic
if thisHost.TLSHello > 0 {
printer.Stderr.Infof("%s no HTTP requests, %d TLS handshakes indicating encrypted traffic.\n",
label, thisHost.TLSHello)
continue
}

// Flag as unparsable
printer.Stderr.Infof("%s no HTTP requests or responses; the data to this service could not be parsed.\n",
label)
}
}

// Prints warnings based on packet capture behavior, such as not capturing
// any packets, capturing packets but failing to parse them, etc.
func (s *Summary) PrintWarnings() {
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ require (
github.com/OneOfOne/xxhash v1.2.8
github.com/Pallinder/go-randomdata v1.2.0
github.com/akitasoftware/akita-ir v0.0.0-20220630210013-8926783978fe
github.com/akitasoftware/akita-libs v0.0.0-20221109215053-bb7c4fbe2f9c
github.com/akitasoftware/akita-libs v0.0.0-20221111053102-849d2e280045
github.com/akitasoftware/go-utils v0.0.0-20220606224752-aad0f81bb9e7
github.com/akitasoftware/plugin-flickr v0.2.0
github.com/andybalholm/brotli v1.0.1
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ github.com/akitasoftware/akita-libs v0.0.0-20221107222856-a90a0970b256 h1:vbeTOK
github.com/akitasoftware/akita-libs v0.0.0-20221107222856-a90a0970b256/go.mod h1:Sjt1jp10Tvhpi/TcDAOmqABRpbcvp9uFz07M+bjvJzA=
github.com/akitasoftware/akita-libs v0.0.0-20221109215053-bb7c4fbe2f9c h1:ZysezWqeqISAkxzW5AhG+AHGDuaVOF1nrg0Vms1BT7Q=
github.com/akitasoftware/akita-libs v0.0.0-20221109215053-bb7c4fbe2f9c/go.mod h1:Sjt1jp10Tvhpi/TcDAOmqABRpbcvp9uFz07M+bjvJzA=
github.com/akitasoftware/akita-libs v0.0.0-20221111053102-849d2e280045 h1:vMWr6ePyXocxbYs6AtlI0M8gobnCgJAys37nJS84i6c=
github.com/akitasoftware/akita-libs v0.0.0-20221111053102-849d2e280045/go.mod h1:Sjt1jp10Tvhpi/TcDAOmqABRpbcvp9uFz07M+bjvJzA=
github.com/akitasoftware/go-utils v0.0.0-20220606224752-aad0f81bb9e7 h1:v2iX9e9Bv6e3hUQz3zCkqpO9SQkMpLPu5gWJG12J5Zs=
github.com/akitasoftware/go-utils v0.0.0-20220606224752-aad0f81bb9e7/go.mod h1:+IOXf7l/QCAQECJzjJwhTp1sBkRoJ6WciZwJezUwBa4=
github.com/akitasoftware/gopacket v1.1.18-0.20210730205736-879e93dac35b h1:toBhS5rhCjo/N4YZ1cYtlsdSTGjMFH+gbJGCc+OmZiY=
Expand Down
36 changes: 33 additions & 3 deletions trace/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,16 @@ package trace

import (
"math"
"sort"
"strconv"

"github.com/OneOfOne/xxhash"
"github.com/akitasoftware/akita-libs/client_telemetry"

"github.com/akitasoftware/akita-cli/util"
"github.com/akitasoftware/akita-libs/akid"
"github.com/akitasoftware/akita-libs/akinet"

"github.com/akitasoftware/akita-cli/util"
)

type Collector interface {
Expand Down Expand Up @@ -101,24 +103,52 @@ type PacketCountCollector struct {
}

func (pc *PacketCountCollector) Process(t akinet.ParsedNetworkTraffic) error {
switch t.Content.(type) {
switch c := t.Content.(type) {
case akinet.HTTPRequest:
pc.PacketCounts.Update(client_telemetry.PacketCounts{
Interface: t.Interface,
DstHost: c.Host,
SrcPort: t.SrcPort,
DstPort: t.DstPort,
HTTPRequests: 1,
})
case akinet.HTTPResponse:
// TODO(cns): There's no easy way to get the host here to count HTTP
// responses. Revisit this if we ever add a pass to pair HTTP
// requests and responses independently of the backend collector.
pc.PacketCounts.Update(client_telemetry.PacketCounts{
Interface: t.Interface,
SrcPort: t.SrcPort,
DstPort: t.DstPort,
HTTPResponses: 1,
})
case akinet.TLSClientHello, akinet.TLSServerHello:
case akinet.TLSClientHello:
var dstHost string
if c.Hostname != nil {
dstHost = *c.Hostname
}

pc.PacketCounts.Update(client_telemetry.PacketCounts{
Interface: t.Interface,
DstHost: dstHost,
SrcPort: t.SrcPort,
DstPort: t.DstPort,
TLSHello: 1,
})
case akinet.TLSServerHello:
// Ideally, we would pick the DNS name the client used in the
// Client Hello, but we don't pair those messages. Barring that, any
// of the DNS names will serve as a reasonable identifier. Pick the
// largest, which avoids "*" prefixes when possible.
var dstHost string
if 0 < len(c.DNSNames) {
sort.Strings(c.DNSNames)
dstHost = c.DNSNames[len(c.DNSNames)-1]
}

pc.PacketCounts.Update(client_telemetry.PacketCounts{
Interface: t.Interface,
DstHost: dstHost,
SrcPort: t.SrcPort,
DstPort: t.DstPort,
TLSHello: 1,
Expand Down
Loading