From d1d91e8f35ff12a2a008d3be30ed7ee16afae4bb Mon Sep 17 00:00:00 2001 From: Scott Bommarito Date: Tue, 5 Feb 2019 14:30:51 -0800 Subject: [PATCH] ChinaStatsCollector should properly reformat scstatus of log lines (#713) --- src/Stats.AzureCdnLogs.Common/CdnLogEntryParser.cs | 10 ++++++---- .../ChinaStatsCollector.cs | 12 +++++------- .../ChinaCollectorTests.cs | 7 +++---- .../CdnLogEntryParserFacts.cs | 6 +++++- 4 files changed, 19 insertions(+), 16 deletions(-) diff --git a/src/Stats.AzureCdnLogs.Common/CdnLogEntryParser.cs b/src/Stats.AzureCdnLogs.Common/CdnLogEntryParser.cs index 56a4311f1..42fb5bcfe 100644 --- a/src/Stats.AzureCdnLogs.Common/CdnLogEntryParser.cs +++ b/src/Stats.AzureCdnLogs.Common/CdnLogEntryParser.cs @@ -105,12 +105,14 @@ public static CdnLogEntry ParseLogEntryFromLine(int lineNumber, string line, Act // small margin of error caused by non-200 HTTP status codes. if (entry.CacheStatusCode != null) { - // Format: cache status + "/" + HTTP status code - // Example: "TCP_MISS/504" + // Previously, we were not correctly converting logs from China CDN to the format used by Global CDN, so we must support both formats. + // Global format: cache status + "/" + HTTP status code + // Global example: "TCP_MISS/504" + // China format: HTTP status code + // China example: "504" var slashIndex = entry.CacheStatusCode.LastIndexOf('/'); uint httpStatusCode; - if (slashIndex >= 0 - && slashIndex + 1 < entry.CacheStatusCode.Length + if (slashIndex + 1 < entry.CacheStatusCode.Length && uint.TryParse(entry.CacheStatusCode.Substring(slashIndex + 1), out httpStatusCode) && (httpStatusCode < 200 || httpStatusCode >= 300)) { diff --git a/src/Stats.CollectAzureChinaCDNLogs/ChinaStatsCollector.cs b/src/Stats.CollectAzureChinaCDNLogs/ChinaStatsCollector.cs index d41560ae7..5ec5fc0c1 100644 --- a/src/Stats.CollectAzureChinaCDNLogs/ChinaStatsCollector.cs +++ b/src/Stats.CollectAzureChinaCDNLogs/ChinaStatsCollector.cs @@ -47,7 +47,7 @@ public override OutputLogLine TransformRawLogLine(string line) string.IsNullOrEmpty(line) || line.Trim().StartsWith("c-ip", ignoreCase: true, culture: System.Globalization.CultureInfo.InvariantCulture)) { - //is the header + // Ignore empty lines or the header return null; } @@ -59,11 +59,9 @@ public override OutputLogLine TransformRawLogLine(string line) DateTime dt = DateTime.Parse(timestamp, CultureInfo.InvariantCulture, System.Globalization.DateTimeStyles.AdjustToUniversal); string timeStamp2 = ToUnixTimeStamp(dt); - //ignore 400 error codes - if(segments[5] == "400") - { - return null; - } + // Global status code format: cache status + "/" + HTTP status code + // China status code format: HTTP status code + var scstatus = segments[(int)ChinaLogHeaderFields.hitmiss] + "/" + segments[(int)ChinaLogHeaderFields.scstatus]; return new OutputLogLine(timestamp: timeStamp2, timetaken: notAvailableInt, @@ -71,7 +69,7 @@ public override OutputLogLine TransformRawLogLine(string line) filesize: notAvailableInt, sip: segments[(int)ChinaLogHeaderFields.sip], sport: notAvailableInt, - scstatus: segments[(int)ChinaLogHeaderFields.scstatus], + scstatus: scstatus, scbytes: segments[(int)ChinaLogHeaderFields.scbytes], csmethod: segments[(int)ChinaLogHeaderFields.csmethod], csuristem: segments[(int)ChinaLogHeaderFields.csuristem], diff --git a/tests/Tests.Stats.CollectAzureChinaCDNLogs/ChinaCollectorTests.cs b/tests/Tests.Stats.CollectAzureChinaCDNLogs/ChinaCollectorTests.cs index c1515aea1..cc2c894f4 100644 --- a/tests/Tests.Stats.CollectAzureChinaCDNLogs/ChinaCollectorTests.cs +++ b/tests/Tests.Stats.CollectAzureChinaCDNLogs/ChinaCollectorTests.cs @@ -9,11 +9,10 @@ namespace Tests.Stats.CollectAzureChinaCDNLogs public class ChinaCollectorTests { [Theory] - [InlineData("40.125.202.231,7/27/2017 4:50:09 PM +00:00,GET,\"/v3-flatcontainer/system.net.primitives/index.json\",HTTP/1.1,200,1196,\"-\",\"NuGet+Command+Line/4.3.0+(Microsoft+Windows+NT+6.2.9200.0)\",133,TCP_MISS,118.180.6.168", "1501174209 0 40.125.202.231 0 118.180.6.168 0 200 1196 GET /v3-flatcontainer/system.net.primitives/index.json - 133 0 - NuGet+Command+Line/4.3.0+(Microsoft+Windows+NT+6.2.9200.0) na na")] - [InlineData("40.125.202.231,7/27/2017 4:50:09 PM +00:00,GET,\"/v3-flatcontainer/system.net.primitives/index.json\",HTTP/1.1,400,1196,\"-\",\"NuGet+Command+Line/4.3.0+(Microsoft+Windows+NT+6.2.9200.0)\",133,TCP_MISS,118.180.6.168", null)] + [InlineData("40.125.202.231,7/27/2017 4:50:09 PM +00:00,GET,\"/v3-flatcontainer/system.net.primitives/index.json\",HTTP/1.1,200,1196,\"-\",\"NuGet+Command+Line/4.3.0+(Microsoft+Windows+NT+6.2.9200.0)\",133,TCP_MISS,118.180.6.168", "1501174209 0 40.125.202.231 0 118.180.6.168 0 TCP_MISS/200 1196 GET /v3-flatcontainer/system.net.primitives/index.json - 133 0 - NuGet+Command+Line/4.3.0+(Microsoft+Windows+NT+6.2.9200.0) na na")] [InlineData("c-ip, timestamp, cs-method, cs-uri-stem, http-ver, sc-status, sc-bytes, c-referer, c-user-agent, rs-duration(ms), hit-miss, s-ip", null)] - [InlineData("66.102.6.172,7/27/2017 4:50:09 PM +00:00,GET,\"/favicon.ico\",HTTP/1.1,200,726,\"-\",\"Mozilla/5.0+ X11;+Linux+x86_64)+AppleWebKit/537.36+(KHTML,+like+Gecko)+Chrome/49.0.2623.75+Safari/537.36+Google+Favicon\",216,TCP_MISS,150.138.143.19", "1501174209 0 66.102.6.172 0 150.138.143.19 0 200 726 GET /favicon.ico - 216 0 - Mozilla/5.0+ X11;+Linux+x86_64)+AppleWebKit/537.36+(KHTML,+like+Gecko)+Chrome/49.0.2623.75+Safari/537.36+Google+Favicon na na")] - [InlineData("66.102.6.172,7/27/2017 4:50:09 PM +00:00,GET,\"/favicon.ico\",HTTP/1.1,200,726,\"-\",\"Mozilla/5.0+ X11;+Linux+x86_64)+AppleWebKit/537.36+(KHTML,+like+Gecko)+Chrome/49.0.2623.75+Safari/537.36+Google,Favicon\",216,TCP_MISS,150.138.143.19", "1501174209 0 66.102.6.172 0 150.138.143.19 0 200 726 GET /favicon.ico - 216 0 - Mozilla/5.0+ X11;+Linux+x86_64)+AppleWebKit/537.36+(KHTML,+like+Gecko)+Chrome/49.0.2623.75+Safari/537.36+Google,Favicon na na")] + [InlineData("66.102.6.172,7/27/2017 4:50:09 PM +00:00,GET,\"/favicon.ico\",HTTP/1.1,200,726,\"-\",\"Mozilla/5.0+ X11;+Linux+x86_64)+AppleWebKit/537.36+(KHTML,+like+Gecko)+Chrome/49.0.2623.75+Safari/537.36+Google+Favicon\",216,TCP_MISS,150.138.143.19", "1501174209 0 66.102.6.172 0 150.138.143.19 0 TCP_MISS/200 726 GET /favicon.ico - 216 0 - Mozilla/5.0+ X11;+Linux+x86_64)+AppleWebKit/537.36+(KHTML,+like+Gecko)+Chrome/49.0.2623.75+Safari/537.36+Google+Favicon na na")] + [InlineData("66.102.6.172,7/27/2017 4:50:09 PM +00:00,GET,\"/favicon.ico\",HTTP/1.1,200,726,\"-\",\"Mozilla/5.0+ X11;+Linux+x86_64)+AppleWebKit/537.36+(KHTML,+like+Gecko)+Chrome/49.0.2623.75+Safari/537.36+Google,Favicon\",216,TCP_MISS,150.138.143.19", "1501174209 0 66.102.6.172 0 150.138.143.19 0 TCP_MISS/200 726 GET /favicon.ico - 216 0 - Mozilla/5.0+ X11;+Linux+x86_64)+AppleWebKit/537.36+(KHTML,+like+Gecko)+Chrome/49.0.2623.75+Safari/537.36+Google,Favicon na na")] public void TransformRawLogLine(string input, string expectedOutput) { var collector = new ChinaStatsCollector(); diff --git a/tests/Tests.Stats.ImportAzureCdnStatistics/CdnLogEntryParserFacts.cs b/tests/Tests.Stats.ImportAzureCdnStatistics/CdnLogEntryParserFacts.cs index f7a71211d..1c9545c5d 100644 --- a/tests/Tests.Stats.ImportAzureCdnStatistics/CdnLogEntryParserFacts.cs +++ b/tests/Tests.Stats.ImportAzureCdnStatistics/CdnLogEntryParserFacts.cs @@ -22,6 +22,11 @@ public class TheParseLogEntryFromLineMethod [InlineData("SOMETHING_ELSE/404")] [InlineData("TCP_MISS/504")] [InlineData("TCP_MISS/604")] + [InlineData("0")] + [InlineData("304")] + [InlineData("400")] + [InlineData("404")] + [InlineData("500")] public void IgnoresNon200HttpStatusCodes(string status) { // Arrange @@ -43,7 +48,6 @@ public void IgnoresNon200HttpStatusCodes(string status) [InlineData("TCP_MISS/")] [InlineData("TCP_MISS")] [InlineData("200")] - [InlineData("500")] public void DoesNotIgnore200LevelAndUnrecognizedHttpStatusCodes(string status) { // Arrange