Skip to content
This repository has been archived by the owner on Jul 30, 2024. It is now read-only.
/ NuGet.Jobs Public archive

Fix for statistics pipline to display proper last 6 weeks data around the new year #712

Merged
merged 5 commits into from
Feb 1, 2019
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,31 +19,39 @@ BEGIN
AND [Year] = @MinYear

DECLARE @Cursor DATETIME = (SELECT ISNULL(MAX([Position]), @ReportGenerationTime) FROM [dbo].[Cursors] (NOLOCK) WHERE [Name] = 'GetDirtyPackageId')

SELECT TOP 6 D.[Year],
DECLARE @MaxDate DATE = DATEADD(DAY, 42, @MinDate);

WITH WeekLookup AS
(
-- If we just take all the rows between @MinDate and @MaxDate from Dimension_Date table
-- we might end up the days from the week that contains 1st of January to have two
-- different week numbers:
-- 12/30/2018 -> 53rd of 2018
-- 12/31/2018 -> 53rd of 2018
-- 1/1/2019 -> 1st of 2019
-- 1/2/2019 -> 1st of 2019
-- etc.
-- which results in the wrong grouping when group by [WeekOfYear] and [Year] is done:
-- the single week gets split into two portions, one for the previous year and another for
-- the new one with aggregations calculated separately for each of the portions.
-- This CTE works around the issue by making sure that all days of the week map to
-- the same [WeekOfYear] and [Year], specifically to that of the first day of that week.
SELECT d.[Id], dd.[WeekOfYear], dd.[Year]
FROM [dbo].[Dimension_Date] AS d WITH(NOLOCK)
CROSS APPLY
(
SELECT TOP(1) [WeekOfYear], [Year]
FROM [dbo].[Dimension_Date] AS d2 WITH(NOLOCK)
WHERE d2.[Date] <= d.[Date] AND d2.[DayOfWeek] = 1
ORDER BY d2.[Date] DESC
) AS dd
WHERE d.[Date] >= @MinDate AND d.[Date] < @MaxDate AND d.[Date] <= @Cursor
)
SELECT D.[Year],
D.[WeekOfYear],
SUM(ISNULL(Facts.[DownloadCount], 0)) 'Downloads'
SUM(ISNULL(Facts.[DownloadCount], 0)) AS [Downloads]
FROM [dbo].[Fact_Download] AS Facts (NOLOCK)

INNER JOIN [dbo].[Dimension_Date] AS D (NOLOCK)
ON D.[Id] = Facts.[Dimension_Date_Id]

WHERE D.[Date] IS NOT NULL
AND ISNULL(D.[Date], CONVERT(DATE, '1900-01-01')) >=
DATETIMEFROMPARTS(
DATEPART(year, @MinDate),
DATEPART(month, @MinDate),
DATEPART(day, @MinDate),
0, 0, 0, 0)
AND ISNULL(D.[Date], CONVERT(DATE, DATEADD(day, 1, @ReportGenerationTime))) <
DATETIMEFROMPARTS(
DATEPART(year, @ReportGenerationTime),
DATEPART(month, @ReportGenerationTime),
DATEPART(day, @ReportGenerationTime),
0, 0, 0, 0)
AND Facts.[Timestamp] <= @Cursor

GROUP BY D.[Year], D.[WeekOfYear]
ORDER BY [Year], [WeekOfYear]

INNER JOIN WeekLookup AS D ON D.Id = Facts.Dimension_Date_Id
GROUP BY D.[Year], D.[WeekOfYear]
ORDER BY [Year], [WeekOfYear]
END