-
Notifications
You must be signed in to change notification settings - Fork 467
/
Build and Optimize Data Warehouses with BigQuery: Challenge Lab
128 lines (98 loc) · 3.64 KB
/
Build and Optimize Data Warehouses with BigQuery: Challenge Lab
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
Firstly FOLLOW ALONG WITH THE VIDEO THEN COPY THESE COMMANDS!
One Important Point - Don't forget to replace <dataset_name> <table_name>
TASK 1 ------------------------------------------------------------------
CREATE OR REPLACE TABLE <dataset_name>.<table_name>
PARTITION BY date
OPTIONS(
partition_expiration_days=360,
description="oxford_policy_tracker table in the COVID 19 Government Response public dataset with an expiry time set to 90 days."
) AS
SELECT
*
FROM
`bigquery-public-data.covid19_govt_response.oxford_policy_tracker`
WHERE
alpha_3_code NOT IN ('GBR', 'BRA', 'CAN','USA')
TASK 2 ---------------------------------------------------------------------
ALTER TABLE <dataset_name>.<table_name>
ADD COLUMN population INT64,
ADD COLUMN country_area FLOAT64,
ADD COLUMN mobility STRUCT<
avg_retail FLOAT64,
avg_grocery FLOAT64,
avg_parks FLOAT64,
avg_transit FLOAT64,
avg_workplace FLOAT64,
avg_residential FLOAT64
>
TASK 3 -----------------------------------------------------------------------
CREATE OR REPLACE TABLE <dataset_name>.pop_data_2019 AS
SELECT
country_territory_code,
pop_data_2019
FROM
`bigquery-public-data.covid19_ecdc.covid_19_geographic_distribution_worldwide`
GROUP BY
country_territory_code,
pop_data_2019
ORDER BY
country_territory_code
2ND QUERY -------------------------------------------------------------------
UPDATE
`<dataset_name>.<table_name>` t0
SET
population = t1.pop_data_2019
FROM
`<dataset_name>.pop_data_2019` t1
WHERE
CONCAT(t0.alpha_3_code) = CONCAT(t1.country_territory_code);
TASK 4 ------------------------------------------------------------------------
UPDATE
`<dataset_name>.<table_name>` t0
SET
t0.country_area = t1.country_area
FROM
`bigquery-public-data.census_bureau_international.country_names_area` t1
WHERE
t0.country_name = t1.country_name
TASK 5 -------------------------------------------------------------------------
UPDATE
`<dataset_name>.<table_name>` t0
SET
t0.mobility.avg_retail = t1.avg_retail,
t0.mobility.avg_grocery = t1.avg_grocery,
t0.mobility.avg_parks = t1.avg_parks,
t0.mobility.avg_transit = t1.avg_transit,
t0.mobility.avg_workplace = t1.avg_workplace,
t0.mobility.avg_residential = t1.avg_residential
FROM
( SELECT country_region, date,
AVG(retail_and_recreation_percent_change_from_baseline) as avg_retail,
AVG(grocery_and_pharmacy_percent_change_from_baseline) as avg_grocery,
AVG(parks_percent_change_from_baseline) as avg_parks,
AVG(transit_stations_percent_change_from_baseline) as avg_transit,
AVG(workplaces_percent_change_from_baseline) as avg_workplace,
AVG(residential_percent_change_from_baseline) as avg_residential
FROM `bigquery-public-data.covid19_google_mobility.mobility_report`
GROUP BY country_region, date
) AS t1
WHERE
CONCAT(t0.country_name, t0.date) = CONCAT(t1.country_region, t1.date)
TASK 6 ----------------------------------------------------------------------------
SELECT country_name, population
FROM `<dataset_name>.<table_name>`
WHERE population is NULL
QUERY 2 ----------------------------
SELECT country_name, country_area
FROM `<dataset_name>.<table_name>`
WHERE country_area IS NULL
QUERY 3 -----------------------------
SELECT DISTINCT country_name
FROM `<dataset_name>.<table_name>`
WHERE population is NULL
UNION ALL
SELECT DISTINCT country_name
FROM `<dataset_name>.<table_name>`
WHERE country_area IS NULL
ORDER BY country_name ASC
You are Doing Great Job!