Final submission

Initiating a dbt project

Removing a missing key and uploading generated data

Adding setup.sql

This script configures the required objects in Snowflake by hand, in lieu of Terraform (which will manage them at a later date); a rough sketch of that eventual Terraform set-up follows this summary.

Configuring some helper packages

Useful for auto-generation when scaffolding projects

Setting up the source data

Generating base models

Modelling aggregated_customer_purchases

Adding some documentation and basic tests

Initiating Terraform

Updating the .gitignore for Terraform-related files

Setting up state storage/lock

Migrating tfstate to s3 backend

Adding IAM resources

Adding a bucket for Snowflake data.
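
setup.sql itself is not rendered in this diff. As a very rough, hedged sketch of what the later move to Terraform-managed Snowflake objects might look like (the provider and every resource and name below are assumptions for illustration, not part of this commit):

# Hypothetical sketch only -- setup.sql is the actual mechanism in this commit.
terraform {
  required_providers {
    snowflake = {
      source = "chanzuckerberg/snowflake"
    }
  }
}

resource "snowflake_warehouse" "transform" {
  name    = "TRANSFORM_WH"
  comment = "Warehouse used by the dbt project"
}

resource "snowflake_database" "raw" {
  name    = "RAW"
  comment = "Landing database for the generated source data"
}

resource "snowflake_role" "transformer" {
  name    = "TRANSFORMER"
  comment = "Role assumed by dbt"
}
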
rudeb0y committed Apr 23, 2021
1 parent f37d13b commit ee0f21b
Showing 31 changed files with 722 additions and 28 deletions.
11 changes: 11 additions & 0 deletions .editorconfig
@@ -0,0 +1,11 @@
# https://EditorConfig.org

root = true

[*]
charset = utf-8
end_of_line = lf
indent_style = space
tab_width = 4
trim_trailing_whitespace = true
insert_final_newline = true
35 changes: 35 additions & 0 deletions .gitignore
@@ -110,3 +110,38 @@ output_data/
# IDEs and Editors
.idea/
.vscode/

# Local .terraform directories
**/.terraform/*

# .tfstate files
*.tfstate
*.tfstate.*

# Crash log files
crash.log

# Exclude all .tfvars files, which are likely to contain sensitive data, such as
# passwords, private keys, and other secrets. These should not be part of version
# control as they are data points which are potentially sensitive and subject
# to change depending on the environment.
#
*.tfvars

# Ignore override files as they are usually used to override resources locally and so
# are not checked in
override.tf
override.tf.json
*_override.tf
*_override.tf.json

# Include override files you do wish to add to version control using negated pattern
#
# !example_override.tf

# Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan
# example: *tfplan*

# Ignore CLI configuration files
.terraformrc
terraform.rc
65 changes: 65 additions & 0 deletions .tools/upload_to_s3.py
@@ -0,0 +1,65 @@
#!/usr/bin/python3

import logging
import boto3
from botocore.exceptions import ClientError
import os
import glob
import sys


def upload_files(base_path, bucket):
    """
    Uploads every file under base_path, keeping the directory structure
    relative to base_path as the object key prefix on S3.
    """
    count = 0

    len_base_parts = len(base_path.split("/")) - 1
    for root, dirs, files in os.walk(base_path):
        for file in files:
            # The key prefix is the current directory path, relative to base_path.
            s3_key = "/".join(root.split("/")[len_base_parts:])

            print(f"Uploading {root}/{file} to s3://{bucket}/{s3_key}/{file}..")
            upload_file(
                os.path.join(root, file), bucket, os.path.join(s3_key, file)
            )
            count += 1

    print(f"Done. {count} files uploaded.")


def upload_file(file_name, bucket, object_name=None):
    """Upload a file to an S3 bucket.

    :param file_name: File to upload
    :param bucket: Bucket to upload to
    :param object_name: S3 object name. If not specified then file_name is used
    :return: True if the file was uploaded, else False
    """

    # If S3 object_name was not specified, use file_name
    if object_name is None:
        object_name = file_name

    # Upload the file
    s3_client = boto3.client("s3")
    try:
        s3_client.upload_file(file_name, bucket, object_name)
    except ClientError as e:
        logging.error(e)
        return False
    return True


# Keeping this really simple for speed. I know there are better ways, which
# I would implement in a production environment: it should check that the
# input path exists, and it should establish an explicit boto3 session, etc.
def main(bucket=None):

    data_root_path = "input_data/starter/"

    upload_files(data_root_path, bucket)


if __name__ == "__main__":
    main(sys.argv[1])
39 changes: 39 additions & 0 deletions infra/.terraform.lock.hcl

Some generated files are not rendered by default.

23 changes: 23 additions & 0 deletions infra/dynamo_tables.tf
@@ -0,0 +1,23 @@
# https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/dynamodb_table

locals {
  statelock_table_name = "${var.aws_dynamodb_table}-${random_integer.seed.result}"
}

resource "aws_dynamodb_table" "terraform_statelock" {
  name           = local.statelock_table_name
  read_capacity  = 20
  write_capacity = 20
  hash_key       = "LockID"

  attribute {
    name = "LockID"
    type = "S"
  }

  tags = var.default_tags
}

output "state_dynamo_table" {
  value = aws_dynamodb_table.terraform_statelock.id
}
13 changes: 13 additions & 0 deletions infra/iam_group_memberships.tf
@@ -0,0 +1,13 @@
# https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_group_membership

resource "aws_iam_group_membership" "full_access" {
name = "${local.bucket_name}-full-access"
users = var.full_access_users
group = aws_iam_group.bucket_full_access.name
}

resource "aws_iam_group_membership" "read_only" {
name = "${local.bucket_name}-read-only"
users = var.read_only_users
group = aws_iam_group.bucket_read_only.name
}
63 changes: 63 additions & 0 deletions infra/iam_group_policies.tf
@@ -0,0 +1,63 @@
# https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_group_policy

resource "aws_iam_group_policy" "full_access" {
name = "${local.bucket_name}-full-access"
group = aws_iam_group.bucket_full_access.id

policy = <<EOF
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": "s3:*",
"Resource": [
"arn:aws:s3:::${local.bucket_name}",
"arn:aws:s3:::${local.bucket_name}/*"
]
},
{
"Effect": "Allow",
"Action": ["dynamodb:*"],
"Resource": [
"${aws_dynamodb_table.terraform_statelock.arn}"
]
}
]
}
EOF

}

resource "aws_iam_group_policy" "read_only" {
name = "${local.bucket_name}-read_only"
group = aws_iam_group.bucket_full_access.id

policy = <<EOF
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"s3:Get*",
"s3:List*"
],
"Resource": [
"arn:aws:s3:::${local.bucket_name}",
"arn:aws:s3:::${local.bucket_name}/*"
]
}
]
}
EOF

}

output "s3_bucket" {
value = aws_s3_bucket.state_bucket.bucket
}

output "dynamodb_statelock" {
value = aws_s3_bucket.state_bucket.bucket
}
14 changes: 14 additions & 0 deletions infra/iam_groups.tf
@@ -0,0 +1,14 @@
# https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_group

resource "aws_iam_group" "bucket_full_access" {
name = "${local.bucket_name}-full-access"

}

resource "aws_iam_group" "bucket_read_only" {
name = "${local.bucket_name}-read-only"
}

# Notes:
# We would probably want a `developers` group and
# grant the correct privs to just that environment. A future activity.
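
A minimal sketch of that future `developers` group (not part of this commit; the environment-scoped name and the reuse of `read_only_users` are assumptions purely for illustration):

# Hypothetical sketch only -- not in this commit.
resource "aws_iam_group" "developers" {
  name = "developers-${var.environment}"
}

resource "aws_iam_group_membership" "developers" {
  name  = "developers-${var.environment}"
  # A dedicated `developer_users` variable would be better; an existing
  # list is reused here only to keep the sketch self-contained.
  users = var.read_only_users
  group = aws_iam_group.developers.name
}
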
18 changes: 18 additions & 0 deletions infra/providers.tf
@@ -0,0 +1,18 @@
# https://registry.terraform.io/providers/hashicorp/aws/latest

terraform {
  backend "s3" {
    key = "networking/dev/terraform.tfstate"
  }

  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "3.37.0"
    }
  }
}

provider "aws" {
  region = var.region
}
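
The backend block above is deliberately partial: it only pins the state key, so the bucket, region, and lock table presumably have to be supplied at init time. A minimal sketch of such a partial configuration file, with hypothetical values derived from the variables in this commit (passed via terraform init -backend-config=backend.hcl):

# backend.hcl -- hypothetical values, not part of this commit
bucket         = "infinityworks-demo-1234"
region         = "eu-west-1"
dynamodb_table = "infinityworks-demo-tfstatelock-1234"
encrypt        = true
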
6 changes: 6 additions & 0 deletions infra/random_generator.tf
@@ -0,0 +1,6 @@

# Ensure some global uniqueness. Used for the state bucket and the DynamoDB statelock.
resource "random_integer" "seed" {
  min = 1000
  max = 9999
}
48 changes: 48 additions & 0 deletions infra/s3_buckets.tf
@@ -0,0 +1,48 @@
# https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket

locals {
  bucket_name = "${var.aws_bucket_prefix}-${random_integer.seed.result}"
}

# TODO: use KMS to encrypt the bucket, etc.
resource "aws_s3_bucket" "state_bucket" {
  bucket        = local.bucket_name
  acl           = "private"
  force_destroy = false

  versioning {
    enabled = true
  }

  tags = var.default_tags
}

# TODO - did not work as expected - ideally block the ability to make objects public.
# Can mitigate with the IAM set-up; a fuller sketch follows this file.
resource "aws_s3_bucket_public_access_block" "s3_no_public_access" {
  bucket              = aws_s3_bucket.state_bucket.id
  block_public_acls   = true
  block_public_policy = true
}

# Keeping this simple for brevity in the tech demo -- however, as per the tfstate
# set-up, we can set more things here. See the doc link above.
# If this were a real set-up, we'd probably modularize all buckets to avoid
# repetition and enforce standards.
resource "aws_s3_bucket" "demo_data_bucket" {
  bucket = "iw-demo-data"

  acl           = "private"
  force_destroy = false

  versioning {
    enabled = true
  }

  tags = var.default_tags
}

output "state_bucket" {
  value = aws_s3_bucket.state_bucket.id
}
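
On the two TODOs above, a minimal sketch of the hardened version, assuming the AWS provider 3.x syntax pinned in providers.tf (default encryption configured inline on the bucket, and all four public-access flags set). Not part of this commit:

# Sketch only -- assumes AWS provider 3.x resource syntax.
resource "aws_s3_bucket" "state_bucket" {
  bucket        = local.bucket_name
  acl           = "private"
  force_destroy = false

  versioning {
    enabled = true
  }

  # Default encryption; set kms_master_key_id to use a customer-managed KMS key.
  server_side_encryption_configuration {
    rule {
      apply_server_side_encryption_by_default {
        sse_algorithm = "aws:kms"
      }
    }
  }

  tags = var.default_tags
}

# All four flags are needed to fully block public objects, which may be why
# the two-flag version above "did not work as expected".
resource "aws_s3_bucket_public_access_block" "s3_no_public_access" {
  bucket                  = aws_s3_bucket.state_bucket.id
  block_public_acls       = true
  block_public_policy     = true
  ignore_public_acls      = true
  restrict_public_buckets = true
}
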
40 changes: 40 additions & 0 deletions infra/variables.tf
@@ -0,0 +1,40 @@
variable "aws_bucket_prefix" {
type = string
default = "infinityworks-demo"
}

variable "aws_dynamodb_table" {
type = string
default = "infinityworks-demo-tfstatelock"
}

# Best Practice - autotag resources
variable "default_tags" {
type = map
default = {
key: "value",
Name: "Value",
managed_by_terraform: true,
department: "Data Engineering"
}
}

# Depending how you opt to provision resources, this can be very useful.
# different aws accounts/zones or shared?
variable "environment" {
type = string
default = "dev"
}

variable "full_access_users" {
type = list(string)
}

variable "read_only_users" {
type = list(string)
}

variable "region" {
type = string
default = "eu-west-1"
}
1 change: 0 additions & 1 deletion input_data_generator/main_data_generator.py
@@ -91,7 +91,6 @@
+ ["fruit_veg"] * 25
+ ["sweets"] * 20
+ ["food"] * 25
+ ["bws"] * 10
)

gen_id = "starter"
