From bb66ada55fb4aa5d08b4e90b6c0b436fe7619d2d Mon Sep 17 00:00:00 2001 From: Elias Kassell Date: Thu, 5 Jan 2023 15:45:12 +0000 Subject: [PATCH] Environments and schedules (#1418) --- api/commands/init.ts | 16 ---- api/commands/validate.ts | 106 --------------------- api/index.ts | 3 - cli/index.ts | 29 ++---- core/targets.ts | 8 +- protos/BUILD | 20 ---- protos/environments.proto | 75 --------------- protos/execution.proto | 1 - protos/schedules.proto | 70 -------------- tests/api/validate.spec.ts | 182 ------------------------------------- 10 files changed, 10 insertions(+), 500 deletions(-) delete mode 100644 api/commands/validate.ts delete mode 100644 protos/environments.proto delete mode 100644 protos/schedules.proto diff --git a/api/commands/init.ts b/api/commands/init.ts index d5444153b..d8a42344b 100644 --- a/api/commands/init.ts +++ b/api/commands/init.ts @@ -20,8 +20,6 @@ export interface IInitResult { export interface IInitOptions { skipInstall?: boolean; - includeSchedules?: boolean; - includeEnvironments?: boolean; } export async function init( @@ -32,8 +30,6 @@ export async function init( const dataformJsonPath = path.join(projectDir, "dataform.json"); const packageJsonPath = path.join(projectDir, "package.json"); const gitignorePath = path.join(projectDir, ".gitignore"); - const schedulesJsonPath = path.join(projectDir, "schedules.json"); - const environmentsJsonPath = path.join(projectDir, "environments.json"); if (fs.existsSync(dataformJsonPath) || fs.existsSync(packageJsonPath)) { throw new Error( @@ -74,18 +70,6 @@ export async function init( fs.writeFileSync(gitignorePath, gitIgnoreContents); filesWritten.push(gitignorePath); - if (options.includeSchedules) { - fs.writeFileSync( - schedulesJsonPath, - prettyJsonStringify(dataform.schedules.SchedulesJSON.create({})) - ); - filesWritten.push(schedulesJsonPath); - } - - if (options.includeEnvironments) { - fs.writeFileSync(environmentsJsonPath, prettyJsonStringify(dataform.Environments.create({}))); - } - // Make the default models, includes folders. const definitionsDir = path.join(projectDir, "definitions"); fs.mkdirSync(definitionsDir); diff --git a/api/commands/validate.ts b/api/commands/validate.ts deleted file mode 100644 index 60c34734f..000000000 --- a/api/commands/validate.ts +++ /dev/null @@ -1,106 +0,0 @@ -import cronParser from "cron-parser"; -import * as fs from "fs"; -import * as path from "path"; - -import { targetAsReadableString } from "df/core/targets"; -import { dataform } from "df/protos/ts"; - -const SCHEDULES_JSON_PATH = "schedules.json"; -// tslint:disable-next-line: tsr-detect-unsafe-regexp -const EMAIL_REGEX = /^(([^<>()\[\]\.,;:\s@\"]+(\.[^<>()\[\]\.,;:\s@\"]+)*)|(\".+\"))@(([^<>()[\]\.,;:\s@\"]+\.)+[^<>()[\]\.,;:\s@\"]{2,})$/i; - -function validateEmail(email: string) { - return EMAIL_REGEX.test(email); -} - -export function validateSchedulesFileIfExists( - compiledGraph: dataform.ICompiledGraph, - projectDir: string, - tablePrefix?: string -): string[] { - if (!compiledGraph) { - return ["Compiled graph not provided."]; - } - const filePath = path.resolve(path.join(projectDir, SCHEDULES_JSON_PATH)); - const content = fs.readFileSync(filePath, "utf8"); - try { - const scheduleJsonObj = JSON.parse(content); - return validateSchedules( - dataform.schedules.SchedulesJSON.create(scheduleJsonObj), - compiledGraph, - tablePrefix - ); - } catch (err) { - return [ - `${SCHEDULES_JSON_PATH} does not contain valid JSON conforming to the SchedulesJSON schema.` - ]; - } -} - -export function validateSchedules( - schedules: dataform.schedules.ISchedulesJSON, - compiledGraph: dataform.ICompiledGraph, - tablePrefix?: string -): string[] { - const errors: string[] = []; - - const uniqueNames = new Set(); - schedules.schedules.forEach(schedule => { - if (!schedule.name) { - errors.push("Schedule name is required."); - } - if (schedule.name && schedule.name.trim().length === 0) { - errors.push("Schedule name must not be empty."); - } - if (uniqueNames.has(schedule.name)) { - errors.push( - `Schedule name "${schedule.name}" is not unique. All schedule names must be unique.` - ); - } - uniqueNames.add(schedule.name); - - if (!schedule.cron) { - errors.push(`Cron expression is required on ${schedule.name}.`); - } - - try { - const _ = cronParser.parseExpression(schedule.cron); - } catch (e) { - errors.push( - `Schedule "${schedule.name}" contains an invalid cron expression "${schedule.cron}".` - ); - } - - if (schedule.options && schedule.options.actions) { - const allReadableTargetNames: string[] = [].concat( - compiledGraph.tables.map(table => targetAsReadableString(table.target)), - compiledGraph.assertions.map(assertion => targetAsReadableString(assertion.target)), - compiledGraph.operations.map(operation => targetAsReadableString(operation.target)), - compiledGraph.tables.map(table => table.target.name), - compiledGraph.assertions.map(assertion => assertion.target.name), - compiledGraph.operations - .filter(operation => !!operation.target) - .map(operation => operation.target.name) - ); - - schedule.options.actions.forEach(action => { - const prefixedActionName = tablePrefix ? `${tablePrefix}_${action}` : action; - if (!allReadableTargetNames.includes(prefixedActionName)) { - errors.push( - `Action "${prefixedActionName}" included in schedule ${schedule.name} doesn't exist in the project.` - ); - } - }); - } - - if (schedule.notification && schedule.notification.emails) { - schedule.notification.emails.forEach(email => { - if (!validateEmail(email)) { - errors.push(`Schedule "${schedule.name}" contains an invalid email address "${email}".`); - } - }); - } - }); - - return errors; -} diff --git a/api/index.ts b/api/index.ts index fb2a62c3a..1ae4a7a5f 100644 --- a/api/index.ts +++ b/api/index.ts @@ -8,7 +8,6 @@ import * as query from "df/api/commands/query"; import { run, Runner } from "df/api/commands/run"; import * as table from "df/api/commands/table"; import { test } from "df/api/commands/test"; -import { validateSchedules, validateSchedulesFileIfExists } from "df/api/commands/validate"; export { init, @@ -20,8 +19,6 @@ export { run, query, table, - validateSchedules, - validateSchedulesFileIfExists, Runner, Builder, prune diff --git a/cli/index.ts b/cli/index.ts index f5c6ac30d..05f1db163 100644 --- a/cli/index.ts +++ b/cli/index.ts @@ -122,7 +122,10 @@ const includeDependentsOption: INamedOption = { }, // It would be nice to use yargs' "implies" to implement this, but it doesn't work for some reason. check: (argv: yargs.Arguments) => { - if (argv[includeDependentsOption.name] && !(argv[actionsOption.name] || argv[tagsOption.name])) { + if ( + argv[includeDependentsOption.name] && + !(argv[actionsOption.name] || argv[tagsOption.name]) + ) { throw new Error( `The --${includeDependentsOption.name} flag should only be supplied along with --${actionsOption.name} or --${tagsOption.name}.` ); @@ -130,7 +133,6 @@ const includeDependentsOption: INamedOption = { } }; - const schemaSuffixOverrideOption: INamedOption = { name: "schema-suffix", option: { @@ -208,8 +210,6 @@ const timeoutOption: INamedOption = { const defaultDatabaseOptionName = "default-database"; const defaultLocationOptionName = "default-location"; const skipInstallOptionName = "skip-install"; -const includeSchedulesOptionName = "include-schedules"; -const includeEnvironmentsOptionName = "include-environments"; const testConnectionOptionName = "test-connection"; @@ -293,20 +293,6 @@ export function runCli() { describe: "Whether to skip installing NPM packages.", default: false } - }, - { - name: includeSchedulesOptionName, - option: { - describe: "Whether to initialize a schedules.json file.", - default: false - } - }, - { - name: includeEnvironmentsOptionName, - option: { - describe: "Whether to initialize a environments.json file.", - default: false - } } ], processFn: async argv => { @@ -320,9 +306,7 @@ export function runCli() { useRunCache: false }, { - skipInstall: argv[skipInstallOptionName], - includeSchedules: argv[includeSchedulesOptionName], - includeEnvironments: argv[includeEnvironmentsOptionName] + skipInstall: argv[skipInstallOptionName] } ); printInitResult(initResult); @@ -683,7 +667,8 @@ export function runCli() { actionResult.status !== dataform.ActionResult.ExecutionStatus.RUNNING ) .filter( - executedAction => !alreadyPrintedActions.has(targetAsReadableString(executedAction.target)) + executedAction => + !alreadyPrintedActions.has(targetAsReadableString(executedAction.target)) ) .forEach(executedAction => { printExecutedAction( diff --git a/core/targets.ts b/core/targets.ts index de132c94d..3ffe8040f 100644 --- a/core/targets.ts +++ b/core/targets.ts @@ -15,15 +15,13 @@ export function targetsAreEqual(a: dataform.ITarget, b: dataform.ITarget) { /** * Provides a readable string representation of the target which is used for e.g. specifying - * actions on the CLI or as part of schedule configs. + * actions on the CLI. * This is effectively equivelant to an action "name". - * + * * This is an ambiguous transformation, multiple targets may map to the same string * and it should not be used for indexing. Use {@code targetStringifier} instead. */ -export function targetAsReadableString( - target: dataform.ITarget -): string { +export function targetAsReadableString(target: dataform.ITarget): string { const nameParts = [target.name, target.schema]; if (!!target.database) { nameParts.push(target.database); diff --git a/protos/BUILD b/protos/BUILD index 9a7f3b83c..611a68794 100644 --- a/protos/BUILD +++ b/protos/BUILD @@ -12,18 +12,12 @@ proto_library( name = "dataform_proto", srcs = [ "core.proto", - "environments.proto", "evaluation.proto", "execution.proto", "profiles.proto", ], ) -proto_library( - name = "schedules_proto", - srcs = ["schedules.proto"], -) - proto_library( name = "server_proto", srcs = ["server.proto"], @@ -37,30 +31,18 @@ java_proto_library( deps = [":dataform_proto"], ) -java_proto_library( - name = "schedules_java_proto", - deps = [":schedules_proto"], -) - go_proto_library( name = "dataform_go_proto", importpath = "github.com/dataform-co/dataform/protos/dataform", proto = ":dataform_proto", ) -go_proto_library( - name = "schedules_go_proto", - importpath = "github.com/dataform-co/dataform/protos/dataform/schedules", - proto = ":schedules_proto", -) - load("//tools:ts_proto_library.bzl", "ts_proto_library") ts_proto_library( name = "ts", deps = [ ":dataform_proto", - ":schedules_proto", ":server_proto", ], ) @@ -72,8 +54,6 @@ build_test( deps = [ ":dataform_go_proto", ":dataform_java_proto", - ":schedules_go_proto", - ":schedules_java_proto", ], ) diff --git a/protos/environments.proto b/protos/environments.proto deleted file mode 100644 index 03d03c651..000000000 --- a/protos/environments.proto +++ /dev/null @@ -1,75 +0,0 @@ -syntax="proto3"; - -package dataform; - -import "protos/core.proto"; - -option go_package = "github.com/dataform-co/dataform/protos/dataform"; - -message Environments { - message Environment { - string name = 1; - - message GitReference { - oneof git_reference { - string branch = 1; - string commit_sha = 2; - } - } - - oneof git_settings { - string git_ref = 5; - // Deprecated. Please use 'git_ref' instead. - GitReference git_reference = 2; - } - - ProjectConfig config_override = 3; - - message Schedule { - // Required fields. - string name = 1; - string cron = 2; - repeated string tags = 3; - - // This is a subset of RunConfig. - message Options { - bool include_dependencies = 4; - bool include_dependents = 6; - bool full_refresh = 5; - } - // Optional. - Options options = 4; - bool disabled = 5; - - message NotificationRequirement { - string channel = 1; - - enum CompletionStatus { - UNKNOWN = 0; - SUCCESS = 1; - FAILURE = 2; - } - repeated CompletionStatus statuses = 2; - } - repeated NotificationRequirement notify = 6; - } - repeated Schedule schedules = 4; - } - repeated Environment environments = 1; - - message NotificationChannel { - string name = 1; - - message EmailNotificationChannel { - repeated string to = 1; - } - message SlackNotificationChannel { - string webhook_url = 1; - } - oneof channel { - EmailNotificationChannel email = 2; - SlackNotificationChannel slack = 3; - } - } - repeated NotificationChannel notification_channels = 2; -} diff --git a/protos/execution.proto b/protos/execution.proto index 373704439..c7d1e2fad 100644 --- a/protos/execution.proto +++ b/protos/execution.proto @@ -7,7 +7,6 @@ import "protos/core.proto"; option go_package = "github.com/dataform-co/dataform/protos/dataform"; -// This is a superset of Schedule.Options. message RunConfig { repeated string actions = 1; repeated string tags = 5; diff --git a/protos/schedules.proto b/protos/schedules.proto deleted file mode 100644 index 970d8756e..000000000 --- a/protos/schedules.proto +++ /dev/null @@ -1,70 +0,0 @@ -syntax="proto3"; - -package dataform.schedules; - -option go_package = "github.com/dataform-co/dataform/protos/dataform/schedules"; - -// NOTE ON THIS FILE: All messages are deprecated in favour of the variants in 'core.proto'. -// Please do not make any further updates to the contents of this file. - -enum NotificationEventType { - UNKNOWN = 0; - SUCCESS = 1; - FAILURE = 2; -} - -message EmailNotificationChannel { - repeated string to = 1; -} - -message SlackNotificationChannel { - string webhook_url = 1; -} - -message NotificationChannel { - string name = 1; - oneof channel { - EmailNotificationChannel email = 2; - SlackNotificationChannel slack = 3; - } -} - -message ScheduleNotification { - repeated string environments = 1; - // should consists of event type from NotificationEventType enum - repeated string events = 2; - repeated string channels = 3; -} - -message NotificationSettings { - repeated string emails = 1; - bool on_success = 2; - bool on_failure = 3; -} - -// All the field names should be in sync with RunConfig -message ScheduleOptions { - repeated string actions = 1; - repeated string tags = 4; - bool include_dependencies = 2; - bool include_dependents = 5; - bool full_refresh = 3; -} - -message Schedule { - string name = 1; // required field - bool disabled = 2; - ScheduleOptions options = 3; - string cron = 4; // required field - // Deprecated - NotificationSettings notification = 5; - - repeated ScheduleNotification notifications = 6; -} - -// Don't change the field names as it will be serialized to JSON -message SchedulesJSON { - repeated Schedule schedules = 2; - repeated NotificationChannel notification_channels = 3; - reserved 1; -} diff --git a/tests/api/validate.spec.ts b/tests/api/validate.spec.ts index 98c8db7eb..7e68bb79d 100644 --- a/tests/api/validate.spec.ts +++ b/tests/api/validate.spec.ts @@ -1,191 +1,9 @@ import { expect } from "chai"; -import * as fs from "fs"; -import * as path from "path"; -import * as dfapi from "df/api"; -import { validateSchedules } from "df/api"; import { checkDataformJsonValidity } from "df/api/commands/compile"; -import { dataform } from "df/protos/ts"; import { suite, test } from "df/testing"; -import { TmpDirFixture } from "df/tests/utils/fixtures"; - -const SCHEDULES_JSON_PATH = "schedules.json"; suite("@dataform/api/validate", () => { - suite("validateSchedules", () => { - test("returns no errors for valid schedules object", () => { - const compiledGraph = dataform.CompiledGraph.create({ - tables: [ - { - target: { - schema: "schema", - name: "action1" - } - }, - { - target: { - schema: "schema", - name: "action2" - } - }, - { - target: { - schema: "schema", - name: "action3" - } - } - ] - }); - const validSchedule = dataform.schedules.SchedulesJSON.create({ - schedules: [ - { - name: "name1", - cron: "*/2 * * * *", - disabled: false, - options: { - actions: ["action2", "action3"] - } - }, - { - name: "name2", - cron: "*/2 * * * *", - notification: { - emails: ["tada@test.com"], - onSuccess: true, - onFailure: false - } - } - ] - }); - - const errors = validateSchedules(validSchedule, compiledGraph); - expect(errors).to.eql([]); - }); - - test("test all errors", () => { - const compiledGraph = dataform.CompiledGraph.create({ - tables: [ - { - target: { - schema: "schema", - name: "action1" - } - }, - { - target: { - schema: "schema", - name: "action2" - } - } - ] - }); - - const invalidSchedule = dataform.schedules.SchedulesJSON.create({ - schedules: [ - { - name: "name1", - cron: "asdas", - notification: { - emails: ["test2.com"] - }, - options: { - actions: ["action3"] - } - }, - { - name: "name1", - cron: "*/2 * * * *" - }, - { - name: "nam2", - cron: "*/2 * * * *", - notification: { - onFailure: true - } - } - ] - }); - - const errors = validateSchedules(invalidSchedule, compiledGraph); - const expectedErrors = [ - 'Schedule "name1" contains an invalid cron expression "asdas".', - 'Action "action3" included in schedule name1 doesn\'t exist in the project.', - 'Schedule "name1" contains an invalid email address "test2.com".', - 'Schedule name "name1" is not unique. All schedule names must be unique.' - ]; - expect(errors).to.eql(expectedErrors); - }); - - suite("validate schedules.json file", ({ afterEach }) => { - const tmpDirFixture = new TmpDirFixture(afterEach); - const projectsRootDir = tmpDirFixture.createNewTmpDir(); - - test("test all errors", async () => { - const projectName = "schedules-test"; - const projectDir = path.resolve(path.join(projectsRootDir, projectName)); - const filePath = path.resolve(path.join(projectDir, SCHEDULES_JSON_PATH)); - - const compiledGraph = dataform.CompiledGraph.create({ - tables: [ - { - target: { - schema: "schema", - name: "action1" - } - }, - { - target: { - schema: "schema", - name: "action2" - } - } - ] - }); - const invalidJson = { - schedules: [ - { - name: "name1", - cron: "asdas", - notification: { - emails: ["test2.com"] - }, - options: { - actions: ["action3"] - } - }, - { - name: "name1", - cron: "*/2 * * * *" - }, - { - name: "nam2", - cron: "*/2 * * * *", - notification: { - onFailure: true - } - } - ] - }; - - const project = await dfapi.init( - projectDir, - { warehouse: "redshift" }, - { includeSchedules: true, skipInstall: true, includeEnvironments: false } - ); - - fs.writeFileSync(filePath, JSON.stringify(invalidJson)); - const expectedErrors = [ - 'Schedule "name1" contains an invalid cron expression "asdas".', - 'Action "action3" included in schedule name1 doesn\'t exist in the project.', - 'Schedule "name1" contains an invalid email address "test2.com".', - 'Schedule name "name1" is not unique. All schedule names must be unique.' - ]; - const errors = dfapi.validateSchedulesFileIfExists(compiledGraph, projectDir); - expect(errors).to.be.eql(expectedErrors); - }); - }); - }); - suite("dataform.json validation", async () => { test("fails on invalid warehouse", async () => { expect(() =>