From 086bf158443205b8f1fae13e06c845326648cb62 Mon Sep 17 00:00:00 2001 From: "xnuinside@gmail.com" Date: Wed, 6 Oct 2021 18:01:25 +0300 Subject: [PATCH 1/2] output for full script --- CHANGELOG.txt | 12 + simple_ddl_parser/ddl_parser.py | 2 + simple_ddl_parser/dialects/mssql.py | 68 ++- simple_ddl_parser/dialects/sql.py | 2 + simple_ddl_parser/parser.py | 24 +- simple_ddl_parser/tokens.py | 3 + tests/test_mssql_specific.py | 873 +++++++++++++++++++++++++++- 7 files changed, 959 insertions(+), 25 deletions(-) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index c6ab068..6a8c00f 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,3 +1,15 @@ +**v0.21.0** +### New Features: + + ## MSSQL: + + 1. Added support for statements: + 1. PERIOD FOR SYSTEM_TIME in CREATE TABLE statement + 2. ON [PRIMARY] after CREATE TABLE statement (sample in test files test_mssql_specific.py) + 3. WITH statement for TABLE properties + 4. TEXTIMAGE_ON statement + 2. Added support for separating tables DDL by 'GO' statement as in output of MSSQL + **v0.20.0** ### New Features: diff --git a/simple_ddl_parser/ddl_parser.py b/simple_ddl_parser/ddl_parser.py index 82c06c5..a378675 100755 --- a/simple_ddl_parser/ddl_parser.py +++ b/simple_ddl_parser/ddl_parser.py @@ -103,6 +103,7 @@ def t_ID(self, t): self.lexer.lp_open += 1 self.lexer.columns_def = True self.lexer.last_token = "LP" + print(t.value, t.type) return t elif self.is_token_column_name(t): @@ -128,6 +129,7 @@ def set_last_token(self, t): self.lexer.is_table = False elif t.type in ["TABLE", "INDEX"]: self.lexer.is_table = True + print(t.value, t.type) return t def t_error(self, t): diff --git a/simple_ddl_parser/dialects/mssql.py b/simple_ddl_parser/dialects/mssql.py index 8b10526..442e793 100644 --- a/simple_ddl_parser/dialects/mssql.py +++ b/simple_ddl_parser/dialects/mssql.py @@ -6,31 +6,73 @@ def p_pkey_constraint(self, p): """pkey_constraint : constraint pkey_statement ID LP index_pid RP | constraint pkey_statement LP index_pid RP | pkey_constraint with + | pkey_constraint with ON ID """ p_list = list(p) p[0] = p[1] - if len(p_list) == 3: - data = p_list[-1] + if isinstance(p[2], dict) and "with" in p[2]: + data = p_list[2] + if "ON" in p_list: + data["with"]["on"] = p_list[-1] elif len(p_list) == 7: data = {"primary_key": True, "columns": p_list[-2], p[3]: True} else: data = {"primary_key": True, "columns": p_list[-2]} + p[0]["constraint"].update(data) def p_with(self, p): - """with : WITH LP ID ID ID - | with COMMA ID ID ID - | WITH LP ID ID ON - | with COMMA ID ID ON - | with RP - | with RP ON ID + """with : WITH with_args""" + p_list = list(p) + p[0] = {"with": {"properties": [], "on": None}} + if ")" not in p_list: + p[0]["with"]["properties"] = p_list[-1]["properties"] + + def p_equals(self, p): + """equals : ID ID ID + | ID ID ON + | ID ID ID DOT ID + """ + p_list = list(p) + if "." in p_list: + p[0] = {"name": p_list[1], "value": f"{p_list[3]}.{p_list[5]}"} + else: + p[0] = {"name": p_list[-3], "value": p_list[-1]} + + def p_with_args(self, p): + """with_args : LP equals + | with_args COMMA equals + | with_args with_args + | with_args RP """ p_list = list(p) + print(p_list, "p_list, with_args") if isinstance(p[1], dict): p[0] = p[1] else: - p[0] = {"with": {"properties": [], "on": None}} - if ")" not in p_list: - p[0]["with"]["properties"].append({"name": p_list[-3], "value": p_list[-1]}) - elif "ON" in p_list: - p[0]["with"]["on"] = p_list[-1] + p[0] = {"properties": []} + if ")" != p_list[2]: + if ")" == p_list[-1]: + p[0]["properties"].append(p_list[-1]) + else: + p[0]["properties"].append(p_list[-1]) + + def p_period_for(self, p): + """period_for : PERIOD FOR ID LP pid RP""" + print(list(p)) + p[0] = {"period_for_system_time": p[5]} + + def p_expression_on_primary(self, p): + """expr : expr ON ID""" + p[0] = p[1] + p[0]["on"] = p[3] + + def p_expression_with(self, p): + """expr : expr with""" + p[0] = p[1] + p[0].update(p[2]) + + def p_expression_text_image_on(self, p): + """expr : expr TEXTIMAGE_ON ID""" + p[0] = p[1] + p[0].update({"textimage_on": p[3]}) diff --git a/simple_ddl_parser/dialects/sql.py b/simple_ddl_parser/dialects/sql.py index b14c7a8..255ac7d 100644 --- a/simple_ddl_parser/dialects/sql.py +++ b/simple_ddl_parser/dialects/sql.py @@ -561,6 +561,7 @@ def p_expression_table(self, p: List) -> None: | expr COMMA uniq | expr COMMA statem_by_id | expr COMMA constraint uniq + | expr COMMA period_for | expr COMMA pkey_constraint | expr COMMA constraint pkey | expr COMMA constraint pkey enforced @@ -572,6 +573,7 @@ def p_expression_table(self, p: List) -> None: """ p[0] = p[1] p_list = list(p) + print(p_list[-1]) if p_list[-1] != "," and p_list[-1] != ")": if "type" in p_list[-1] and "name" in p_list[-1]: p[0]["columns"].append(p_list[-1]) diff --git a/simple_ddl_parser/parser.py b/simple_ddl_parser/parser.py index c6f9e61..2720b30 100755 --- a/simple_ddl_parser/parser.py +++ b/simple_ddl_parser/parser.py @@ -115,7 +115,9 @@ def process_set(tables: List, set_line: str) -> None: def parse_set_statement( self, tables: List, line: str, set_line: Optional[str] ) -> Optional[str]: + set_was_in_line = False if re.match(r"SET", line): + set_was_in_line = True if not set_line: set_line = line else: @@ -124,7 +126,7 @@ def parse_set_statement( elif set_line and len(set_line.split()) == 3: self.process_set(tables, set_line) set_line = None - return set_line + return set_line, set_was_in_line def parse_data(self): tables = [] @@ -135,34 +137,34 @@ def parse_data(self): skip_line_words = ["USE", "GO"] set_line = None for num, line in enumerate(lines): + print(repr(line)) skip = False for word in skip_line_words: if line.startswith(word): skip = True break - if skip: - continue - line, block_comments = self.pre_process_line(line, block_comments) line = line.strip().replace("\n", "").replace("\t", "") - set_line = self.parse_set_statement(tables, line, set_line) + set_line, set_was_in_line = self.parse_set_statement(tables, line, set_line) if line or num == len(lines) - 1: # to avoid issues when comma or parath are glued to column name final_line = line.strip().endswith(";") - if statement is None: - statement = line - else: - statement += f" {line}" + if not skip and not set_was_in_line: + if statement is None: + statement = line + else: + statement += f" {line}" if final_line: # end of sql operation, remove ; from end of line statement = statement[:-1] - elif num != len(lines) - 1: + elif num != len(lines) - 1 and not skip: # continue combine lines in one massive continue self.set_default_flags_in_lexer() - if not set_line: + if not set_line and statement: + print("statement", statement) self.parse_statement(tables, statement) statement = None diff --git a/simple_ddl_parser/tokens.py b/simple_ddl_parser/tokens.py index 05efb0b..2a25e49 100644 --- a/simple_ddl_parser/tokens.py +++ b/simple_ddl_parser/tokens.py @@ -55,6 +55,7 @@ "UNIQUE": "UNIQUE", "CHECK": "CHECK", "WITH": "WITH", + "PERIOD": "PERIOD", } common_statements.update(first_liners) @@ -81,6 +82,8 @@ # oracle "STORAGE": "STORAGE", "TABLESPACE": "TABLESPACE", + # mssql + "TEXTIMAGE_ON": "TEXTIMAGE_ON", } sequence_reserved = { "INCREMENT": "INCREMENT", diff --git a/tests/test_mssql_specific.py b/tests/test_mssql_specific.py index 9d8509c..b13dc78 100644 --- a/tests/test_mssql_specific.py +++ b/tests/test_mssql_specific.py @@ -2097,7 +2097,878 @@ def test_with_on(): IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON - )) ON [PRIMARY] + ) ON [PRIMARY] )""" result = DDLParser(ddl).run(group_by_type=True) assert expected == result + + +def test_period_for_system_time(): + ddl = """CREATE TABLE [dbo].[users_WorkSchedule]( + [id] [int] IDENTITY(1,1) NOT NULL, + [RequestDropDate] [smalldatetime] NULL, + [ShiftClass] [varchar](5) NULL, + [StartHistory] [datetime2](7) GENERATED ALWAYS AS ROW START NOT NULL, + [EndHistory] [datetime2](7) GENERATED ALWAYS AS ROW END NOT NULL, + CONSTRAINT [PK_users_WorkSchedule_id] PRIMARY KEY CLUSTERED + ( + [id] ASC + ) + WITH ( + PAD_INDEX = OFF, + STATISTICS_NORECOMPUTE = OFF, + IGNORE_DUP_KEY = OFF, + ALLOW_ROW_LOCKS = ON, + ALLOW_PAGE_LOCKS = ON + ) ON [PRIMARY], + PERIOD FOR SYSTEM_TIME ([StartHistory], [EndHistory]) + ) + """ + result = DDLParser(ddl).run(group_by_type=True) + expected = { + "ddl_properties": [], + "domains": [], + "schemas": [], + "sequences": [], + "tables": [ + { + "alter": {}, + "checks": [], + "columns": [ + { + "check": None, + "default": None, + "name": "[id]", + "nullable": False, + "references": None, + "size": (1, 1), + "type": "[int] IDENTITY", + "unique": False, + }, + { + "check": None, + "default": None, + "name": "[RequestDropDate]", + "nullable": True, + "references": None, + "size": None, + "type": "[smalldatetime]", + "unique": False, + }, + { + "check": None, + "default": None, + "name": "[ShiftClass]", + "nullable": True, + "references": None, + "size": 5, + "type": "[varchar]", + "unique": False, + }, + { + "check": None, + "default": None, + "generated": { + "always": True, + "as": "ROW START", + "stored": False, + }, + "name": "[StartHistory]", + "nullable": False, + "references": None, + "size": 7, + "type": "[datetime2]", + "unique": False, + }, + { + "check": None, + "default": None, + "generated": {"always": True, "as": "ROW END", "stored": False}, + "name": "[EndHistory]", + "nullable": False, + "references": None, + "size": 7, + "type": "[datetime2]", + "unique": False, + }, + ], + "constraints": { + "primary_keys": [ + { + "CLUSTERED": True, + "columns": { + "columns": ["[id]"], + "detailed_columns": [ + {"name": "[id]", "nulls": "LAST", "order": "ASC"} + ], + }, + "constraint_name": "[PK_users_WorkSchedule_id]", + "with": { + "on": "[PRIMARY]", + "properties": [ + {"name": "PAD_INDEX", "value": "OFF"}, + {"name": "STATISTICS_NORECOMPUTE", "value": "OFF"}, + {"name": "IGNORE_DUP_KEY", "value": "OFF"}, + {"name": "ALLOW_ROW_LOCKS", "value": "ON"}, + {"name": "ALLOW_PAGE_LOCKS", "value": "ON"}, + ], + }, + } + ] + }, + "index": [], + "partitioned_by": [], + "period_for_system_time": ["[StartHistory]", "[EndHistory]"], + "primary_key": ["detailed_columns", "columns"], + "schema": "[dbo]", + "table_name": "[users_WorkSchedule]", + "tablespace": None, + } + ], + "types": [], + } + assert expected == result + + +def test_on_primary_on_table_level(): + + ddl = """CREATE TABLE [dbo].[users_WorkSchedule]( + [id] [int] IDENTITY(1,1) NOT NULL, + [RequestDropDate] [smalldatetime] NULL, + [ShiftClass] [varchar](5) NULL, + [StartHistory] [datetime2](7) GENERATED ALWAYS AS ROW START NOT NULL, + [EndHistory] [datetime2](7) GENERATED ALWAYS AS ROW END NOT NULL, + CONSTRAINT [PK_users_WorkSchedule_id] PRIMARY KEY CLUSTERED + ( + [id] ASC + ) + WITH ( + PAD_INDEX = OFF, + STATISTICS_NORECOMPUTE = OFF, + IGNORE_DUP_KEY = OFF, + ALLOW_ROW_LOCKS = ON, + ALLOW_PAGE_LOCKS = ON + ) ON [PRIMARY], + PERIOD FOR SYSTEM_TIME ([StartHistory], [EndHistory]) + )) ON [PRIMARY] + """ + result = DDLParser(ddl).run(group_by_type=True) + expected = { + "ddl_properties": [], + "domains": [], + "schemas": [], + "sequences": [], + "tables": [ + { + "alter": {}, + "checks": [], + "columns": [ + { + "check": None, + "default": None, + "name": "[id]", + "nullable": False, + "references": None, + "size": (1, 1), + "type": "[int] IDENTITY", + "unique": False, + }, + { + "check": None, + "default": None, + "name": "[RequestDropDate]", + "nullable": True, + "references": None, + "size": None, + "type": "[smalldatetime]", + "unique": False, + }, + { + "check": None, + "default": None, + "name": "[ShiftClass]", + "nullable": True, + "references": None, + "size": 5, + "type": "[varchar]", + "unique": False, + }, + { + "check": None, + "default": None, + "generated": { + "always": True, + "as": "ROW START", + "stored": False, + }, + "name": "[StartHistory]", + "nullable": False, + "references": None, + "size": 7, + "type": "[datetime2]", + "unique": False, + }, + { + "check": None, + "default": None, + "generated": {"always": True, "as": "ROW END", "stored": False}, + "name": "[EndHistory]", + "nullable": False, + "references": None, + "size": 7, + "type": "[datetime2]", + "unique": False, + }, + ], + "constraints": { + "primary_keys": [ + { + "CLUSTERED": True, + "columns": { + "columns": ["[id]"], + "detailed_columns": [ + {"name": "[id]", "nulls": "LAST", "order": "ASC"} + ], + }, + "constraint_name": "[PK_users_WorkSchedule_id]", + "with": { + "on": "[PRIMARY]", + "properties": [ + {"name": "PAD_INDEX", "value": "OFF"}, + {"name": "STATISTICS_NORECOMPUTE", "value": "OFF"}, + {"name": "IGNORE_DUP_KEY", "value": "OFF"}, + {"name": "ALLOW_ROW_LOCKS", "value": "ON"}, + {"name": "ALLOW_PAGE_LOCKS", "value": "ON"}, + ], + }, + } + ] + }, + "index": [], + "on": "[PRIMARY]", + "partitioned_by": [], + "period_for_system_time": ["[StartHistory]", "[EndHistory]"], + "primary_key": ["detailed_columns", "columns"], + "schema": "[dbo]", + "table_name": "[users_WorkSchedule]", + "tablespace": None, + } + ], + "types": [], + } + assert expected == result + + +def test_with_on_table_level(): + + ddl = """USE [mystaffonline] + GO + /****** Object: Table [dbo].[users_WorkSchedule] Script Date: 9/29/2021 9:55:26 PM ******/ + SET ANSI_NULLS ON + GO + SET QUOTED_IDENTIFIER ON + GO + CREATE TABLE [dbo].[users_WorkSchedule]( + [id] [int] IDENTITY(1,1) NOT NULL, + [user_id] [int] NULL, + CONSTRAINT [PK_users_WorkSchedule_id] PRIMARY KEY CLUSTERED + ( + [id] ASC + ) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, + ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY], + PERIOD FOR SYSTEM_TIME ([StartHistory], [EndHistory]) + ) ON [PRIMARY] + WITH + ( + SYSTEM_VERSIONING = ON + )""" + result = DDLParser(ddl).run(group_by_type=True) + expected = { + "ddl_properties": [ + {"name": "ANSI_NULLS", "value": "ON"}, + {"name": "QUOTED_IDENTIFIER", "value": "ON"}, + ], + "domains": [], + "schemas": [], + "sequences": [], + "tables": [ + { + "alter": {}, + "checks": [], + "columns": [ + { + "check": None, + "default": None, + "name": "[id]", + "nullable": False, + "references": None, + "size": (1, 1), + "type": "[int] IDENTITY", + "unique": False, + }, + { + "check": None, + "default": None, + "name": "[user_id]", + "nullable": True, + "references": None, + "size": None, + "type": "[int]", + "unique": False, + }, + ], + "constraints": { + "primary_keys": [ + { + "CLUSTERED": True, + "columns": { + "columns": ["[id]"], + "detailed_columns": [ + {"name": "[id]", "nulls": "LAST", "order": "ASC"} + ], + }, + "constraint_name": "[PK_users_WorkSchedule_id]", + "with": { + "on": "[PRIMARY]", + "properties": [ + {"name": "PAD_INDEX", "value": "OFF"}, + {"name": "STATISTICS_NORECOMPUTE", "value": "OFF"}, + {"name": "IGNORE_DUP_KEY", "value": "OFF"}, + {"name": "ALLOW_ROW_LOCKS", "value": "ON"}, + {"name": "ALLOW_PAGE_LOCKS", "value": "ON"}, + ], + }, + } + ] + }, + "index": [], + "on": "[PRIMARY]", + "partitioned_by": [], + "period_for_system_time": ["[StartHistory]", "[EndHistory]"], + "primary_key": ["detailed_columns", "columns"], + "schema": "[dbo]", + "table_name": "[users_WorkSchedule]", + "tablespace": None, + "with": { + "on": None, + "properties": [{"name": "SYSTEM_VERSIONING", "value": "ON"}], + }, + } + ], + "types": [], + } + assert expected == result + + +def test_with_on_with_properties(): + + ddl = """USE [mystaffonline] +GO +/****** Object: Table [dbo].[users_WorkSchedule] Script Date: 9/29/2021 9:55:26 PM ******/ +SET ANSI_NULLS ON +GO +SET QUOTED_IDENTIFIER ON +GO +CREATE TABLE [dbo].[users_WorkSchedule]( + [id] [int] IDENTITY(1,1) NOT NULL, + [user_id] [int] NULL, + [station_Id] [int] NULL, + CONSTRAINT [PK_users_WorkSchedule_id] PRIMARY KEY CLUSTERED +( + [id] ASC +)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, +ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY], + PERIOD FOR SYSTEM_TIME ([StartHistory], [EndHistory]) +) ON [PRIMARY] +WITH +( +SYSTEM_VERSIONING = ON ( HISTORY_TABLE = [dbo].[users_WorkScheduleHistory] ) +)""" + result = DDLParser(ddl).run(group_by_type=True) + expected = { + "ddl_properties": [ + {"name": "ANSI_NULLS", "value": "ON"}, + {"name": "QUOTED_IDENTIFIER", "value": "ON"}, + ], + "domains": [], + "schemas": [], + "sequences": [], + "tables": [ + { + "alter": {}, + "checks": [], + "columns": [ + { + "check": None, + "default": None, + "name": "[id]", + "nullable": False, + "references": None, + "size": (1, 1), + "type": "[int] IDENTITY", + "unique": False, + }, + { + "check": None, + "default": None, + "name": "[user_id]", + "nullable": True, + "references": None, + "size": None, + "type": "[int]", + "unique": False, + }, + { + "check": None, + "default": None, + "name": "[station_Id]", + "nullable": True, + "references": None, + "size": None, + "type": "[int]", + "unique": False, + }, + ], + "constraints": { + "primary_keys": [ + { + "CLUSTERED": True, + "columns": { + "columns": ["[id]"], + "detailed_columns": [ + {"name": "[id]", "nulls": "LAST", "order": "ASC"} + ], + }, + "constraint_name": "[PK_users_WorkSchedule_id]", + "with": { + "on": "[PRIMARY]", + "properties": [ + {"name": "PAD_INDEX", "value": "OFF"}, + {"name": "STATISTICS_NORECOMPUTE", "value": "OFF"}, + {"name": "IGNORE_DUP_KEY", "value": "OFF"}, + {"name": "ALLOW_ROW_LOCKS", "value": "ON"}, + {"name": "ALLOW_PAGE_LOCKS", "value": "ON"}, + ], + }, + } + ] + }, + "index": [], + "on": "[PRIMARY]", + "partitioned_by": [], + "period_for_system_time": ["[StartHistory]", "[EndHistory]"], + "primary_key": ["detailed_columns", "columns"], + "schema": "[dbo]", + "table_name": "[users_WorkSchedule]", + "tablespace": None, + "with": { + "on": None, + "properties": [ + {"name": "SYSTEM_VERSIONING", "value": "ON"}, + { + "properties": [ + { + "name": "HISTORY_TABLE", + "value": "[dbo].[users_WorkScheduleHistory]", + } + ] + }, + ], + }, + } + ], + "types": [], + } + assert expected == result + + +def test_output_separated_by_go_and_textimage(): + + ddl = """/****** Object: Table [dbo].[TO_Requests] Script Date: 9/29/2021 9:55:26 PM ******/ +SET ANSI_NULLS ON +GO +SET QUOTED_IDENTIFIER ON +GO +CREATE TABLE [dbo].[TO_Requests]( + [Request_ID] [int] IDENTITY(1,1) NOT NULL, + [user_id] [int] NULL, + [date_from] [smalldatetime] NULL, + [date_to] [smalldatetime] NULL, + [comments] [varchar](2000) NULL, + [status] [varchar](10) NOT NULL, + [profile_id] [int] NULL, + [DateAdded] [smalldatetime] NOT NULL, + [UpdatedBy] [int] NULL, + [UpdatedOn] [smalldatetime] NULL, + [InitialResponseBy] [int] NULL, + [InitialResponseDate] [smalldatetime] NULL, + [InitialResponseStatus] [varchar](10) NULL, + [adpRequestId] [varchar](50) NULL, + CONSTRAINT [PK_TO_Requests_Request_ID] PRIMARY KEY CLUSTERED +( + [Request_ID] ASC +)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, +ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] +) ON [PRIMARY] +GO +/****** Object: Table [dbo].[ToDo] Script Date: 9/29/2021 9:55:26 PM ******/ +SET ANSI_NULLS ON +GO +SET QUOTED_IDENTIFIER ON +GO +CREATE TABLE [dbo].[ToDo]( + [ID] [int] IDENTITY(1,1) NOT NULL, + [ProfileID] [int] NOT NULL, + [Title] [varchar](50) NULL, + CONSTRAINT [PK_ToDo_ID] PRIMARY KEY CLUSTERED +( + [ID] ASC +)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, +ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] +) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY] +GO +/****** Object: Table [dbo].[ToDoComments] Script Date: 9/29/2021 9:55:26 PM ******/ +SET ANSI_NULLS ON +GO +SET QUOTED_IDENTIFIER ON +GO +CREATE TABLE [dbo].[ToDoComments]( + [ToDoCommentsId] [int] IDENTITY(1,1) NOT NULL, + [CreatedBy] [int] NOT NULL, + CONSTRAINT [PK_ToDoComments_ToDoCommentsId] PRIMARY KEY CLUSTERED +( + [ToDoCommentsId] ASC +)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, +ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] +) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY] +GO""" + result = DDLParser(ddl).run(group_by_type=True) + expected = { + "ddl_properties": [ + {"name": "ANSI_NULLS", "value": "ON"}, + {"name": "QUOTED_IDENTIFIER", "value": "ON"}, + {"name": "ANSI_NULLS", "value": "ON"}, + {"name": "QUOTED_IDENTIFIER", "value": "ON"}, + {"name": "ANSI_NULLS", "value": "ON"}, + {"name": "QUOTED_IDENTIFIER", "value": "ON"}, + ], + "domains": [], + "schemas": [], + "sequences": [], + "tables": [ + { + "alter": {}, + "checks": [], + "columns": [ + { + "check": None, + "default": None, + "name": "[Request_ID]", + "nullable": False, + "references": None, + "size": (1, 1), + "type": "[int] IDENTITY", + "unique": False, + }, + { + "check": None, + "default": None, + "name": "[user_id]", + "nullable": True, + "references": None, + "size": None, + "type": "[int]", + "unique": False, + }, + { + "check": None, + "default": None, + "name": "[date_from]", + "nullable": True, + "references": None, + "size": None, + "type": "[smalldatetime]", + "unique": False, + }, + { + "check": None, + "default": None, + "name": "[date_to]", + "nullable": True, + "references": None, + "size": None, + "type": "[smalldatetime]", + "unique": False, + }, + { + "check": None, + "default": None, + "name": "[comments]", + "nullable": True, + "references": None, + "size": 2000, + "type": "[varchar]", + "unique": False, + }, + { + "check": None, + "default": None, + "name": "[status]", + "nullable": False, + "references": None, + "size": 10, + "type": "[varchar]", + "unique": False, + }, + { + "check": None, + "default": None, + "name": "[profile_id]", + "nullable": True, + "references": None, + "size": None, + "type": "[int]", + "unique": False, + }, + { + "check": None, + "default": None, + "name": "[DateAdded]", + "nullable": False, + "references": None, + "size": None, + "type": "[smalldatetime]", + "unique": False, + }, + { + "check": None, + "default": None, + "name": "[UpdatedBy]", + "nullable": True, + "references": None, + "size": None, + "type": "[int]", + "unique": False, + }, + { + "check": None, + "default": None, + "name": "[UpdatedOn]", + "nullable": True, + "references": None, + "size": None, + "type": "[smalldatetime]", + "unique": False, + }, + { + "check": None, + "default": None, + "name": "[InitialResponseBy]", + "nullable": True, + "references": None, + "size": None, + "type": "[int]", + "unique": False, + }, + { + "check": None, + "default": None, + "name": "[InitialResponseDate]", + "nullable": True, + "references": None, + "size": None, + "type": "[smalldatetime]", + "unique": False, + }, + { + "check": None, + "default": None, + "name": "[InitialResponseStatus]", + "nullable": True, + "references": None, + "size": 10, + "type": "[varchar]", + "unique": False, + }, + { + "check": None, + "default": None, + "name": "[adpRequestId]", + "nullable": True, + "references": None, + "size": 50, + "type": "[varchar]", + "unique": False, + }, + ], + "constraints": { + "primary_keys": [ + { + "CLUSTERED": True, + "columns": { + "columns": ["[Request_ID]"], + "detailed_columns": [ + { + "name": "[Request_ID]", + "nulls": "LAST", + "order": "ASC", + } + ], + }, + "constraint_name": "[PK_TO_Requests_Request_ID]", + "with": { + "on": "[PRIMARY]", + "properties": [ + {"name": "PAD_INDEX", "value": "OFF"}, + {"name": "STATISTICS_NORECOMPUTE", "value": "OFF"}, + {"name": "IGNORE_DUP_KEY", "value": "OFF"}, + {"name": "ALLOW_ROW_LOCKS", "value": "ON"}, + {"name": "ALLOW_PAGE_LOCKS", "value": "ON"}, + ], + }, + } + ] + }, + "index": [], + "on": "[PRIMARY]", + "partitioned_by": [], + "primary_key": ["detailed_columns", "columns"], + "schema": "[dbo]", + "table_name": "[TO_Requests]", + "tablespace": None, + }, + { + "alter": {}, + "checks": [], + "columns": [ + { + "check": None, + "default": None, + "name": "[ID]", + "nullable": False, + "references": None, + "size": (1, 1), + "type": "[int] IDENTITY", + "unique": False, + }, + { + "check": None, + "default": None, + "name": "[ProfileID]", + "nullable": False, + "references": None, + "size": None, + "type": "[int]", + "unique": False, + }, + { + "check": None, + "default": None, + "name": "[Title]", + "nullable": True, + "references": None, + "size": 50, + "type": "[varchar]", + "unique": False, + }, + ], + "constraints": { + "primary_keys": [ + { + "CLUSTERED": True, + "columns": { + "columns": ["[ID]"], + "detailed_columns": [ + {"name": "[ID]", "nulls": "LAST", "order": "ASC"} + ], + }, + "constraint_name": "[PK_ToDo_ID]", + "with": { + "on": "[PRIMARY]", + "properties": [ + {"name": "PAD_INDEX", "value": "OFF"}, + {"name": "STATISTICS_NORECOMPUTE", "value": "OFF"}, + {"name": "IGNORE_DUP_KEY", "value": "OFF"}, + {"name": "ALLOW_ROW_LOCKS", "value": "ON"}, + {"name": "ALLOW_PAGE_LOCKS", "value": "ON"}, + ], + }, + } + ] + }, + "index": [], + "on": "[PRIMARY]", + "partitioned_by": [], + "primary_key": ["detailed_columns", "columns"], + "schema": "[dbo]", + "table_name": "[ToDo]", + "tablespace": None, + "textimage_on": "[PRIMARY]", + }, + { + "alter": {}, + "checks": [], + "columns": [ + { + "check": None, + "default": None, + "name": "[ToDoCommentsId]", + "nullable": False, + "references": None, + "size": (1, 1), + "type": "[int] IDENTITY", + "unique": False, + }, + { + "check": None, + "default": None, + "name": "[CreatedBy]", + "nullable": False, + "references": None, + "size": None, + "type": "[int]", + "unique": False, + }, + ], + "constraints": { + "primary_keys": [ + { + "CLUSTERED": True, + "columns": { + "columns": ["[ToDoCommentsId]"], + "detailed_columns": [ + { + "name": "[ToDoCommentsId]", + "nulls": "LAST", + "order": "ASC", + } + ], + }, + "constraint_name": "[PK_ToDoComments_ToDoCommentsId]", + "with": { + "on": "[PRIMARY]", + "properties": [ + {"name": "PAD_INDEX", "value": "OFF"}, + {"name": "STATISTICS_NORECOMPUTE", "value": "OFF"}, + {"name": "IGNORE_DUP_KEY", "value": "OFF"}, + {"name": "ALLOW_ROW_LOCKS", "value": "ON"}, + {"name": "ALLOW_PAGE_LOCKS", "value": "ON"}, + ], + }, + } + ] + }, + "index": [], + "on": "[PRIMARY]", + "partitioned_by": [], + "primary_key": ["detailed_columns", "columns"], + "schema": "[dbo]", + "table_name": "[ToDoComments]", + "tablespace": None, + "textimage_on": "[PRIMARY]", + }, + ], + "types": [], + } + + assert expected == result From cd452a5768816c32f806c77fcce310dc1e4c915a Mon Sep 17 00:00:00 2001 From: "xnuinside@gmail.com" Date: Wed, 6 Oct 2021 21:43:44 +0300 Subject: [PATCH 2/2] added support for create type as table --- CHANGELOG.txt | 3 ++ README.md | 22 +++++++- docs/README.rst | 27 +++++++++- pyproject.toml | 4 +- simple_ddl_parser/ddl_parser.py | 2 - simple_ddl_parser/dialects/mssql.py | 2 - simple_ddl_parser/dialects/sql.py | 75 ++++++++++++++++++-------- simple_ddl_parser/parser.py | 2 - tests/test_custom_types_and_domains.py | 53 +++++++++++++++++- tests/test_mssql_specific.py | 40 ++++++++++++++ 10 files changed, 196 insertions(+), 34 deletions(-) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 6a8c00f..28acb19 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -8,7 +8,10 @@ 2. ON [PRIMARY] after CREATE TABLE statement (sample in test files test_mssql_specific.py) 3. WITH statement for TABLE properties 4. TEXTIMAGE_ON statement + 5. DEFAULT NEXT VALUE FOR in COLUMN DEFAULT + 2. Added support for separating tables DDL by 'GO' statement as in output of MSSQL + 3. Added support for CREATE TYPE as TABLE **v0.20.0** ### New Features: diff --git a/README.md b/README.md index 16427a1..a59f456 100644 --- a/README.md +++ b/README.md @@ -282,7 +282,7 @@ You also can provide a path where you want to have a dumps with schema with argu - CREATE SEQUENCE with words: INCREMENT, START, MINVALUE, MAXVALUE, CACHE -- CREATE TYPE statement: AS ENUM, AS OBJECT, INTERNALLENGTH, INPUT, OUTPUT +- CREATE TYPE statement: AS TABLE, AS ENUM, AS OBJECT, INTERNALLENGTH, INPUT, OUTPUT - LIKE statement (in this and only in this case to output will be added 'like' keyword with information about table from that we did like - 'like': {'schema': None, 'table_name': 'Old_Users'}). @@ -318,6 +318,11 @@ You also can provide a path where you want to have a dumps with schema with argu - CONSTRAINT [CLUSTERED]... PRIMARY KEY - CONSTRAINT ... WITH statement +- PERIOD FOR SYSTEM_TIME in CREATE TABLE statement +- ON [PRIMARY] after CREATE TABLE statement (sample in test files test_mssql_specific.py) +- WITH statement for TABLE properties +- TEXTIMAGE_ON statement +- DEFAULT NEXT VALUE FOR in COLUMN DEFAULT ### MSSQL / MySQL/ Oracle @@ -372,6 +377,21 @@ Big thanks for the involving & contribution with test cases with DDL samples & o ## Changelog +**v0.21.0** +### New Features: + + ## MSSQL: + + 1. Added support for statements: + 1. PERIOD FOR SYSTEM_TIME in CREATE TABLE statement + 2. ON [PRIMARY] after CREATE TABLE statement (sample in test files test_mssql_specific.py) + 3. WITH statement for TABLE properties + 4. TEXTIMAGE_ON statement + 5. DEFAULT NEXT VALUE FOR in COLUMN DEFAULT + + 2. Added support for separating tables DDL by 'GO' statement as in output of MSSQL + 3. Added support for CREATE TYPE as TABLE + **v0.20.0** ### New Features: diff --git a/docs/README.rst b/docs/README.rst index de3f999..1b05169 100644 --- a/docs/README.rst +++ b/docs/README.rst @@ -307,7 +307,7 @@ Supported Statements CREATE SEQUENCE with words: INCREMENT, START, MINVALUE, MAXVALUE, CACHE * - CREATE TYPE statement: AS ENUM, AS OBJECT, INTERNALLENGTH, INPUT, OUTPUT + CREATE TYPE statement: AS TABLE, AS ENUM, AS OBJECT, INTERNALLENGTH, INPUT, OUTPUT * LIKE statement (in this and only in this case to output will be added 'like' keyword with information about table from that we did like - 'like': {'schema': None, 'table_name': 'Old_Users'}). @@ -356,6 +356,11 @@ MSSQL * CONSTRAINT [CLUSTERED]... PRIMARY KEY * CONSTRAINT ... WITH statement +* PERIOD FOR SYSTEM_TIME in CREATE TABLE statement +* ON [PRIMARY] after CREATE TABLE statement (sample in test files test_mssql_specific.py) +* WITH statement for TABLE properties +* TEXTIMAGE_ON statement +* DEFAULT NEXT VALUE FOR in COLUMN DEFAULT MSSQL / MySQL/ Oracle ^^^^^^^^^^^^^^^^^^^^^ @@ -426,6 +431,26 @@ Big thanks for the involving & contribution with test cases with DDL samples & o Changelog --------- +**v0.21.0** + +New Features: +^^^^^^^^^^^^^ + +.. code-block:: + + ## MSSQL: + + 1. Added support for statements: + 1. PERIOD FOR SYSTEM_TIME in CREATE TABLE statement + 2. ON [PRIMARY] after CREATE TABLE statement (sample in test files test_mssql_specific.py) + 3. WITH statement for TABLE properties + 4. TEXTIMAGE_ON statement + 5. DEFAULT NEXT VALUE FOR in COLUMN DEFAULT + + 2. Added support for separating tables DDL by 'GO' statement as in output of MSSQL + 3. Added support for CREATE TYPE as TABLE + + **v0.20.0** New Features: diff --git a/pyproject.toml b/pyproject.toml index bee6eb0..876967a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] name = "simple-ddl-parser" -version = "0.20.0" -description = "Simple DDL Parser to parse SQL & dialects like HQL, TSQL, Oracle, AWS Redshift, Snowflake, etc ddl files to json/python dict with full information about columns: types, defaults, primary keys, etc.; sequences, alters, custom types & other entities from ddl." +version = "0.21.0" +description = "Simple DDL Parser to parse SQL & dialects like HQL, TSQL (MSSQL), Oracle, AWS Redshift, Snowflake, MySQL, PostgreSQL, etc ddl files to json/python dict with full information about columns: types, defaults, primary keys, etc.; sequences, alters, custom types & other entities from ddl." authors = ["Iuliia Volkova "] license = "MIT" readme = "docs/README.rst" diff --git a/simple_ddl_parser/ddl_parser.py b/simple_ddl_parser/ddl_parser.py index a378675..82c06c5 100755 --- a/simple_ddl_parser/ddl_parser.py +++ b/simple_ddl_parser/ddl_parser.py @@ -103,7 +103,6 @@ def t_ID(self, t): self.lexer.lp_open += 1 self.lexer.columns_def = True self.lexer.last_token = "LP" - print(t.value, t.type) return t elif self.is_token_column_name(t): @@ -129,7 +128,6 @@ def set_last_token(self, t): self.lexer.is_table = False elif t.type in ["TABLE", "INDEX"]: self.lexer.is_table = True - print(t.value, t.type) return t def t_error(self, t): diff --git a/simple_ddl_parser/dialects/mssql.py b/simple_ddl_parser/dialects/mssql.py index 442e793..f4631fa 100644 --- a/simple_ddl_parser/dialects/mssql.py +++ b/simple_ddl_parser/dialects/mssql.py @@ -46,7 +46,6 @@ def p_with_args(self, p): | with_args RP """ p_list = list(p) - print(p_list, "p_list, with_args") if isinstance(p[1], dict): p[0] = p[1] else: @@ -59,7 +58,6 @@ def p_with_args(self, p): def p_period_for(self, p): """period_for : PERIOD FOR ID LP pid RP""" - print(list(p)) p[0] = {"period_for_system_time": p[5]} def p_expression_on_primary(self, p): diff --git a/simple_ddl_parser/dialects/sql.py b/simple_ddl_parser/dialects/sql.py index 255ac7d..179143b 100644 --- a/simple_ddl_parser/dialects/sql.py +++ b/simple_ddl_parser/dialects/sql.py @@ -409,26 +409,44 @@ def p_multiple_column_names(self, p: List) -> None: if p_list[-1] != ",": p[0].append(p_list[-1]) - def p_expression_type_as(self, p: List) -> None: - """expr : type_name ID LP pid RP + def p_type_definition(self, p: List) -> None: # noqa: C901 + """type_definition : type_name ID LP pid RP | type_name ID LP multiple_column_names RP | type_name LP id_equals RP + | type_name TABLE LP defcolumn + | type_definition COMMA defcolumn + | type_definition RP """ - p_list = list(p) + p_list = remove_par(list(p)) p[0] = p[1] - p[0]["base_type"] = p[2] - p[0]["properties"] = {} - base_type = p[0]["base_type"].upper() - if base_type == "ENUM": - p[0]["properties"]["values"] = p_list[4] - elif p[0]["base_type"] == "OBJECT": - if "type" in p_list[4][0]: - p[0]["properties"]["attributes"] = p_list[4] + if not p[0].get("properties"): + p[0]["properties"] = {} + + if "TABLE" in p_list or isinstance(p_list[-1], dict) and p_list[-1].get("name"): + if not p[0]["properties"].get("columns"): + p[0]["properties"]["columns"] = [] + p[0]["properties"]["columns"].append(p_list[-1]) + + if len(p_list) > 3: + p[0]["base_type"] = p_list[2] else: - if isinstance(p_list[-2], list): - for item in p_list[-2]: + p[0]["base_type"] = None + if isinstance(p[0]["base_type"], str): + base_type = p[0]["base_type"].upper() + if base_type == "ENUM": + p[0]["properties"]["values"] = p_list[3] + elif p[0]["base_type"] == "OBJECT": + if "type" in p_list[3][0]: + p[0]["properties"]["attributes"] = p_list[3] + else: + if isinstance(p_list[-1], list): + for item in p_list[-1]: p[0]["properties"].update(item) + def p_expression_type_as(self, p: List) -> None: + """expr : type_definition""" + p[0] = p[1] + def p_type_name(self, p: List) -> None: """type_name : type_create ID AS | type_create ID DOT ID AS @@ -573,7 +591,6 @@ def p_expression_table(self, p: List) -> None: """ p[0] = p[1] p_list = list(p) - print(p_list[-1]) if p_list[-1] != "," and p_list[-1] != ")": if "type" in p_list[-1] and "name" in p_list[-1]: p[0]["columns"].append(p_list[-1]) @@ -843,33 +860,45 @@ def p_funct_expr(self, p: List) -> None: else: p[0] = p[1] + def p_dot_id(self, p: List) -> None: + """dot_id : ID DOT ID""" + p[0] = f"{p[1]}.{p[3]}" + def p_default(self, p: List) -> None: """default : DEFAULT ID | DEFAULT STRING | DEFAULT NULL + | default FOR dot_id | DEFAULT funct_expr | DEFAULT LP pid RP | default ID | default LP RP """ p_list = list(p) + if len(p_list) == 5 and isinstance(p[3], list): default = p[3][0] + elif "DEFAULT" in p_list and len(p_list) == 4: + default = f"{p[2]} {p[3]}" else: default = p[2] - if default.isnumeric(): + if not isinstance(default, dict) and default.isnumeric(): default = int(default) + if isinstance(p[1], dict): p[0] = p[1] - for i in p[2:]: - if isinstance(p[2], str): - p[2] = p[2].replace("\\'", "'") - if i == ")" or i == "(": - p[0]["default"] = str(p[0]["default"]) + f"{i}" - else: - p[0]["default"] = str(p[0]["default"]) + f" {i}" - p[0]["default"] = p[0]["default"].replace("))", ")") + if "FOR" in default: + p[0]["default"] = {"next_value_for": p_list[-1]} + else: + for i in p[2:]: + if isinstance(p[2], str): + p[2] = p[2].replace("\\'", "'") + if i == ")" or i == "(": + p[0]["default"] = str(p[0]["default"]) + f"{i}" + else: + p[0]["default"] = str(p[0]["default"]) + f" {i}" + p[0]["default"] = p[0]["default"].replace("))", ")") else: p[0] = {"default": default} diff --git a/simple_ddl_parser/parser.py b/simple_ddl_parser/parser.py index 2720b30..fa6f530 100755 --- a/simple_ddl_parser/parser.py +++ b/simple_ddl_parser/parser.py @@ -137,7 +137,6 @@ def parse_data(self): skip_line_words = ["USE", "GO"] set_line = None for num, line in enumerate(lines): - print(repr(line)) skip = False for word in skip_line_words: if line.startswith(word): @@ -164,7 +163,6 @@ def parse_data(self): self.set_default_flags_in_lexer() if not set_line and statement: - print("statement", statement) self.parse_statement(tables, statement) statement = None diff --git a/tests/test_custom_types_and_domains.py b/tests/test_custom_types_and_domains.py index 1fa79f7..8aa457a 100644 --- a/tests/test_custom_types_and_domains.py +++ b/tests/test_custom_types_and_domains.py @@ -145,7 +145,7 @@ def test_create_type_with_input_properties(): "tables": [], "types": [ { - "base_type": "(", + "base_type": None, "properties": { "INPUT": "my_box_in_function", "INTERNALLENGTH": "16", @@ -158,3 +158,54 @@ def test_create_type_with_input_properties(): } assert expected == result + + +def test_as_table(): + ddl = """ + CREATE TYPE dbo.T_LCT_SLIPS AS TABLE ( + hashKY varbinary(48), + numContratoGF bigint + ); + """ + result = DDLParser(ddl).run(group_by_type=True) + expected = { + "ddl_properties": [], + "domains": [], + "schemas": [], + "sequences": [], + "tables": [], + "types": [ + { + "base_type": None, + "properties": { + "columns": [ + { + "check": None, + "default": None, + "name": "hashKY", + "nullable": True, + "primary_key": False, + "references": None, + "size": 48, + "type": "varbinary", + "unique": False, + }, + { + "check": None, + "default": None, + "name": "numContratoGF", + "nullable": True, + "primary_key": False, + "references": None, + "size": None, + "type": "bigint", + "unique": False, + }, + ] + }, + "schema": "dbo", + "type_name": "T_LCT_SLIPS", + } + ], + } + assert result == expected diff --git a/tests/test_mssql_specific.py b/tests/test_mssql_specific.py index b13dc78..82ccf3a 100644 --- a/tests/test_mssql_specific.py +++ b/tests/test_mssql_specific.py @@ -2972,3 +2972,43 @@ def test_output_separated_by_go_and_textimage(): } assert expected == result + + +def test_next_value_for(): + + ddl = """CREATE TABLE [dbo].[SLIPEVENTO] ( + [cdSLIP] [bigint] NOT NULL + DEFAULT NEXT VALUE FOR [dbo].[sqCdSLIPEvt] )""" + result = DDLParser(ddl).run(group_by_type=True) + expected = { + "ddl_properties": [], + "domains": [], + "schemas": [], + "sequences": [], + "tables": [ + { + "alter": {}, + "checks": [], + "columns": [ + { + "check": None, + "default": {"next_value_for": "[dbo].[sqCdSLIPEvt]"}, + "name": "[cdSLIP]", + "nullable": False, + "references": None, + "size": None, + "type": "[bigint]", + "unique": False, + } + ], + "index": [], + "partitioned_by": [], + "primary_key": [], + "schema": "[dbo]", + "table_name": "[SLIPEVENTO]", + "tablespace": None, + } + ], + "types": [], + } + assert expected == result