Merge pull request #84 from xnuinside/v0.20.0_release_mssql

V0.21.0 huge MSSQL Release
xnuinside · Oct 6, 2021 · 1ebc189 · 1ebc189
2 parents ce71e4d + cd452a5
commit 1ebc189
Show file tree

Hide file tree

Showing 10 changed files with 1,148 additions and 52 deletions.
diff --git a/CHANGELOG.txt b/CHANGELOG.txt
@@ -1,3 +1,18 @@
+**v0.21.0**
+### New Features:
+
+    ## MSSQL:
+
+    1. Added support for statements: 
+        1. PERIOD FOR SYSTEM_TIME in CREATE TABLE statement
+        2. ON [PRIMARY] after CREATE TABLE statement (sample in test files test_mssql_specific.py)
+        3. WITH statement for TABLE properties
+        4. TEXTIMAGE_ON statement
+        5. DEFAULT NEXT VALUE FOR in COLUMN DEFAULT
+
+    2. Added support for separating table DDL statements with the 'GO' statement, as in MSSQL output
+    3. Added support for CREATE TYPE ... AS TABLE
+
 **v0.20.0**
 ### New Features:
 

diff --git a/README.md b/README.md
@@ -282,7 +282,7 @@ You also can provide a path where you want to have a dumps with schema with argu
 
 - CREATE SEQUENCE with words: INCREMENT, START, MINVALUE, MAXVALUE, CACHE
 
-- CREATE TYPE statement:  AS ENUM, AS OBJECT, INTERNALLENGTH, INPUT, OUTPUT
+- CREATE TYPE statement:  AS TABLE, AS ENUM, AS OBJECT, INTERNALLENGTH, INPUT, OUTPUT
 
 - LIKE statement (in this case, and only in this case, a 'like' key is added to the output with information about the table that was used as the source - 'like': {'schema': None, 'table_name': 'Old_Users'}).
 
@@ -318,6 +318,11 @@ You also can provide a path where you want to have a dumps with schema with argu
 
 - CONSTRAINT [CLUSTERED]... PRIMARY KEY
 - CONSTRAINT ... WITH statement
+- PERIOD FOR SYSTEM_TIME in CREATE TABLE statement
+- ON [PRIMARY] after CREATE TABLE statement (sample in test files test_mssql_specific.py)
+- WITH statement for TABLE properties
+- TEXTIMAGE_ON statement
+- DEFAULT NEXT VALUE FOR in COLUMN DEFAULT
 
 ### MSSQL / MySQL/ Oracle
 
@@ -372,6 +377,21 @@ Big thanks for the involving & contribution with test cases with DDL samples & o
 
 
 ## Changelog
+**v0.21.0**
+### New Features:
+
+    ## MSSQL:
+
+    1. Added support for statements: 
+        1. PERIOD FOR SYSTEM_TIME in CREATE TABLE statement
+        2. ON [PRIMARY] after CREATE TABLE statement (sample in test files test_mssql_specific.py)
+        3. WITH statement for TABLE properties
+        4. TEXTIMAGE_ON statement
+        5. DEFAULT NEXT VALUE FOR in COLUMN DEFAULT
+
+    2. Added support for separating table DDL statements with the 'GO' statement, as in MSSQL output
+    3. Added support for CREATE TYPE ... AS TABLE
+
 **v0.20.0**
 ### New Features:
 

diff --git a/docs/README.rst b/docs/README.rst
@@ -307,7 +307,7 @@ Supported Statements
   CREATE SEQUENCE with words: INCREMENT, START, MINVALUE, MAXVALUE, CACHE
 
 * 
-  CREATE TYPE statement:  AS ENUM, AS OBJECT, INTERNALLENGTH, INPUT, OUTPUT
+  CREATE TYPE statement:  AS TABLE, AS ENUM, AS OBJECT, INTERNALLENGTH, INPUT, OUTPUT
 
 * 
   LIKE statement (in this case, and only in this case, a 'like' key is added to the output with information about the table that was used as the source - 'like': {'schema': None, 'table_name': 'Old_Users'}).
@@ -356,6 +356,11 @@ MSSQL
 
 * CONSTRAINT [CLUSTERED]... PRIMARY KEY
 * CONSTRAINT ... WITH statement
+* PERIOD FOR SYSTEM_TIME in CREATE TABLE statement
+* ON [PRIMARY] after CREATE TABLE statement (sample in test files test_mssql_specific.py)
+* WITH statement for TABLE properties
+* TEXTIMAGE_ON statement
+* DEFAULT NEXT VALUE FOR in COLUMN DEFAULT
 
 MSSQL / MySQL/ Oracle
 ^^^^^^^^^^^^^^^^^^^^^
@@ -426,6 +431,26 @@ Big thanks for the involving & contribution with test cases with DDL samples & o
 Changelog
 ---------
 
+**v0.21.0**
+
+New Features:
+^^^^^^^^^^^^^
+
+.. code-block::
+
+   ## MSSQL:
+
+   1. Added support for statements: 
+       1. PERIOD FOR SYSTEM_TIME in CREATE TABLE statement
+       2. ON [PRIMARY] after CREATE TABLE statement (sample in test files test_mssql_specific.py)
+       3. WITH statement for TABLE properties
+       4. TEXTIMAGE_ON statement
+       5. DEFAULT NEXT VALUE FOR in COLUMN DEFAULT
+
+   2. Added support for separating table DDL statements with the 'GO' statement, as in MSSQL output
+   3. Added support for CREATE TYPE ... AS TABLE
+
+
 **v0.20.0**
 
 New Features:

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,7 +1,7 @@
 [tool.poetry]
 name = "simple-ddl-parser"
-version = "0.20.0"
-description = "Simple DDL Parser to parse SQL & dialects like HQL, TSQL, Oracle, AWS Redshift, Snowflake, etc ddl files to json/python dict with full information about columns: types, defaults, primary keys, etc.; sequences, alters, custom types & other entities from ddl."
+version = "0.21.0"
+description = "Simple DDL Parser to parse SQL & dialects like HQL, TSQL (MSSQL), Oracle, AWS Redshift, Snowflake, MySQL, PostgreSQL, etc ddl files to json/python dict with full information about columns: types, defaults, primary keys, etc.; sequences, alters, custom types & other entities from ddl."
 authors = ["Iuliia Volkova <[email protected]>"]
 license = "MIT"
 readme = "docs/README.rst"

diff --git a/simple_ddl_parser/dialects/mssql.py b/simple_ddl_parser/dialects/mssql.py
@@ -6,31 +6,71 @@ def p_pkey_constraint(self, p):
         """pkey_constraint : constraint pkey_statement ID LP index_pid RP
         | constraint pkey_statement LP index_pid RP
         | pkey_constraint with
+        | pkey_constraint with ON ID
         """
         p_list = list(p)
         p[0] = p[1]
-        if len(p_list) == 3:
-            data = p_list[-1]
+        if isinstance(p[2], dict) and "with" in p[2]:
+            data = p_list[2]
+            if "ON" in p_list:
+                data["with"]["on"] = p_list[-1]
         elif len(p_list) == 7:
             data = {"primary_key": True, "columns": p_list[-2], p[3]: True}
         else:
             data = {"primary_key": True, "columns": p_list[-2]}
+
         p[0]["constraint"].update(data)
 
     def p_with(self, p):
-        """with : WITH LP ID ID ID
-        | with COMMA ID ID ID
-        | WITH LP ID ID ON
-        | with COMMA ID ID ON
-        | with RP
-        | with RP ON ID
+        """with : WITH with_args"""
+        p_list = list(p)
+        p[0] = {"with": {"properties": [], "on": None}}
+        if ")" not in p_list:
+            p[0]["with"]["properties"] = p_list[-1]["properties"]
+
+    def p_equals(self, p):
+        """equals : ID ID ID
+        | ID ID ON
+        | ID ID ID DOT ID
+        """
+        p_list = list(p)
+        if "." in p_list:
+            p[0] = {"name": p_list[1], "value": f"{p_list[3]}.{p_list[5]}"}
+        else:
+            p[0] = {"name": p_list[-3], "value": p_list[-1]}
+
+    def p_with_args(self, p):
+        """with_args : LP equals
+        | with_args COMMA equals
+        | with_args with_args
+        | with_args RP
         """
         p_list = list(p)
         if isinstance(p[1], dict):
             p[0] = p[1]
         else:
-            p[0] = {"with": {"properties": [], "on": None}}
-        if ")" not in p_list:
-            p[0]["with"]["properties"].append({"name": p_list[-3], "value": p_list[-1]})
-        elif "ON" in p_list:
-            p[0]["with"]["on"] = p_list[-1]
+            p[0] = {"properties": []}
+        if ")" != p_list[2]:
+            if ")" == p_list[-1]:
+                p[0]["properties"].append(p_list[-1])
+            else:
+                p[0]["properties"].append(p_list[-1])
+
+    def p_period_for(self, p):
+        """period_for : PERIOD FOR ID LP pid RP"""
+        p[0] = {"period_for_system_time": p[5]}
+
+    def p_expression_on_primary(self, p):
+        """expr : expr ON ID"""
+        p[0] = p[1]
+        p[0]["on"] = p[3]
+
+    def p_expression_with(self, p):
+        """expr : expr with"""
+        p[0] = p[1]
+        p[0].update(p[2])
+
+    def p_expression_text_image_on(self, p):
+        """expr : expr TEXTIMAGE_ON ID"""
+        p[0] = p[1]
+        p[0].update({"textimage_on": p[3]})
diff --git a/simple_ddl_parser/dialects/sql.py b/simple_ddl_parser/dialects/sql.py
@@ -409,26 +409,44 @@ def p_multiple_column_names(self, p: List) -> None:
             if p_list[-1] != ",":
                 p[0].append(p_list[-1])
 
-    def p_expression_type_as(self, p: List) -> None:
-        """expr : type_name ID LP pid RP
+    def p_type_definition(self, p: List) -> None:  # noqa: C901
+        """type_definition : type_name ID LP pid RP
         | type_name ID LP multiple_column_names RP
         | type_name LP id_equals RP
+        | type_name TABLE LP defcolumn
+        | type_definition COMMA defcolumn
+        | type_definition RP
         """
-        p_list = list(p)
+        p_list = remove_par(list(p))
         p[0] = p[1]
-        p[0]["base_type"] = p[2]
-        p[0]["properties"] = {}
-        base_type = p[0]["base_type"].upper()
-        if base_type == "ENUM":
-            p[0]["properties"]["values"] = p_list[4]
-        elif p[0]["base_type"] == "OBJECT":
-            if "type" in p_list[4][0]:
-                p[0]["properties"]["attributes"] = p_list[4]
+        if not p[0].get("properties"):
+            p[0]["properties"] = {}
+
+        if "TABLE" in p_list or isinstance(p_list[-1], dict) and p_list[-1].get("name"):
+            if not p[0]["properties"].get("columns"):
+                p[0]["properties"]["columns"] = []
+            p[0]["properties"]["columns"].append(p_list[-1])
+
+        if len(p_list) > 3:
+            p[0]["base_type"] = p_list[2]
         else:
-            if isinstance(p_list[-2], list):
-                for item in p_list[-2]:
+            p[0]["base_type"] = None
+        if isinstance(p[0]["base_type"], str):
+            base_type = p[0]["base_type"].upper()
+            if base_type == "ENUM":
+                p[0]["properties"]["values"] = p_list[3]
+            elif p[0]["base_type"] == "OBJECT":
+                if "type" in p_list[3][0]:
+                    p[0]["properties"]["attributes"] = p_list[3]
+        else:
+            if isinstance(p_list[-1], list):
+                for item in p_list[-1]:
                     p[0]["properties"].update(item)
 
+    def p_expression_type_as(self, p: List) -> None:
+        """expr : type_definition"""
+        p[0] = p[1]
+
     def p_type_name(self, p: List) -> None:
         """type_name : type_create ID AS
         | type_create ID DOT ID AS
@@ -561,6 +579,7 @@ def p_expression_table(self, p: List) -> None:
         | expr COMMA uniq
         | expr COMMA statem_by_id
         | expr COMMA constraint uniq
+        | expr COMMA period_for
         | expr COMMA pkey_constraint
         | expr COMMA constraint pkey
         | expr COMMA constraint pkey enforced
@@ -841,33 +860,45 @@ def p_funct_expr(self, p: List) -> None:
         else:
             p[0] = p[1]
 
+    def p_dot_id(self, p: List) -> None:
+        """dot_id : ID DOT ID"""
+        p[0] = f"{p[1]}.{p[3]}"
+
     def p_default(self, p: List) -> None:
         """default : DEFAULT ID
         | DEFAULT STRING
         | DEFAULT NULL
+        | default FOR dot_id
         | DEFAULT funct_expr
         | DEFAULT LP pid RP
         | default ID
         | default LP RP
         """
         p_list = list(p)
+
         if len(p_list) == 5 and isinstance(p[3], list):
             default = p[3][0]
+        elif "DEFAULT" in p_list and len(p_list) == 4:
+            default = f"{p[2]} {p[3]}"
         else:
             default = p[2]
 
-        if default.isnumeric():
+        if not isinstance(default, dict) and default.isnumeric():
             default = int(default)
+
         if isinstance(p[1], dict):
             p[0] = p[1]
-            for i in p[2:]:
-                if isinstance(p[2], str):
-                    p[2] = p[2].replace("\\'", "'")
-                    if i == ")" or i == "(":
-                        p[0]["default"] = str(p[0]["default"]) + f"{i}"
-                    else:
-                        p[0]["default"] = str(p[0]["default"]) + f" {i}"
-                    p[0]["default"] = p[0]["default"].replace("))", ")")
+            if "FOR" in default:
+                p[0]["default"] = {"next_value_for": p_list[-1]}
+            else:
+                for i in p[2:]:
+                    if isinstance(p[2], str):
+                        p[2] = p[2].replace("\\'", "'")
+                        if i == ")" or i == "(":
+                            p[0]["default"] = str(p[0]["default"]) + f"{i}"
+                        else:
+                            p[0]["default"] = str(p[0]["default"]) + f" {i}"
+                        p[0]["default"] = p[0]["default"].replace("))", ")")
         else:
             p[0] = {"default": default}
 

diff --git a/simple_ddl_parser/parser.py b/simple_ddl_parser/parser.py
@@ -115,7 +115,9 @@ def process_set(tables: List, set_line: str) -> None:
     def parse_set_statement(
         self, tables: List, line: str, set_line: Optional[str]
     ) -> Optional[str]:
+        set_was_in_line = False
         if re.match(r"SET", line):
+            set_was_in_line = True
             if not set_line:
                 set_line = line
             else:
@@ -124,7 +126,7 @@ def parse_set_statement(
         elif set_line and len(set_line.split()) == 3:
             self.process_set(tables, set_line)
             set_line = None
-        return set_line
+        return set_line, set_was_in_line
 
     def parse_data(self):
         tables = []
@@ -140,29 +142,27 @@ def parse_data(self):
                 if line.startswith(word):
                     skip = True
                     break
-            if skip:
-                continue
-
             line, block_comments = self.pre_process_line(line, block_comments)
             line = line.strip().replace("\n", "").replace("\t", "")
-            set_line = self.parse_set_statement(tables, line, set_line)
+            set_line, set_was_in_line = self.parse_set_statement(tables, line, set_line)
             if line or num == len(lines) - 1:
                 # to avoid issues when comma or parath are glued to column name
                 final_line = line.strip().endswith(";")
-                if statement is None:
-                    statement = line
-                else:
-                    statement += f" {line}"
+                if not skip and not set_was_in_line:
+                    if statement is None:
+                        statement = line
+                    else:
+                        statement += f" {line}"
 
                 if final_line:
                     # end of sql operation, remove ; from end of line
                     statement = statement[:-1]
-                elif num != len(lines) - 1:
+                elif num != len(lines) - 1 and not skip:
                     # continue combine lines in one massive
                     continue
 
                 self.set_default_flags_in_lexer()
-                if not set_line:
+                if not set_line and statement:
                     self.parse_statement(tables, statement)
 
                 statement = None

diff --git a/simple_ddl_parser/tokens.py b/simple_ddl_parser/tokens.py
@@ -55,6 +55,7 @@
     "UNIQUE": "UNIQUE",
     "CHECK": "CHECK",
     "WITH": "WITH",
+    "PERIOD": "PERIOD",
 }
 
 common_statements.update(first_liners)
@@ -81,6 +82,8 @@
     # oracle
     "STORAGE": "STORAGE",
     "TABLESPACE": "TABLESPACE",
+    # mssql
+    "TEXTIMAGE_ON": "TEXTIMAGE_ON",
 }
 sequence_reserved = {
     "INCREMENT": "INCREMENT",