From ed1d7e7549ed1b390e6c0e013522d74e980a6e39 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 20 Feb 2020 13:07:52 +0800 Subject: [PATCH] [SPARK-30878][SQL][DOC] Improve the CREATE TABLE document ### What changes were proposed in this pull request? Improve the CREATE TABLE document: 1. mention that some clauses can come in as any order. 2. refine the description for some parameters. 3. mention how data source table interacts with data source 4. make the examples consistent between data source and hive serde tables. ### Why are the changes needed? improve doc ### Does this PR introduce any user-facing change? no ### How was this patch tested? N/A Closes #27638 from cloud-fan/doc. Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- ...-ref-syntax-ddl-create-table-datasource.md | 49 ++++++++++++++----- ...-ref-syntax-ddl-create-table-hiveformat.md | 47 ++++++++++++------ docs/sql-ref-syntax-ddl-create-table.md | 4 +- 3 files changed, 72 insertions(+), 28 deletions(-) diff --git a/docs/sql-ref-syntax-ddl-create-table-datasource.md b/docs/sql-ref-syntax-ddl-create-table-datasource.md index 9b15c0865b38a..532377d7fcec3 100644 --- a/docs/sql-ref-syntax-ddl-create-table-datasource.md +++ b/docs/sql-ref-syntax-ddl-create-table-datasource.md @@ -27,7 +27,7 @@ The `CREATE TABLE` statement defines a new table using a Data Source. {% highlight sql %} CREATE TABLE [ IF NOT EXISTS ] table_identifier [ ( col_name1 col_type1 [ COMMENT col_comment1 ], ... ) ] - USING data_source + [USING data_source] [ OPTIONS ( key1=val1, key2=val2, ... ) ] [ PARTITIONED BY ( col_name1, col_name2, ... ) ] [ CLUSTERED BY ( col_name3, col_name4, ... ) @@ -39,6 +39,9 @@ CREATE TABLE [ IF NOT EXISTS ] table_identifier [ AS select_statement ] {% endhighlight %} +Note that the clauses between the USING clause and the AS SELECT clause can come in +any order. For example, you can write COMMENT table_comment after TBLPROPERTIES. + ### Parameters
@@ -78,13 +81,12 @@ CREATE TABLE [ IF NOT EXISTS ] table_identifier
COMMENT
-
Table comments are added.
+
A string literal to describe the table.
TBLPROPERTIES
-
Table properties that have to be set are specified, such as `created.by.user`, `owner`, etc. -
+
A list of key-value pairs that is used to tag the table definition.
@@ -92,18 +94,43 @@ CREATE TABLE [ IF NOT EXISTS ] table_identifier
The table is populated using the data from the select statement.
+### Data Source Interaction +A Data Source table acts like a pointer to the underlying data source. For example, you can create +a table "foo" in Spark which points to a table "bar" in MySQL using the JDBC Data Source. When you +read/write table "foo", you actually read/write table "bar". + +In general, CREATE TABLE creates a "pointer", and you need to make sure it points to something +that already exists. An exception is file sources such as parquet and json. If you don't specify the LOCATION, +Spark will create a default table location for you. + +For CREATE TABLE AS SELECT, Spark will overwrite the underlying data source with the data of the +input query, to make sure the created table contains exactly the same data as the input query. + ### Examples {% highlight sql %} ---Using data source -CREATE TABLE Student (Id INT,name STRING ,age INT) USING CSV; +--Use data source +CREATE TABLE student (id INT, name STRING, age INT) USING CSV; + +--Use data from another table +CREATE TABLE student_copy USING CSV + AS SELECT * FROM student; + +--Omit the USING clause, which uses the default data source (parquet by default) +CREATE TABLE student (id INT, name STRING, age INT); + +--Specify table comment and properties +CREATE TABLE student (id INT, name STRING, age INT) USING CSV + COMMENT 'this is a comment' + TBLPROPERTIES ('foo'='bar'); ---Using data from another table -CREATE TABLE StudentInfo - AS SELECT * FROM Student; +--Specify table comment and properties with a different clause order +CREATE TABLE student (id INT, name STRING, age INT) USING CSV + TBLPROPERTIES ('foo'='bar') + COMMENT 'this is a comment'; ---Partitioned and bucketed -CREATE TABLE Student (Id INT,name STRING ,age INT) +--Create partitioned and bucketed table +CREATE TABLE student (id INT, name STRING, age INT) USING CSV PARTITIONED BY (age) CLUSTERED BY (Id) INTO 4 buckets; diff --git a/docs/sql-ref-syntax-ddl-create-table-hiveformat.md b/docs/sql-ref-syntax-ddl-create-table-hiveformat.md index 
78b7d0581e985..0425bafd94398 100644 --- a/docs/sql-ref-syntax-ddl-create-table-hiveformat.md +++ b/docs/sql-ref-syntax-ddl-create-table-hiveformat.md @@ -37,6 +37,9 @@ CREATE [ EXTERNAL ] TABLE [ IF NOT EXISTS ] table_identifier {% endhighlight %} +Note that the clauses between the column definition clause and the AS SELECT clause can come in +any order. For example, you can write COMMENT table_comment after TBLPROPERTIES. + ### Parameters
@@ -77,14 +80,12 @@ CREATE [ EXTERNAL ] TABLE [ IF NOT EXISTS ] table_identifier
COMMENT
-
Table comments are added.
+
A string literal to describe the table.
TBLPROPERTIES
-
- Table properties that have to be set are specified, such as `created.by.user`, `owner`, etc. -
+
A list of key-value pairs that is used to tag the table definition.
@@ -96,21 +97,37 @@ CREATE [ EXTERNAL ] TABLE [ IF NOT EXISTS ] table_identifier ### Examples {% highlight sql %} ---Using Comment and loading data from another table into the created table -CREATE TABLE StudentInfo - COMMENT 'Table is created using existing data' - AS SELECT * FROM Student; +--Use hive format +CREATE TABLE student (id INT, name STRING, age INT) STORED AS ORC; + +--Use data from another table +CREATE TABLE student_copy STORED AS ORC + AS SELECT * FROM student; + +--Specify table comment and properties +CREATE TABLE student (id INT, name STRING, age INT) + COMMENT 'this is a comment' + STORED AS ORC + TBLPROPERTIES ('foo'='bar'); + +--Specify table comment and properties with a different clause order +CREATE TABLE student (id INT, name STRING, age INT) + STORED AS ORC + TBLPROPERTIES ('foo'='bar') + COMMENT 'this is a comment'; ---Partitioned table -CREATE TABLE Student (Id INT,name STRING) +--Create partitioned table +CREATE TABLE student (id INT, name STRING) PARTITIONED BY (age INT) - TBLPROPERTIES ('owner'='xxxx'); + STORED AS ORC; -CREATE TABLE Student (Id INT,name STRING,age INT) - PARTITIONED BY (name,age); +--Create partitioned table with a different clause order +CREATE TABLE student (id INT, name STRING) + STORED AS ORC + PARTITIONED BY (age INT); ---Using Row Format and file format -CREATE TABLE Student (Id INT,name STRING) +--Use Row Format and file format +CREATE TABLE student (id INT,name STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE; diff --git a/docs/sql-ref-syntax-ddl-create-table.md b/docs/sql-ref-syntax-ddl-create-table.md index c0e58a41cf5cc..20aff6fb823cb 100644 --- a/docs/sql-ref-syntax-ddl-create-table.md +++ b/docs/sql-ref-syntax-ddl-create-table.md @@ -20,10 +20,10 @@ license: | --- ### Description -`CREATE TABLE` statement is used to define a table in an exsisting database. +The `CREATE TABLE` statement is used to define a table in an existing database. 
The CREATE statements: -* [CREATE TABLE USING DATASOURCE](sql-ref-syntax-ddl-create-table-datasource.html) +* [CREATE TABLE USING DATA_SOURCE](sql-ref-syntax-ddl-create-table-datasource.html) * [CREATE TABLE USING HIVE FORMAT](sql-ref-syntax-ddl-create-table-hiveformat.html) * [CREATE TABLE LIKE](sql-ref-syntax-ddl-create-table-like.html)