Skip to content

Commit

Permalink
pretty format Struct DataType
Browse files (browse the repository at this point in the history)
  • Loading branch information
wenshao committed Dec 15, 2024
1 parent 994c5d5 commit b0af2a0
Show file tree
Hide file tree
Showing 8 changed files with 1,110 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,18 @@ protected SQLStructDataType parseDataTypeStruct0() {
SQLName name;
String str = lexer.stringVal();
if (BQ.DIALECT.isBuiltInDataType(str)) {
name = null;
Lexer.SavePoint mark = lexer.markOut();
lexer.nextToken();
String tokenName = lexer.token() == Token.IDENTIFIER ? lexer.stringVal() : lexer.token().name;
if (tokenName != null
&& Character.isLetter(tokenName.charAt(0))
&& BQ.DIALECT.isBuiltInDataType(lexer.stringVal())
) {
name = new SQLIdentifierExpr(str);
} else {
lexer.reset(mark);
name = null;
}
} else {
name = new SQLIdentifierExpr(str);
lexer.nextToken();
Expand Down
2 changes: 1 addition & 1 deletion core/src/main/java/com/alibaba/druid/sql/parser/Token.java
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ public enum Token {
REPLACE("REPLACE"),
PERIOD("PERIOD"),

BITS,
BITS("BITS"),

// MySql procedure add by zz
WHILE("WHILE"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10022,7 +10022,39 @@ public boolean visit(SQLMapDataType x) {
/**
 * Prints a struct data type as {@code STRUCT<field, field, ...>} (lowercase
 * {@code struct<} when {@code ucase} is false).
 *
 * <p>Layout is chosen per struct: the fields are placed one per line when the struct
 * has more than five fields, or when any direct field is itself an array or struct
 * type; otherwise they stay on one line separated by {@code ", "}.
 *
 * @param x the struct data type node to print
 * @return always {@code false}; presumably this tells the visitor framework not to
 *         descend into the children again, since the fields are visited here
 *         (confirm against the visitor contract)
 */
@Override
public boolean visit(SQLStructDataType x) {
print0(ucase ? "STRUCT<" : "struct<");
// NOTE(review): this view is a diff rendered without +/- markers. The hunk header
// reports 7 old lines and 39 new lines, so one line shown here is the removed one;
// this call looks like the old single-line printer that the loop below replaces.
// If both were live the fields would be emitted twice - confirm against the
// committed file.
printAndAccept(x.getFields(), ", ");
List<SQLStructDataType.Field> fields = x.getFields();
// Decide between the compact single-line form and the one-field-per-line form.
boolean needPrintLine = false;
if (fields.size() > 5) {
// Wide structs are always broken across lines.
needPrintLine = true;
} else {
// A short struct is still broken across lines when a direct field is an array or
// a struct. Only the field's own type is inspected; other wrappers are not.
for (SQLStructDataType.Field field : fields) {
SQLDataType fieldDataType = field.getDataType();
if (fieldDataType instanceof SQLArrayDataType || fieldDataType instanceof SQLStructDataType) {
needPrintLine = true;
break;
}
}
}
if (needPrintLine) {
// Start the field list on a fresh line, one indent level deeper than the line
// that opened the struct.
incrementIndent();
println();
}
for (int i = 0; i < fields.size(); i++) {
if (i != 0) {
// Separator before every field except the first.
if (needPrintLine) {
print0(',');
println();
} else {
print0(", ");
}
}
SQLStructDataType.Field field = fields.get(i);
field.accept(this);
}
if (needPrintLine) {
// Drop back one level before the line break so the closing '>' lines up with the
// line that opened the struct.
decrementIndent();
println();
}
print('>');
return false;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,13 @@ public void test_0() throws Exception {

assertEquals("CREATE EXTERNAL TABLE `json_table_1` (\n" +
"\t`docid` string COMMENT 'from deserializer',\n" +
"\t`user_1` STRUCT<id:int, username:string, name:string, shippingaddress:STRUCT<address1:string, address2:string, city:string, state:string>, orders:ARRAY<STRUCT<itemid:int, orderdate:string>>> COMMENT 'from deserializer'\n" +
"\t`user_1` STRUCT<\n" +
"\t\tid:int,\n" +
"\t\tusername:string,\n" +
"\t\tname:string,\n" +
"\t\tshippingaddress:STRUCT<address1:string, address2:string, city:string, state:string>,\n" +
"\t\torders:ARRAY<STRUCT<itemid:int, orderdate:string>>\n" +
"\t> COMMENT 'from deserializer'\n" +
")\n" +
"ROW FORMAT\n" +
"\tSERDE 'org.apache.hive.hcatalog.data.JsonSerDe'\n" +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,11 +120,80 @@ public void test_0() throws Exception {
String text = SQLUtils.toSQLString(stmt, JdbcConstants.HIVE);

assertEquals("CREATE EXTERNAL TABLE user_snp_test_4 (\n" +
"\tvariant STRUCT<contigName:string, start:bigint, end:bigint, names:ARRAY<string>, splitFromMultiAllelic:boolean, referenceAllele:string, alternateAllele:string, quality:double, filtersApplied:boolean, filtersPassed:boolean, filtersFailed:ARRAY<string>, annotation:STRUCT<ancestralAllele:string, alleleCount:int, readDepth:int, forwardReadDepth:int, reverseReadDepth:int, referenceReadDepth:int, referenceForwardReadDepth:int, referenceReverseReadDepth:int, alleleFrequency:float, cigar:string, dbSnp:boolean, hapMap2:boolean, hapMap3:boolean, validated:boolean, thousandGenomes:boolean, somatic:boolean, transcriptEffects:ARRAY<STRUCT<alternateAllele:string, effects:ARRAY<string>, geneName:string, geneId:string, featureType:string, featureId:string, biotype:string, rank:int, total:int, genomicHgvs:string, transcriptHgvs:string, proteinHgvs:string, cdnaPosition:int, cdnaLength:int, cdsPosition:int, cdsLength:int, proteinPosition:int, proteinLength:int, distance:int, messages:ARRAY<string>>>, attributes:MAP<string, string>>>,\n" +
"\tvariant STRUCT<\n" +
"\t\tcontigName:string,\n" +
"\t\tstart:bigint,\n" +
"\t\tend:bigint,\n" +
"\t\tnames:ARRAY<string>,\n" +
"\t\tsplitFromMultiAllelic:boolean,\n" +
"\t\treferenceAllele:string,\n" +
"\t\talternateAllele:string,\n" +
"\t\tquality:double,\n" +
"\t\tfiltersApplied:boolean,\n" +
"\t\tfiltersPassed:boolean,\n" +
"\t\tfiltersFailed:ARRAY<string>,\n" +
"\t\tannotation:STRUCT<\n" +
"\t\t\tancestralAllele:string,\n" +
"\t\t\talleleCount:int,\n" +
"\t\t\treadDepth:int,\n" +
"\t\t\tforwardReadDepth:int,\n" +
"\t\t\treverseReadDepth:int,\n" +
"\t\t\treferenceReadDepth:int,\n" +
"\t\t\treferenceForwardReadDepth:int,\n" +
"\t\t\treferenceReverseReadDepth:int,\n" +
"\t\t\talleleFrequency:float,\n" +
"\t\t\tcigar:string,\n" +
"\t\t\tdbSnp:boolean,\n" +
"\t\t\thapMap2:boolean,\n" +
"\t\t\thapMap3:boolean,\n" +
"\t\t\tvalidated:boolean,\n" +
"\t\t\tthousandGenomes:boolean,\n" +
"\t\t\tsomatic:boolean,\n" +
"\t\t\ttranscriptEffects:ARRAY<STRUCT<\n" +
"\t\t\t\talternateAllele:string,\n" +
"\t\t\t\teffects:ARRAY<string>,\n" +
"\t\t\t\tgeneName:string,\n" +
"\t\t\t\tgeneId:string,\n" +
"\t\t\t\tfeatureType:string,\n" +
"\t\t\t\tfeatureId:string,\n" +
"\t\t\t\tbiotype:string,\n" +
"\t\t\t\trank:int,\n" +
"\t\t\t\ttotal:int,\n" +
"\t\t\t\tgenomicHgvs:string,\n" +
"\t\t\t\ttranscriptHgvs:string,\n" +
"\t\t\t\tproteinHgvs:string,\n" +
"\t\t\t\tcdnaPosition:int,\n" +
"\t\t\t\tcdnaLength:int,\n" +
"\t\t\t\tcdsPosition:int,\n" +
"\t\t\t\tcdsLength:int,\n" +
"\t\t\t\tproteinPosition:int,\n" +
"\t\t\t\tproteinLength:int,\n" +
"\t\t\t\tdistance:int,\n" +
"\t\t\t\tmessages:ARRAY<string>\n" +
"\t\t\t>>,\n" +
"\t\t\tattributes:MAP<string, string>\n" +
"\t\t>\n" +
"\t>,\n" +
"\tcontigName string,\n" +
"\tstart bigint,\n" +
"\tend bigint,\n" +
"\tvariantCallingAnnotations STRUCT<filtersApplied:boolean, filtersPassed:boolean, filtersFailed:ARRAY<string>, downsampled:boolean, baseQRankSum:float, fisherStrandBiasPValue:float, rmsMapQ:float, mapq0Reads:int, mqRankSum:float, readPositionRankSum:float, genotypePriors:ARRAY<float>, genotypePosteriors:ARRAY<float>, vqslod:float, culprit:string, attributes:MAP<string, string>>,\n" +
"\tvariantCallingAnnotations STRUCT<\n" +
"\t\tfiltersApplied:boolean,\n" +
"\t\tfiltersPassed:boolean,\n" +
"\t\tfiltersFailed:ARRAY<string>,\n" +
"\t\tdownsampled:boolean,\n" +
"\t\tbaseQRankSum:float,\n" +
"\t\tfisherStrandBiasPValue:float,\n" +
"\t\trmsMapQ:float,\n" +
"\t\tmapq0Reads:int,\n" +
"\t\tmqRankSum:float,\n" +
"\t\treadPositionRankSum:float,\n" +
"\t\tgenotypePriors:ARRAY<float>,\n" +
"\t\tgenotypePosteriors:ARRAY<float>,\n" +
"\t\tvqslod:float,\n" +
"\t\tculprit:string,\n" +
"\t\tattributes:MAP<string, string>\n" +
"\t>,\n" +
"\tsampleId string,\n" +
"\tsampleDescription string,\n" +
"\tprocessingDescription string,\n" +
Expand All @@ -150,11 +219,80 @@ public void test_0() throws Exception {
String text = SQLUtils.toSQLString(stmt.clone(), JdbcConstants.HIVE);

assertEquals("CREATE EXTERNAL TABLE user_snp_test_4 (\n" +
"\tvariant STRUCT<contigName:string, start:bigint, end:bigint, names:ARRAY<string>, splitFromMultiAllelic:boolean, referenceAllele:string, alternateAllele:string, quality:double, filtersApplied:boolean, filtersPassed:boolean, filtersFailed:ARRAY<string>, annotation:STRUCT<ancestralAllele:string, alleleCount:int, readDepth:int, forwardReadDepth:int, reverseReadDepth:int, referenceReadDepth:int, referenceForwardReadDepth:int, referenceReverseReadDepth:int, alleleFrequency:float, cigar:string, dbSnp:boolean, hapMap2:boolean, hapMap3:boolean, validated:boolean, thousandGenomes:boolean, somatic:boolean, transcriptEffects:ARRAY<STRUCT<alternateAllele:string, effects:ARRAY<string>, geneName:string, geneId:string, featureType:string, featureId:string, biotype:string, rank:int, total:int, genomicHgvs:string, transcriptHgvs:string, proteinHgvs:string, cdnaPosition:int, cdnaLength:int, cdsPosition:int, cdsLength:int, proteinPosition:int, proteinLength:int, distance:int, messages:ARRAY<string>>>, attributes:MAP<string, string>>>,\n" +
"\tvariant STRUCT<\n" +
"\t\tcontigName:string,\n" +
"\t\tstart:bigint,\n" +
"\t\tend:bigint,\n" +
"\t\tnames:ARRAY<string>,\n" +
"\t\tsplitFromMultiAllelic:boolean,\n" +
"\t\treferenceAllele:string,\n" +
"\t\talternateAllele:string,\n" +
"\t\tquality:double,\n" +
"\t\tfiltersApplied:boolean,\n" +
"\t\tfiltersPassed:boolean,\n" +
"\t\tfiltersFailed:ARRAY<string>,\n" +
"\t\tannotation:STRUCT<\n" +
"\t\t\tancestralAllele:string,\n" +
"\t\t\talleleCount:int,\n" +
"\t\t\treadDepth:int,\n" +
"\t\t\tforwardReadDepth:int,\n" +
"\t\t\treverseReadDepth:int,\n" +
"\t\t\treferenceReadDepth:int,\n" +
"\t\t\treferenceForwardReadDepth:int,\n" +
"\t\t\treferenceReverseReadDepth:int,\n" +
"\t\t\talleleFrequency:float,\n" +
"\t\t\tcigar:string,\n" +
"\t\t\tdbSnp:boolean,\n" +
"\t\t\thapMap2:boolean,\n" +
"\t\t\thapMap3:boolean,\n" +
"\t\t\tvalidated:boolean,\n" +
"\t\t\tthousandGenomes:boolean,\n" +
"\t\t\tsomatic:boolean,\n" +
"\t\t\ttranscriptEffects:ARRAY<STRUCT<\n" +
"\t\t\t\talternateAllele:string,\n" +
"\t\t\t\teffects:ARRAY<string>,\n" +
"\t\t\t\tgeneName:string,\n" +
"\t\t\t\tgeneId:string,\n" +
"\t\t\t\tfeatureType:string,\n" +
"\t\t\t\tfeatureId:string,\n" +
"\t\t\t\tbiotype:string,\n" +
"\t\t\t\trank:int,\n" +
"\t\t\t\ttotal:int,\n" +
"\t\t\t\tgenomicHgvs:string,\n" +
"\t\t\t\ttranscriptHgvs:string,\n" +
"\t\t\t\tproteinHgvs:string,\n" +
"\t\t\t\tcdnaPosition:int,\n" +
"\t\t\t\tcdnaLength:int,\n" +
"\t\t\t\tcdsPosition:int,\n" +
"\t\t\t\tcdsLength:int,\n" +
"\t\t\t\tproteinPosition:int,\n" +
"\t\t\t\tproteinLength:int,\n" +
"\t\t\t\tdistance:int,\n" +
"\t\t\t\tmessages:ARRAY<string>\n" +
"\t\t\t>>,\n" +
"\t\t\tattributes:MAP<string, string>\n" +
"\t\t>\n" +
"\t>,\n" +
"\tcontigName string,\n" +
"\tstart bigint,\n" +
"\tend bigint,\n" +
"\tvariantCallingAnnotations STRUCT<filtersApplied:boolean, filtersPassed:boolean, filtersFailed:ARRAY<string>, downsampled:boolean, baseQRankSum:float, fisherStrandBiasPValue:float, rmsMapQ:float, mapq0Reads:int, mqRankSum:float, readPositionRankSum:float, genotypePriors:ARRAY<float>, genotypePosteriors:ARRAY<float>, vqslod:float, culprit:string, attributes:MAP<string, string>>,\n" +
"\tvariantCallingAnnotations STRUCT<\n" +
"\t\tfiltersApplied:boolean,\n" +
"\t\tfiltersPassed:boolean,\n" +
"\t\tfiltersFailed:ARRAY<string>,\n" +
"\t\tdownsampled:boolean,\n" +
"\t\tbaseQRankSum:float,\n" +
"\t\tfisherStrandBiasPValue:float,\n" +
"\t\trmsMapQ:float,\n" +
"\t\tmapq0Reads:int,\n" +
"\t\tmqRankSum:float,\n" +
"\t\treadPositionRankSum:float,\n" +
"\t\tgenotypePriors:ARRAY<float>,\n" +
"\t\tgenotypePosteriors:ARRAY<float>,\n" +
"\t\tvqslod:float,\n" +
"\t\tculprit:string,\n" +
"\t\tattributes:MAP<string, string>\n" +
"\t>,\n" +
"\tsampleId string,\n" +
"\tsampleDescription string,\n" +
"\tprocessingDescription string,\n" +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,23 @@ public void test_0() throws Exception {

assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS `customer_case`.`shangjian_6e958954-f2c7-11e8-94b7-0c54159e4818.json.snappy` (\n" +
"\t`batch_date` string,\n" +
"\t`data` STRUCT<`goods_name`:STRING, `thumb_url`:STRING, `country`:STRING, `is_app`:INT, `sales_tip`:STRING, `image_url`:STRING, `cnt`:INT, `goods_id`:BIGINT, `hd_thumb_url`:STRING, `is_use_promotion`:INT, `event_type`:INT, `normal_price`:INT, `market_price`:INT, `short_name`:STRING, group:STRUCT<`price`:INT, `customer_num`:INT>>,\n" +
"\t`data` STRUCT<\n" +
"\t\t`goods_name`:STRING,\n" +
"\t\t`thumb_url`:STRING,\n" +
"\t\t`country`:STRING,\n" +
"\t\t`is_app`:INT,\n" +
"\t\t`sales_tip`:STRING,\n" +
"\t\t`image_url`:STRING,\n" +
"\t\t`cnt`:INT,\n" +
"\t\t`goods_id`:BIGINT,\n" +
"\t\t`hd_thumb_url`:STRING,\n" +
"\t\t`is_use_promotion`:INT,\n" +
"\t\t`event_type`:INT,\n" +
"\t\t`normal_price`:INT,\n" +
"\t\t`market_price`:INT,\n" +
"\t\t`short_name`:STRING,\n" +
"\t\tgroup:STRUCT<`price`:INT, `customer_num`:INT>\n" +
"\t>,\n" +
"\t`goods_id` bigint,\n" +
"\t`gtime` bigint,\n" +
"\t`hot_tag` int,\n" +
Expand Down
887 changes: 886 additions & 1 deletion core/src/test/resources/bvt/parser/bigquery/0.txt

Large diffs are not rendered by default.

15 changes: 12 additions & 3 deletions core/src/test/resources/bvt/parser/bigquery/5.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@ OPTIONS(
--------------------
CREATE TABLE mydataset.newtable (
x INT64 OPTIONS (description = 'An optional INTEGER field'),
y STRUCT<a ARRAY<STRING> OPTIONS (description = 'A repeated STRING field'), b BOOL>
y STRUCT<
a ARRAY<STRING> OPTIONS (description = 'A repeated STRING field'),
b BOOL
>
)
DEFAULT COLLATE 'und:ci'
PARTITION BY _PARTITIONDATE
Expand Down Expand Up @@ -58,7 +61,10 @@ OPTIONS(
--------------------
CREATE TABLE IF NOT EXISTS mydataset.newtable (
x INT64,
y STRUCT<a ARRAY<STRING>, b BOOL>
y STRUCT<
a ARRAY<STRING>,
b BOOL
>
)
OPTIONS (
expiration_timestamp = TIMESTAMP '2025-01-01 00:00:00 UTC',
Expand All @@ -75,7 +81,10 @@ OPTIONS(
--------------------
CREATE OR REPLACE TABLE mydataset.newtable (
x INT64,
y STRUCT<a ARRAY<STRING>, b BOOL>
y STRUCT<
a ARRAY<STRING>,
b BOOL
>
)
OPTIONS (
expiration_timestamp = TIMESTAMP '2025-01-01 00:00:00 UTC',
Expand Down

0 comments on commit b0af2a0

Please sign in to comment.