-
Notifications
You must be signed in to change notification settings - Fork 85
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Add support for flexible column name in JsonStreamWriter (#1786)
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [X] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/java-bigquerystorage/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [X] Ensure the tests and linter pass - [X] Code coverage does not decrease (if any source code was changed) - [X] Appropriate docs were updated (if necessary) Fixes #<issue_number_goes_here> ☕️ If you write sample code, please follow the [samples format]( https://github.com/GoogleCloudPlatform/java-docs-samples/blob/main/SAMPLE_FORMAT.md).
- Loading branch information
Showing
8 changed files
with
265 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
67 changes: 67 additions & 0 deletions
67
...igquerystorage/src/main/java/com/google/cloud/bigquery/storage/v1/BigQuerySchemaUtil.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
/* | ||
* Copyright 2020 Google LLC | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* https://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package com.google.cloud.bigquery.storage.v1; | ||
|
||
import com.google.protobuf.Descriptors.FieldDescriptor; | ||
import java.nio.charset.StandardCharsets; | ||
import java.util.Base64; | ||
import java.util.regex.Pattern; | ||
|
||
public class BigQuerySchemaUtil { | ||
|
||
private static final String PROTO_COMPATIBLE_NAME_REGEXP = "[A-Za-z_][A-Za-z0-9_]*"; | ||
private static final String PLACEHOLDER_FILED_NAME_PREFIX = "col_"; | ||
private static final Pattern PROTO_COMPATIBLE_NAME_PATTERN = | ||
Pattern.compile(PROTO_COMPATIBLE_NAME_REGEXP); | ||
|
||
/** | ||
* * Checks if the field name is compatible with proto field naming convention. | ||
* | ||
* @param fieldName name for the field | ||
* @return true if the field name is comptaible with proto naming convention, otherwise, returns | ||
* false. | ||
*/ | ||
public static boolean isProtoCompatible(String fieldName) { | ||
return PROTO_COMPATIBLE_NAME_PATTERN.matcher(fieldName).matches(); | ||
} | ||
|
||
/** | ||
* * Generates a placeholder name that consists of a prefix + base64 encoded field name. We | ||
* replace all dashes with underscores as they are not allowed for proto field names. | ||
* | ||
* @param fieldName name for the field | ||
* @return the generated placeholder field name | ||
*/ | ||
public static String generatePlaceholderFieldName(String fieldName) { | ||
return PLACEHOLDER_FILED_NAME_PREFIX | ||
+ Base64.getUrlEncoder() | ||
.withoutPadding() | ||
.encodeToString(fieldName.getBytes(StandardCharsets.UTF_8)) | ||
.replace('-', '_'); | ||
} | ||
|
||
/** | ||
* * Gets the user-facing field name from the descriptor | ||
* | ||
* @param fieldDescriptor | ||
* @return columnName annotation if present, otherwise return the field name. | ||
*/ | ||
public static String getFieldName(FieldDescriptor fieldDescriptor) { | ||
return fieldDescriptor.getOptions().hasExtension(AnnotationsProto.columnName) | ||
? fieldDescriptor.getOptions().getExtension(AnnotationsProto.columnName) | ||
: fieldDescriptor.getName(); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
64 changes: 64 additions & 0 deletions
64
...erystorage/src/test/java/com/google/cloud/bigquery/storage/v1/BigQuerySchemaUtilTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
/* | ||
* Copyright 2020 Google LLC | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* https://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package com.google.cloud.bigquery.storage.v1; | ||
|
||
import com.google.cloud.bigquery.storage.test.SchemaTest.TestNestedFlexibleFieldName; | ||
import com.google.protobuf.Descriptors.Descriptor; | ||
import java.util.Arrays; | ||
import java.util.List; | ||
import junit.framework.TestCase; | ||
import org.junit.Test; | ||
import org.junit.runner.RunWith; | ||
import org.junit.runners.JUnit4; | ||
|
||
@RunWith(JUnit4.class) | ||
public class BigQuerySchemaUtilTest extends TestCase { | ||
|
||
@Test | ||
public void testIsProtoCompatible() { | ||
List<String> protoCompatibleNames = Arrays.asList("col_1", "name", "_0_"); | ||
List<String> protoIncompatibleNames = Arrays.asList("0_col", "()", "列", "a-1"); | ||
protoCompatibleNames.stream() | ||
.forEach( | ||
name -> { | ||
assertTrue(BigQuerySchemaUtil.isProtoCompatible(name)); | ||
}); | ||
protoIncompatibleNames.stream() | ||
.forEach( | ||
name -> { | ||
assertFalse(BigQuerySchemaUtil.isProtoCompatible(name)); | ||
}); | ||
} | ||
|
||
public void testGeneratePlaceholderFieldName() { | ||
assertEquals("col_c3RyLeWIlw", BigQuerySchemaUtil.generatePlaceholderFieldName("str-列")); | ||
// Base64 url encodes "~/~/" to "fi9-Lw", we replaced - with _ to be proto compatible. | ||
assertEquals("col_fi9_Lw", BigQuerySchemaUtil.generatePlaceholderFieldName("~/~/")); | ||
} | ||
|
||
public void testGetFieldName() { | ||
// Test get name from annotations. | ||
Descriptor flexibleDescriptor = TestNestedFlexibleFieldName.getDescriptor(); | ||
assertEquals("str-列", BigQuerySchemaUtil.getFieldName(flexibleDescriptor.getFields().get(0))); | ||
assertEquals( | ||
"nested-列", BigQuerySchemaUtil.getFieldName(flexibleDescriptor.getFields().get(1))); | ||
|
||
// Test get name without annotations. | ||
Descriptor descriptor = TestNestedFlexibleFieldName.getDescriptor(); | ||
assertEquals("int32_value", BigQuerySchemaUtil.getFieldName(descriptor.getFields().get(0))); | ||
assertEquals("int64_value", BigQuerySchemaUtil.getFieldName(descriptor.getFields().get(1))); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters